#!/usr/bin/perl
# demo.pl
# runs demonstration scripts for WWW::Extractor 1.0
use WWW::Extractor;
# HTTP setup for the website examples (3-6).
# NOTE(review): presumably turns on verbose HTTP tracing in WWW::Extractor -- confirm.
http_verbose;
# NOTE(review): presumably routes requests through this proxy; the host is
# hard-coded and site-specific, so adjust or remove it when running elsewhere.
http_proxy('http://proxy.schools.net.au:3128');
use WWW::Extractor::Generic::Try;
# Main driver: run every demonstration in order, reporting (but surviving)
# any failure so that one broken example does not stop the rest.

# Autoflush STDOUT: the loop writes to both STDOUT and STDERR.
$| = 1;

# Dispatch table of the demonstrations, in the order they are run.
# Explicit code references avoid calling subs through a symbolic name,
# which breaks under "use strict" and silently forwards the caller's @_.
my @examples = (
    \&example1, \&example2, \&example3,
    \&example4, \&example5, \&example6,
);

print "\n";
for my $num (1 .. @examples) {
    try {
        $examples[$num - 1]->();
    } catch {
        # $_ carries the error raised inside the try block.
        print "\nExample $num failed : $_\n\n";
    };
    print "\n\n";
    print STDERR "\n";
}
print "\n";
exit;
# Example 1: extract a table from a saved Beyond email.
#
# Loads 'beyond.txt', prints the loaded content ("Before"), then reads the
# table identified by 'Report of Sales' and prints it as HTML ("After").
# Takes no arguments; output goes to STDOUT. Errors raised by the extractor
# calls propagate to the caller.
sub example1 {
    print "Example 1 - Beyond (email)\n\n";
    load_text 'beyond.txt';

    print "Before:\n\n";
    print "\n", content, "\n\n";

    # Lexical, so the table does not leak into the package global of the
    # same name used by other examples.
    my $table = read_table 'Report of Sales';

    print "After:\n\n";
    print $table->as_HTML;
}
# Example 2: extract two tables and a date range from a saved Amazon email.
#
# Loads 'amazon.txt', prints the loaded content ("Before"), then prints
# ("After") the first table, the date range, and the second table.
# Takes no arguments; output goes to STDOUT. Errors raised by the extractor
# calls propagate to the caller.
sub example2 {
    print "Example 2 - Amazon (email)\n\n";
    load_text 'amazon.txt';

    print "Before:\n\n";
    print "\n", content, "\n\n";

    # The first table is described by a literal layout template.
    # NOTE(review): presumably the comma-separated sections are column names,
    # ruler line, row picture and closing ruler -- confirm against the
    # WWW::Extractor documentation.
    my $table1 = read_table <<'End';
ITEM CODE, HITS, TITLE,
DIR, NDIR, YOUR FEE, DISCOUNT, LIST PRICE
,
---------- ------ ----- ----- ------- --------------------------------------
,
########## ###### @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
##### ##### ####### sold at ##% off list price of #############
,
---------- ------ ----- ----- ------- --------------------------------------
End
    # Keep only the named columns, in this order.
    $table1->cut('ITEM CODE', 'TITLE', 'DISCOUNT', 'LIST PRICE', 'YOUR FEE');

    # The second table is located by its heading text.
    my $table2 = read_table 'Number of Visitors';
    $table2->cut(2 .. 6);
    # NOTE(review): presumably drops 0 header rows and 2 footer rows -- confirm.
    $table2->drop_head_foot(0, 2);

    my ($from_date, $to_date) = read_line 'For the week of', 'through';

    print "After:\n\n";
    print $table1->as_HTML;
    print <<"End";
From date: $from_date
To date: $to_date
End
    print $table2->as_HTML;
}
# Example 3: extract three figures from the Avanta affiliate page.
#
# Tries to load the locally saved page 'avanta.html'; if that fails, logs in
# to the live site, submits the form and saves the result as 'avanta.html'.
# Prints the page content ("Before") and the extracted values ("After").
# Takes no arguments; output goes to STDOUT.
#
# NOTE(review): the login and password are hard-coded in this demo; consider
# moving them out of the source.
sub example3 {
    print "Example 3 - Avanta (website)\n\n";

    try { load_html 'avanta.html' }
    catch {
        get 'http://www.avanta.com.au/affiliate/default.asp';
        enter 'Login',    'csymons@schools.net.au';
        enter 'Password', 'schoolsales';
        submit;
        save 'avanta.html';
    };

    print "Before:\n\n";
    print content;

    # One value is returned per label, in the order the labels are given.
    my ($nref, $nret, $amount) = read_lines
        'Number of people that have referred and entered this site:',
        'Number of people that have returned after a referral:',
        'Amount Sold (AUS$):';

    print "After:\n\n";
    # The leading '#' characters below are literal output text, not comments.
    print <<"End";
# people entering site: $nref
# people returning to site: $nret
Amount sold (AU\$): $amount
End
}
# Example 4: extract the report date range from the Beyond affiliate page.
#
# Tries to load the locally saved page 'beyond.html'; if that fails, logs in
# to the live site, submits the form and saves the result as 'beyond.html'.
# Prints the page content ("Before") and the extracted dates ("After").
# Takes no arguments; output goes to STDOUT.
#
# NOTE(review): the affiliate number and password are hard-coded in this
# demo; consider moving them out of the source.
sub example4 {
    print "Example 4 - Beyond (website)\n\n";

    try { load_html 'beyond.html' }
    catch {
        get 'http://www.beyond.com/forms/affiliates/online_report.htm';
        enter 'Affiliate Number', 'AF58087';
        enter 'Password',         'dermott';
        submit;
        save 'beyond.html';
    };

    print "Before:\n\n";
    print content;

    my ($from_date, $to_date) = read_line 'Report of Sales from', 'through';

    print "After:\n\n";
    print <<"End";
From date: $from_date
To date: $to_date
End
}
# Example 5: extract account figures from the Chaos Music reseller page.
#
# Tries to load the locally saved page 'chaosmusic.html'; if that fails, logs
# in to the live site, submits the form and saves the result as
# 'chaosmusic.html'. Prints the page content ("Before") and the five
# extracted values ("After"). Takes no arguments; output goes to STDOUT.
#
# NOTE(review): the reseller ID and password are hard-coded in this demo;
# consider moving them out of the source.
sub example5 {
    print "Example 5 - Chaos Music (website)\n\n";

    try { load_html 'chaosmusic.html' }
    catch {
        get 'http://www.chaosmusic.com/reseller_login.asp';
        enter 'Reseller ID', 377;
        enter 'Password',    'dermott';
        submit;
        save 'chaosmusic.html';
    };

    print "Before:\n\n";
    print content;

    # The label strings are passed to read_lines verbatim and are
    # intentionally left untouched, including the spelling 'Commisions'.
    my ($balance, $total, $nvis, $nsale, $comm) = read_lines
        'Account Balance', 'Total Commisions Earned',
        'Number of visitors', 'Number of sales',
        'Current Commission';

    print "After:\n\n";
    print <<"End";
Balance: $balance
Commissions: $total
Visitors: $nvis
Sales: $nsale
Commission: $comm
End
}
# Example 6: extract the daily sales table for MP3 from reporting.net.
#
# Tries to load the locally saved page 'mp3.html'; if that fails, logs in to
# the live site, navigates to the report, and saves the result as 'mp3.html'.
# Prints the page content ("Before") and the trimmed table as HTML ("After").
# Takes no arguments; output goes to STDOUT.
#
# NOTE(review): the user name and password are hard-coded in this demo;
# consider moving them out of the source.
sub example6 {
    print "Example 6 - MP3 (website)\n\n";

    try { load_html 'mp3.html' }
    catch {
        get 'http://www.reporting.net/';
        # Values are entered without field names here.
        # NOTE(review): presumably they fill successive form fields -- confirm.
        enter 'schoolsnet';
        enter 'dermott';
        submit 'GET';
        find 'mp3';
        link 'enter';
        link 'reports';
        link 'Sales (Daily)';
        select 'Browser';
        submit;
        save 'mp3.html';
    };

    print "Before:\n\n";
    print content;

    # The table is located by its heading text, then trimmed.
    my $table = read_table 'Affiliate Site';
    # NOTE(review): presumably drops 0 header rows and 2 footer rows -- confirm.
    $table->drop_head_foot(0, 2);
    $table->cut('Transaction Date', '# Orders', 'Net Sales');

    print "After:\n\n";
    print $table->as_HTML;
}
__END__
# example 1: alternative table extraction (safer?)
$table = read_table <<'End';
No., Prod Name, Unit price, Qty, E/P/C/S, Amount, Date, Subcode
,
=== ============= ========== === ======= ======== ============ ===========
,
### @@@@@@@@@@@@@@@@@@@@@@@@@@@@ ########## ### @@@@@@@ ######## @@@@@@@@@@@@ @@@@@@@@@@@
,
End
# example 2: alternative table extraction (safer?)
$table2 = read_table <<'End';
Date, ISBN/ASIN,home page, search, other
,
--------------------- ----------- --------- --------- --------- ---------
,
Number of Visitors on @@@@@@@@@@@ ######### ######### ######### #########
,
--------------------- ----------- --------- --------- --------- ---------
End
# example 6: alternative access method (faster, uses a cookie)
get 'http://www.reporting.net/networks/affiliates/bf_login',
[username_in => 'schoolsnet', password_in => 'dermott'];
cookie 0, 'bfMerch', '1429031|11128519', '/', '.reporting.net';
post 'http://www.reporting.net/networks/affiliates/pub_sales_rpt.run',
[report_id_in => 58, rep_firm_id_in => 1429031,
start_date_in => 13, start_date_in => 01, start_date_in => 2000,
end_date_in => 19, end_date_in => 01, end_date_in => 2000,
export_format_id_in => 0 ];