More information about this script and a link to download it directly.
parse.pl
############################################################################
#
# WordPress.com 7 Day Referrer Log Parser by http://engtech.wordpress.com
#
# Thanks for your interest in this script, but I have to warn you that it
# isn't intended for general usage or that well supported. I'm offering it
# as a "Hey, this might be useful to you if you already know Perl/unix".
#
# How to run
#
# 1. Login to your wordpress.com blog and download the 7 Day Referrer page
# to a file. This script might only work if the page is downloaded with
# Firefox. I've also only tried having the downloaded page in the same
# directory as the script.
#
# 2. Run the script giving the name of the downloaded page as the first
# argument:
# perl parse.pl week1.html
#
# 3. The first time the script is run with a new web page it will create
# a CSV file. This file can be edited with a text editor or with
# Microsoft Excel. When you first edit the file both columns will have
# the same information. What you want to do is edit the second column
# and create "groups":
# board.progaming.it,other articles
# chris.pirillo.com,chris.pirillo.com
# coolthingoftheday.blogspot.com,long tail
#
# 4. Re-run the script and it will create an HTML file (e.g. week1_out.html)
# with all of the URLs from the 7 Day Referrers page grouped as you
# defined in the CSV file.
# It will also create a second CSV file called "week1_graph.csv" that is
# a list of the groups that are used for the first generated graph.
# Any group that is not listed in this CSV file will be put on the second
# generated graph.
#
# 5. The process I use is:
# - Run the script to generate the CSV file
# - Look at the output and create groups where it makes sense
# - Keep re-running the script and editing the CSV until I like the way the
# HTML output is organized.
# - Split the graphs the way I want to.
#
# 6. I'm sorry the graphs are so friggin' ugly. I need to read a tutorial
# on "How to not make GD::Graph output shit."
#
############################################################################
use strict;
use warnings;
use Data::Dumper;
use GD::Graph::lines;
use FileHandle;
# ---------------------------------------------------------------------------
# Script-wide state shared by the subroutines below.
# ---------------------------------------------------------------------------
my $has_csv        = 0;     # set by readCSV() when the grouping CSV is readable
my $has_csv_graph  = 0;     # set by readCSV() when the graph CSV is readable
my @data           = ();    # one hash ref per "statsDay" table: group => hits
my $data_idx       = -1;    # index into @data of the table currently parsed
my %sites          = ();    # group => index of the last table it was seen in
my %urls           = ();    # group => { url => 1 }
my %translate      = ();    # site => group, loaded from the grouping CSV
my %totalsFromSite = ();    # group => total hits, filled in by output()

# The downloaded referrer page is the one mandatory argument; fail with a
# usage message instead of "uninitialized value" warnings when it is missing.
my $file = $ARGV[0];
if (!defined $file || $file eq '') {
    die "usage: perl $0 <downloaded-referrers-page.html>\n";
}
if (! -f $file) {
    die "could not find file '$file': $!";
}

# Derive the prefix for all generated files by cutting the file name at its
# first dot.  Only the last path component is considered, so that a path such
# as "./week1.html" or "../stats.d/week1.html" keeps its directory part.
my $prefix = $file;
$prefix =~ s{\.[^/\\]*$}{};

my $csv_file       = $prefix . ".csv";
my $csv_graph_file = $prefix . "_graph.csv";
my @sites_high     = ();    # groups listed in the graph CSV (first graph)
my $graph_high     = $prefix."_high.png";
my $graph_low      = $prefix."_low.png";

# ---------------------------------------------------------------------------
# Driver: load any existing CSV files, parse the page, then write whatever
# is still missing (grouping CSV, HTML report, graph CSV, graphs).
# ---------------------------------------------------------------------------
readCSV();
input();
if (not $has_csv) { writeCSV(); }
output();
if ($has_csv && not $has_csv_graph) { writeCSVGraph(); }
if ($has_csv && $has_csv_graph) {
    generateGraph();
}
exit(0);
# readCSV()
#
# Loads the two optional, user-edited CSV files if they are readable:
#   * $csv_file       - "site,group" pairs, stored in %translate; sets $has_csv.
#   * $csv_graph_file - a comma separated list of groups for the first graph,
#                       stored in @sites_high; sets $has_csv_graph.
#
# Line endings are stripped as CR and/or LF so files saved with Windows line
# endings do not leave a trailing "\r" on group names.  Blank lines are
# skipped; in particular a trailing empty line in the graph CSV no longer
# resets @sites_high to an empty list.
#
# Takes no arguments.  Dies if a readable file cannot be opened.
sub readCSV {
    if (-r $csv_file) {
        $has_csv = 1;
        my $ifh = FileHandle->new($csv_file, "r")
            or die "could not read file '$csv_file': $!";
        while (my $line = <$ifh>) {
            $line =~ s/[\r\n]+$//;
            next if $line !~ /\S/;
            # Only the first two columns are meaningful: site, then group.
            my ($site, $group) = split(/,/, $line);
            $translate{$site} = $group;
        }
        close($ifh);
        # print Dumper(%translate);
    }
    if (-r $csv_graph_file) {
        $has_csv_graph = 1;
        my $ifh = FileHandle->new($csv_graph_file, "r")
            or die "could not read file '$csv_graph_file': $!";
        while (my $line = <$ifh>) {
            $line =~ s/[\r\n]+$//;
            next if $line !~ /\S/;
            # As before, the last non-blank line of the file wins.
            @sites_high = split(/,/, $line);
        }
        close($ifh);
        # print Dumper(@sites_high);
    }
}
# writeCSV()
#
# Creates the initial grouping CSV ($csv_file): one "site,site" line per key
# of %sites, in sorted order, then tells the user to edit it and rerun.
# Dies if the file cannot be opened for writing.
sub writeCSV {
    my $out = FileHandle->new($csv_file, "w")
        || die "could not write file '$csv_file': $!";
    for my $name (sort keys %sites) {
        my $line = $name . "," . $name . "\n";
        print {$out} $line;
    }
    close($out);
    print "Please edit $csv_file to create groups and rerun.\n";
}
# writeCSVGraph()
#
# Creates the initial graph CSV ($csv_graph_file): a single line holding all
# keys of %sites, sorted and comma separated, then tells the user to edit it.
# Dies if the file cannot be opened for writing.
sub writeCSVGraph {
    my @names = map { my $name = $_; chomp($name); $name } sort keys %sites;
    my $line  = join(",", @names);

    my $out = FileHandle->new($csv_graph_file, "w")
        || die "could not write file '$csv_graph_file': $!";
    print {$out} $line . "\n";
    close($out);

    print "Please edit $csv_graph_file to break groups into high and low for the graphs.\n";
}
# input()
#
# Scans the downloaded page ($file) line by line.  Every
# '<table class="statsDay">' line starts a new per-day hash in @data and
# advances $data_idx; every referrer row found is handed to process() as
# (url, site, hits).  Dies if the page cannot be opened.
sub input {
    my $page = FileHandle->new($file, "r")
        || die "could not read file '$file': $!";

    while (my $line = <$page>) {
        # Start of a new day:
        #<table class="statsDay">
        if ($line =~ m/<table class="statsDay">/) {
            push(@data, {});
            $data_idx++;
        }

        # Linked referrer: the href is the URL, the link text up to the
        # first slash is the site.
        #<tr class="alternate"><td><a href="http://digg.com/view/all/popular/today/page3">digg.com/view/all/popular/today/page3</a></td><td class="views">7</td></tr>
        if (my ($url, $site, undef, $hits) =
            $line =~ m/<td><a href="(.*?)">(.*?)(\/.*|)<\/a><\/td><td class="views">(\d+)<\/td><\/tr>/) {
            process($url, $site, $hits);
        }
        # Plain-text referrer: the cell text is the URL, its part before
        # the first slash is the site.
        # <tr><td>engtech.wordpress.com/tag/nokia-6682</td><td class="views">4</td></tr>
        elsif (my ($host, $path, $count) =
            $line =~ m/<td>(.*?)(\/.*|)<\/td><td class="views">(\d+)<\/td><\/tr>/) {
            process("$host$path", $host, $count);
        }
    }

    close($page);
}
# process($url, $site, $hits)
#
# Records one referrer row against the day currently being parsed
# ($data[$data_idx]):
#   * maps $site to its group through %translate when a mapping is defined,
#   * remembers in %sites the index of the day the group was last seen in,
#   * adds $url to the set of URLs kept for the group in %urls,
#   * adds $hits to the group's counter for the current day.
sub process {
    my ($url, $site, $hits) = @_;
    #print "$hits, $url\n";
    my $day = $data[$data_idx];

    # Normalize site urls
    my $group = defined $translate{$site} ? $translate{$site} : $site;

    $sites{$group} = $data_idx; # newest to oldest

    # Keep track of URLs per site
    $urls{$group} = {} unless defined $urls{$group};
    $urls{$group}{$url} = 1;

    # Keep count
    $day->{$group} = 0 unless defined $day->{$group};
    $day->{$group} += $hits;
}
# output()
#
# Writes the HTML report "<prefix>_out.html" containing two tables:
#   1. one row per group with its hits per day (last parsed table first,
#      labelled "Day 1") followed by the group's total;
#   2. one row per group with its total and the list of URLs seen for it.
#
# Side effects:
#   * the group 'REMOVE' is dropped from %sites and %urls,
#   * missing per-day counters in @data are set to 0,
#   * %totalsFromSite is filled with each group's total.
#
# Groups are ordered by sortSites().  Dies if the report cannot be written.
sub output {
    delete($sites{'REMOVE'});
    delete($urls{'REMOVE'});

    # The ordering is the same for both tables, so compute it once.
    my @ordered = sort { sortSites($a, $b) } keys %sites;

    my $ofile = $prefix."_out.html";
    my $ofh = FileHandle->new($ofile, "w")
        or die "could not write '$ofile': $!";

    # Table 1: hits per group per day.
    print $ofh "<TABLE BORDER=\"1\" CELLPADDING=\"5\" CELLSPACING=\"5\" WIDTH=\"100%\">\n";
    print $ofh "<TR><TH>Site</TH>";
    for my $day (1 .. scalar(@data)) {
        print $ofh "<TH>Day $day</TH>";
    }
    print $ofh "<TH>Totals</TH></TR>\n";

    foreach my $site (@ordered) {
        my $row   = "<tr><td>$site</td>";
        my $total = 0;
        # Walk the days from the last parsed table back to the first one.
        foreach my $day_ref (reverse @data) {
            # Normalize: a group without hits on a day counts as 0.
            if (not defined $day_ref->{$site}) {
                $day_ref->{$site} = 0;
            }
            my $value = $day_ref->{$site};
            $total += $value;
            $row   .= "<TD>".$value."</TD>";
        }
        # Recorded even when there are no days, so table 2 always has a total.
        $totalsFromSite{$site} = $total;
        print $ofh $row . "<TD>" . $total . "</TD></TR>\n";
    }
    print $ofh "</TABLE>\n";

    # Table 2: total and URL list per group.
    print $ofh "<TABLE BORDER=\"1\" CELLPADDING=\"5\" CELLSPACING=\"5\" WIDTH=\"100%\">\n";
    foreach my $site (@ordered) {
        my $total = $totalsFromSite{$site};
        my $url_text = "<UL>";
        foreach my $url (sort keys %{$urls{$site}}) {
            $url_text .= "<LI><A HREF=\"$url\">$url</A></LI>";
        }
        $url_text .= "</UL>";
        print $ofh "<TR><TD>$site</TD><TD>$total</TD><TD>$url_text</TD></TR>";
    }
    print $ofh "</TABLE>\n";

    close($ofh) or die "could not finish writing '$ofile': $!";
}
# sortSites($left, $right)
#
# Comparison function for group names: the group with the larger value in
# %sites sorts first; groups with equal values are ordered by name.
# Returns a negative, zero or positive number like cmp / <=>.
sub sortSites {
    my ($left, $right) = @_;
    my $by_index = $sites{$right} <=> $sites{$left};
    return $by_index != 0 ? $by_index : ($left cmp $right);
}
# generateGraph()
#
# Draws the two line graphs:
#   * $graph_high - the groups listed in @sites_high (from the graph CSV),
#   * $graph_low  - every remaining group in %sites, ordered by sortSites().
#
# The groups in @sites_high are removed from %sites as a side effect.  A graph
# with no groups to plot is skipped with a warning instead of being handed to
# image() with no data sets.
sub generateGraph {
    # First graph: the groups picked in the graph CSV.
    my ($rows_high, $max_high) = _graphRows(\@sites_high);
    if (@data) {
        # Whatever is plotted on the first graph must not show up on the second.
        delete @sites{@sites_high};
    }
    if (@sites_high) {
        image($graph_high, $max_high, $rows_high, \@sites_high);
    }
    else {
        warn "no groups listed in $csv_graph_file, skipping $graph_high\n";
    }

    # Second graph: everything that is left.
    my @legend = sort { sortSites($a, $b) } keys %sites;
    my ($rows_low, $max_low) = _graphRows(\@legend);
    if (@legend) {
        image($graph_low, $max_low, $rows_low, \@legend);
    }
    else {
        warn "no groups left for the second graph, skipping $graph_low\n";
    }
}

# _graphRows(\@site_list)
#
# Builds the data rows for one graph: one row per entry of @data, walked in
# reverse order, each holding the day number (starting at 0) followed by the
# hits of every group in @site_list.  A group with no counter for a day (for
# example a name in the graph CSV that matches no group) is plotted as 0.
# Returns (\@rows, $largest_value).
sub _graphRows {
    my ($site_list) = @_;
    my @rows = ();
    my $max  = 0;
    my $day  = 0;
    foreach my $ref (reverse @data) {
        my @row = ($day);
        foreach my $site (@{$site_list}) {
            my $value = defined $ref->{$site} ? $ref->{$site} : 0;
            if ($value > $max) {
                $max = $value;
            }
            push(@row, $value);
        }
        push(@rows, \@row);
        $day++;
    }
    return (\@rows, $max);
}
# image($file, $max, $gref, $lref)
#
# Renders one line graph to a PNG file.
#   $file - path of the PNG file to write
#   $max  - value used as the top of the y axis
#   $gref - array ref of rows; each row is [ x value, y value per line ]
#   $lref - array ref of legend labels, one per line
#
# Dies if the graph cannot be plotted or the file cannot be written.
sub image {
    my ($file, $max, $gref, $lref) = @_;

    my $gdata = GD::Graph::Data->new();
    foreach my $row (@{$gref}) {
        $gdata->add_point(@{$row});
    }

    my @colours = ("black", "blue", "purple", "green", "red", "gray", "dgray");
    my $chart = GD::Graph::lines->new(600,375);
    $chart->set_legend(@{$lref});
    # The x axis font used to be spelled "lusimbi.ttf"; every other axis and
    # legend font here is luximbi.ttf, so it is treated as a typo.
    $chart->set_x_axis_font("/usr/X11R6/lib/X11/fonts/TTF/luximbi.ttf", 10);
    $chart->set_y_axis_font("/usr/X11R6/lib/X11/fonts/TTF/luximbi.ttf", 10);
    $chart->set_x_label_font("/usr/X11R6/lib/X11/fonts/TTF/luximb.ttf", 12);
    $chart->set_y_label_font("/usr/X11R6/lib/X11/fonts/TTF/luximb.ttf", 12);
    $chart->set_legend_font("/usr/X11R6/lib/X11/fonts/TTF/luximbi.ttf", 10);
    $chart->set
    (
        y_label      => "Traffic",
        x_label      => "Days",
        y_max_value  => $max,
        line_width   => 3,
        y_long_ticks => 1,
        dclrs        => [@colours]
    );

    # Plot before touching the output file, so that a failed plot is reported
    # with the graph's own error message and leaves no empty PNG behind.
    my $gd = $chart->plot($gdata)
        or die "Cannot plot graph for $file: " . $chart->error;

    open(my $image_fh, '>', $file) or
        die "Cannot open $file output png file for writing: $!";
    binmode($image_fh);    # PNG is binary data
    print {$image_fh} $gd->png;
    close($image_fh) or
        die "Cannot finish writing $file: $!";
}
Generated using PerlTidy (-html -nss)

How to Earn a Six Figure Income from Blogging in Two Easy Steps
How NOT to be a Successful Blogger
81 movies for geeks that do not suck
Web Anonymity 101 - Digital Breadcrumbs
76 Romantic Movies for Guys and Girls
107 t-shirts for geeks that do not suck


