From: Sven Dowideit <[email protected]>
---
AnyData/Format/Weblog.pm | 24 +++++++++++-------------
Changes | 2 ++
t/fixed.tbl | 8 +++++++-
t/weblog.t | 42 ++++++++++++++++++++++++++++++++++++++++++
t/weblog.tbl | 1 +
5 files changed, 63 insertions(+), 14 deletions(-)
create mode 100644 t/weblog.t
create mode 100644 t/weblog.tbl
diff --git a/AnyData/Format/Weblog.pm b/AnyData/Format/Weblog.pm
index 0c89da8..339623f 100644
--- a/AnyData/Format/Weblog.pm
+++ b/AnyData/Format/Weblog.pm
@@ -61,12 +61,11 @@ all rights reserved
=cut
-
use strict;
use AnyData::Format::Base;
use vars qw( @ISA $DEBUG $VERSION);
@AnyData::Format::Weblog::ISA = qw( AnyData::Format::Base );
-$DEBUG = 0;
+$DEBUG = 0;
$VERSION = '0.06';
@@ -74,9 +73,9 @@ sub new {
my $class = shift;
my $self = shift || {};
$self->{col_names} =
-
'remotehost,username,authuser,date,request,status,bytes,client,referer';
- $self->{record_sep} = "\n";
- $self->{key} = 'datestamp';
+ 'remotehost,username,authuser,date,request,status,bytes,referer,client';
+ $self->{record_sep} = "\n";
+ $self->{key} = 'datestamp';
$self->{keep_first_line} = 1;
return bless $self, $class;
}
@@ -84,21 +83,20 @@ sub new {
sub read_fields {
print "PARSE RECORD\n" if $DEBUG;
my $self = shift;
- my $str = shift || return undef;
+ my $str = shift || return undef;
$str =~ s/^\s+//;
$str =~ s/\s+$//;
return undef unless $str;
- my(@row) = $str =~
- /^(\S*) (\S*) (\S*) \[([^\]]*)\] "(.*)" (\S*) (\S*)\s*(.*)$/;
+ my (@row) =
+ $str =~ /^(\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/;
return undef unless defined $row[0];
- my($client,$referer) = $row[7] =~ /^(.*) (\S*)$/;
+ my ( $referer, $client ) = $row[7] =~ /^(.*?)\s(.*)$/;
$client ||= '';
$referer ||= '';
- ($row[7],$row[8])=($client,$referer);
+ ( $row[7], $row[8] ) = ( $referer, $client );
+
# $row[3] =~ s/\s*-\s*(\S*)$//; # hide GMT offset on datestamp
- return @row
+ return @row;
}
1;
-
-
diff --git a/Changes b/Changes
index 9593473..e09929f 100644
--- a/Changes
+++ b/Changes
@@ -11,6 +11,8 @@ version 0.11, released Aug 2012
* Fix spelling errors (debian) Ansgar Burchardt <[email protected]>
* adColumn $distinct_flag not handled (RT#6248 & RT#6251) John D. Lima
* writing fields containing 0 with AnyData::Format::Fixed (RT#8671)
<elodie+cpan [...] pasteur.fr>
+ * weblog request and referer regexs were too greedy, and the referer and
client regex's where in the wrong order (RT#34063) and (RT#72334) Wes Brown
<wes [...] smellycat.com> and pawal [...] blipp.com
+ *
version 0.10, released 19 April 2004
diff --git a/t/fixed.tbl b/t/fixed.tbl
index aeac94d..02ffb63 100644
--- a/t/fixed.tbl
+++ b/t/fixed.tbl
@@ -1 +1,7 @@
-country, code australia au germany de france fr switzerlandch broken 0 broken2 0
+country,code
+australia au
+germany de
+france fr
+switzerlandch
+broken 0
+broken2 0
diff --git a/t/weblog.t b/t/weblog.t
new file mode 100644
index 0000000..2d494e6
--- /dev/null
+++ b/t/weblog.t
@@ -0,0 +1,42 @@
+#!/usr/local/bin/perl -wT
+use strict;
+use warnings;
+
+use Test::More;
+plan tests => 10;
+
+use AnyData;
+
+my $table = adTie( 'Weblog', 't/weblog.tbl', 'r', {} );
+
+ok( 1 == adRows($table), "Failed rows" );
+
+#remotehost,username,authuser,date,request,status,bytes,client,referer
+#12.34.56.78 - - [13/Mar/2008:07:38:53 +0100] "GET /creeper/image HTTP/1.1"
200 252 "http://www.example.com/" "Mozilla/5.0 (Windows; U; Windows NT 6.0;
sv-SE; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12"
+
+my $row = each %$table;
+ok( '12.34.56.78' eq $row->{remotehost}, 'remotehost' );
+ok( '-' eq $row->{username}, 'username' );
+ok( '-' eq $row->{authuser}, 'authuser' );
+ok( '13/Mar/2008:07:38:53 +0100' eq $row->{date}, 'date' );
+ok( 'GET /creeper/image HTTP/1.1' eq $row->{request}, 'request' );
+ok( '200' eq $row->{status}, 'status' );
+ok( '252' eq $row->{bytes}, 'bytes' );
+ok(
+'"Mozilla/5.0 (Windows; U; Windows NT 6.0; sv-SE; rv:1.8.1.12) Gecko/20080201
Firefox/2.0.0.12"'
+ eq $row->{client},
+ 'client ' . $row->{client}
+);
+ok( '"http://www.example.com/"' eq $row->{referer},
+ 'referer: ' . $row->{referer} );
+
+#write test
+#TODO: looks like writing a weblog is broken
+#print STDERR "\n---\n";
+#print STDERR adExport( $table, 'Weblog', undef, { } );
+#print STDERR "\n---\n";
+#ok(
+# <<'HERE' eq adExport( $table, 'Weblog', undef, { } ), 'export weblog
format' );
+#HERE
+
+__END__
diff --git a/t/weblog.tbl b/t/weblog.tbl
new file mode 100644
index 0000000..54e379f
--- /dev/null
+++ b/t/weblog.tbl
@@ -0,0 +1 @@
+12.34.56.78 - - [13/Mar/2008:07:38:53 +0100] "GET /creeper/image HTTP/1.1" 200
252 "http://www.example.com/" "Mozilla/5.0 (Windows; U; Windows NT 6.0; sv-SE;
rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12"
--
1.7.10.4