DammitCompactHourlyOrDailyPageCountFiles.pl

ezachte Tue, 17 Jan 2012 10:36:48 -0800

https://www.mediawiki.org/wiki/Special:Code/MediaWiki/109176


Revision: 109176
Author:   ezachte
Date:     2012-01-17 18:36:17 +0000 (Tue, 17 Jan 2012)
Log Message:
-----------
intermediate release: cleanup before major restructuring

Modified Paths:
--------------
    trunk/wikistats/dammit.lt/DammitCompactHourlyOrDailyPageCountFiles.pl

Modified: trunk/wikistats/dammit.lt/DammitCompactHourlyOrDailyPageCountFiles.pl
===================================================================
--- trunk/wikistats/dammit.lt/DammitCompactHourlyOrDailyPageCountFiles.pl       2012-01-17 18:33:29 UTC (rev 109175)
+++ trunk/wikistats/dammit.lt/DammitCompactHourlyOrDailyPageCountFiles.pl       2012-01-17 18:36:17 UTC (rev 109176)
@@ -1,18 +1,8 @@
 #!/usr/local/bin/perl
 
-# 4/27/2010 renamed from WikiStatsCompactDammitFiles.pl
+#  4/27/2010 renamed from WikiStatsCompactDammitFiles.pl
+# 11/23/2011 removed lots of dead (commented) code
 
-# http://article.gmane.org/gmane.science.linguistics.wikipedia.technical/38154/match=new+statistics+stuff
-# http://svn.wikimedia.org/viewvc/mediawiki/trunk/webstatscollector/
-# https://bugzilla.wikimedia.org/show_bug.cgi?id=13541
-# http://de.wikipedia.org/w/api.php?action=query&meta=siteinfo&siprop=general|namespaces|namespacealiases
-
-# Ideas:
-# 1 namespace string -> namespace number ? (may not save much space: compress will deal with recurring patterns like these)
-# 2 frequency distribution hits per file per first letter _-> manifest crawler
-#   assuming crawler collects articles in alphabetical order
-# 3 first letter uppercase -> sort (in sections per first two chars ?)
-
   use lib "/home/ezachte/lib" ;
   use EzLib ;
 
@@ -27,7 +17,6 @@
   use URI::Escape;
   use Cwd ;
   $bayes = -d "/a/dammit.lt" ;
-# $path_7za = "/usr/lib/p7zip/7za" ;
 
   use IO::Compress::Bzip2 qw(bzip2 $Bzip2Error) ;
 
@@ -59,8 +48,6 @@
   print "Track: $track\n" ;
   $reg_exp_track = qr"$track" ;
 
-# -i "D:/\@Wikimedia/!Perl/#Projects/Visitors Compact Log Files/in" -o 
"D:/\@Wikimedia/!Perl/#Projects/Visitors Compact Log Files/out"
-
   my $options ;
   getopt ("iodft", \%options) ;
 
@@ -80,7 +67,6 @@
     if (! defined ($options {"d"})) { &Abort ("Specify date range: as yyyymm, 
yyyy* or *") } ;
   }
 
-
   $dir_in       = $options {"i"} ;
   $dir_out      = $options {"o"} ;
   $dir_filtered = $options {"f"} ;
@@ -129,7 +115,6 @@
     $daterange =~ s/\*/\\d+/ ;
 
     &CompactVisitorStatsOneDay ($dir_in, $dir_out, $dir_filtered, $dir_track, 
$daterange) ;
-  # &UncompactVisitorStats ; # test only, to see if process is revertible
   }
 
   if ($compactmonth)
@@ -199,7 +184,7 @@
   my $month = substr ($date,4,2) ;
   my $day   = substr ($date,6,2) ;
 
-  my ($file_out1, $file_out2, $file_out3, $out_day, $hours_missing) ;
+  my ($file_out2, $out_day, $hours_missing) ;
 
   $dir_out = "$dir_out/${year}-${month}" ;
   if (! -d $dir_out)
@@ -248,16 +233,9 @@
 
   undef @in_hour ;
 
-  # $file_out = "pagecounts-$year$month$day_full_day" ;
-  # open OUT, ">", $file_out ;
-  # binmode $file_out ;
-
-# my $out_day1 = IO::Compress::Gzip->new ($file_out1) || &Abort 
("IO::Compress::Gzip failed: $GzipError\n") ;
   if ($bayes)
   {
-  # $file_out1 = "$dir_out/pagecounts-$year$month$day" . "_fd"  ; # full day
     $file_out2 = "$dir_out/pagecounts-$year$month$day" . "_h" ; # full day, 
hourly data
-  # $file_out3 = "$dir_out/pagecounts-$year$month$day" . "_d" ; # full day, 
compact, just daily totals
     if ((-e "$file_out2.7z") || (-e "$file_out2.bz2") || (-e "$file_out2.zip") 
|| (-e "$file_out2.gz"))
     {
       &Log ("\nTarget file '$file_out2.[7z|bz2|zip|gz]' exists already. Skip 
this date.\n") ;
@@ -270,24 +248,16 @@
     }
 
     open $out_day2, ">", "$file_out2" || &Abort ("Output file '$file_out2' 
could not be opened.") ;
-  # open $out_day3, ">", "$file_out3" || &Abort ("Output file '$file_out3' 
could not be opened.") ;
   }
   else
   {
-  # $file_out1 = "$dir_out/pagecounts-$year$month$day" . "_fd.gz"  ; # full day
     $file_out2 = "$dir_out/pagecounts-$year$month$day" . "_h.gz" ; # full day, 
hourly data, count above threshold
     $out_day2 = IO::Compress::Gzip->new ($file_out2) || &Abort 
("IO::Compress::Gzip failed: $GzipError\n") ;
-  # $file_out3 = "$dir_out/pagecounts-$year$month$day" . "_d.gz" ; # full day, 
count above threshold
-  # $out_day3 = IO::Compress::Gzip->new ($file_out3) || &Abort 
("IO::Compress::Gzip failed: $GzipError\n") ;
   }
 
-# binmode $out_day1 ;
   binmode $out_day2 ;
-# binmode $out_day3 ;
 
-  # print "File_out1 $file_out1\n" ;
   print "File_out2 $file_out2\n" ;
-  # print "File_out3 $file_out3\n" ;
 
   $file_filtered = "$dir_filtered/pagecounts-$year$month$day.txt" ;
   &Log ("\nFilter file: $file_filtered\n") ;
@@ -338,7 +308,6 @@
   $comment .= "# Counts format is total per day, followed by count per hour if 
larger than zero, hour 0..23 shown as A..X (saves up to 22 bytes per line 
compared to comma separated values)\n" ;
   $comment .= "# If data are missing for some hour (file missing or corrupt) a 
question mark (?) is shown (and for each missing hour the daily total is 
incremented with hourly average)\n\n" ;
   print $out_day2 $comment ;
-# print $out_day3 $comment ;
 
   if ($files_in_found < 24)
   {
@@ -357,8 +326,6 @@
   {
     print $out_day2 "#\n" ;
     print $out_day2 "# In this file data are missing for hour(s) 
$hours_missing!\n" ;
-  # print $out_day3 "#\n" ;
-  # print $out_day3 "# In this file data are missing for hour(s) 
$hours_missing!\n" ;
   }
   $comment  = "#\n" ;
   $comment .= "# Lines starting with ampersand (@) show totals per 'namespace' 
(including omitted counts for low traffic articles)\n" ;
@@ -369,7 +336,6 @@
   $comment .= "# Page titles are shown unmodified (preserves sort sequence)\n" 
;
   $comment .= "#\n" ;
   print $out_day2 $comment ;
-# print $out_day3 $comment ;
 
   $key_low_prev = "" ;
   while ($files_in_open > 0)
@@ -473,9 +439,6 @@
       &Abort ("Sequence error: '$key_low_prev' eq '$key_low'\n") ;
     }
 
-    # print OUT "$key_low $total$counts\n" ;
-#    print $out_day1 "$key_low $total$counts\n" ;
-
     ($lang,$title) = split (' ', $key_low) ;
 
     $title =~ s/\%20/_/g ;
@@ -496,7 +459,6 @@
       # { print "- $lang\n" ; }
 
       &WriteTotalsPerNamespace ($out_day2, $langprev) ;
-    # &WriteTotalsPerNamespace ($out_day3, $langprev) ;
       undef %totals_per_namespace ;
     }
     $langprev = $lang ;
@@ -518,48 +480,22 @@
     }
 
     if ($total >= $threshold)
-    { print $out_day2 "$key_low $total$counts\n" ;
-    # print $out_day3 "$key_low $total\n" ;
-    }
+    { print $out_day2 "$key_low $total$counts\n" ; }
 
     $key_low_prev = $key_low ;
-  # print "OUT $key_low $counts\n" ;
   }
 
   &WriteTotalsPerNamespace ($out_day2, $langprev) ;
-# &WriteTotalsPerNamespace ($out_day3, $langprev) ;
 
   &Log ("File production took " . (time-$time_start) . " seconds\n\n") ;
 
   &Log ("[$lines, $files_in_open] $key_low\n") ;
-# close OUT ;
 
   if ($bayes)
   {
-  # close $out_day1 ;
     close $out_day2 ;
-  # close $out_day3 ;
     close $out_filtered ;
 
-#    $cmd = "$path_7za a $file_out2.7z $file_out2" ;
-#    $result = `$cmd` ;
-#    if ($result =~ /Everything is Ok/s)
-#    {
-#      $result =~ s/^.*?(Updating.*?)\n.*$/$1 -> OK/s ;
-#      unlink $file_out2 ;
-#      foreach $file_in (@files_today)
-#      {
-#        print "unlink $dir_in/$file_in\n" ;
-#        unlink "$dir_in/$file_in" ;
-#      }
-#    }
-#    else
-#    {
-#      print "Delete $file_out2.7z\n" ;
-#      unlink "$file_out2.7z" ;
-#    }
-
-
     $time_start_compression = time ;
     $cmd = "bzip2 -9 -v $file_out2" ;
     &Log ("\n\n$cmd ->\n") ;
@@ -582,9 +518,7 @@
   }
   else
   {
-  # $out_day1->close() ;
     $out_day2->close() ;
-  # $out_day3->close() ;
     close $out_filtered ;
   }
 
@@ -740,7 +674,6 @@
     }
   }
 
-
   my $out_month_all = new IO::Compress::Bzip2 "$file_out.bz2" or die "bzip2 
failed for $file_out.bz2: $Bzip2Error\n";
   my $out_month_ge5 = new IO::Compress::Bzip2 "${file_out}_ge5.bz2" or die 
"bzip2 failed for ${file_out}_ge5.bz2: $Bzip2Error\n";
 
@@ -1211,358 +1144,15 @@
 
 
#=============================================================================================================
 
-# snippets obsolete but revivable code / test code
+# http://article.gmane.org/gmane.science.linguistics.wikipedia.technical/38154/match=new+statistics+stuff
+# http://svn.wikimedia.org/viewvc/mediawiki/trunk/webstatscollector/
+# https://bugzilla.wikimedia.org/show_bug.cgi?id=13541
+# http://de.wikipedia.org/w/api.php?action=query&meta=siteinfo&siprop=general|namespaces|namespacealiases
 
-#sub Compact
-#{
-#  my $day = shift ;
-#  &Log ("Compact files for $day\n") ;
+# Ideas:
+# 1 namespace string -> namespace number ? (may not save much space: compress will deal with recurring patterns like these)
+# 2 frequency distribution hits per file per first letter _-> manifest crawler
+#   assuming crawler collects articles in alphabetical order
+# 3 always convert first letter after namespace string to uppercase, then sort and merge
 
-#  $file_in  = "pagecounts-$day.out" ;
-#  $file_out1 = "pagecounts-${day}_all.gz" ;
-#  $file_out2 = "pagecounts-${day}_10plus.gz" ;
-#  open IN, "<", $file_in ;
-#  binmode $file_in ;
 
-#  my $out_day1 = IO::Compress::Gzip->new ($file_out1) || &Abort 
("IO::Compress::Gzip failed: $GzipError\n") ;
-#  my $out_day2 = IO::Compress::Gzip->new ($file_out2) || &Abort 
("IO::Compress::Gzip failed: $GzipError\n") ;
-
-#  open OUT, ">", $file_out ;
-#  binmode $file_out ;
-
-#  $lang_prev = "" ;
-#  while ($line = <IN>)
-#  {
-#    chomp ($line) ;
-#    ($lang, $title, $counts) = split (' ', $line) ;
-#    $title2 = $title ;
-#    $title =~ s/\%20/_/g ;
-#    $title =~ s/\%3A/:/g ;
-#  # $title =~ s/\%([0-9A-F]{2})/chr(hex($1))/ge ;
-#  # if ($title =~ /[\x00-\x1F]/)
-#  # { &Log ("> '$title2'\n") ; }
-#    $title =~ s/\x00-\x1F/"%" . sprintf ("%X", ord($1)) ;/ge ;
-#    print $out_day1 "$lang $title $counts\n" ;
-#    ($counts2 = $counts) =~ s/^(\d+).*/$1/ ;
-#    if ($counts2 >= $threshold)
-#    { print $out_day2 "$lang $title $counts\n" ; }
-#    $lang_prev = $lang ;
-#  }
-#
-#  close IN ;
-#  $out_day1->close() ;
-#  $out_day2->close() ;
-#}
-
-
-#sub GetViewDistribution
-#{
-#  open OUT, ">", "Views.csv" ;
-#  foreach $file_in (@files)
-#  {
-#    ($hour = $file_in) =~ s/^pagecounts-\d+-(\d\d)\d+\.gz$/$1/ ;
-#    $hour = chr(ord('A')+$hour) ;
-#    &Log ("Process $hour $file_in\n") ;
-
-#    $in_hour1 = IO::Uncompress::Gunzip->new ($file_in) || &Abort 
("IO::Uncompress::Gunzip failed: $GunzipError\n") ;
-#    while ($line = <$in_hour1>)
-#    {
-#      ($lang,$title,$count,$dummy) = split (' ', $line) ;
-#      if (($lang eq "en") && ($title !~ /:/)) # only en: and namespace 0
-#      {
-#        $tot {$hour} += $count ;
-#        if ($count < 3)
-#        { @counts {$hour . substr ($title,0,1)}++ ; }
-#      }
-#    }
-#    $in_hour1->close () ;
-#  }
-#
-#  print OUT "," ;
-#  foreach $hour ('A'..'X')
-#  {  print OUT $hour . ", " ; }
-#  print OUT "\n" ;
-#
-#  print OUT "," ;
-#  foreach $hour ('A'..'X')
-#  {  print OUT $tot {$hour} . ", " ; }
-#  print OUT "\n" ;
-#
-#  for ($c=0; $c < 256; $c++)
-#  {
-#    # do not print chars " and , as such: confuses csv format
-#    if ($c < 33)
-#    { print OUT "chr($c), " ; }
-#    elsif (chr($c) eq '"')
-#    { print OUT "dquote, " ; }
-#    elsif (chr($c) eq ',')
-#    { print OUT "comma, " ; }
-#    else
-#    { print OUT chr($c) . ", " ; }
-#
-#    foreach $hour ('A'..'X')
-#    {  print OUT (0+@counts {$hour.chr($c)}) , ", " ; }
-#
-#    if ($c < 255)
-#    { print OUT "\n" ; }
-#  }
-#  close OUT ;
-#}
-
-
-#sub RecompactVisitorStats
-#{
-#  my $dir_in = "D:/Wikipedia_Visitors/full_day/" ;
-#  chdir ($dir_in) || &Abort ("Cannot chdir to $dir_in\n") ;
-#  local (*DIR);
-#  opendir (DIR, ".");
-#  @files = () ;
-#  while ($file_in = readdir (DIR))
-#  {
-#    next if $file_in !~ /^pagecounts-\d{8,8}_fd.gz$/ ;
-#
-#    push @files, $file_in ;
-#  }
-
-#  $filecnt = $#files+1 ;
-#  @files = sort { substr ($a, 20,2) <=> substr ($b, 20,2)} @files ;
-
-#  foreach $file (@files)
-#  { &RecompactVisitorStats2 ($file) ; }
-#  closedir (DIR, ".");
-#}
-
-#sub RecompactVisitorStats2
-#{
-## http://www.7-zip.org/7z.html
-#  my $file = shift ;
-#  my $time_start = time ;
-#  my $path_7z  = "D:/Wikipedia_Visitors/7z.exe" ;
-## my $file_in  = 
"D:/Wikipedia_Visitors/full_day/2008-07-pagecounts/pagecounts-20080702_fd.gz" ;
-#  my $file_in  = "D:/Wikipedia_Visitors/full_day/$file" ;
-#  my $file_out ; ($file_out  = $file_in) =~ s/gz$/txt/ ;
-#  my $file_7z ;  ($file_7z  = $file_in) =~ s/gz$/7z/ ;
-
-#  &Log ("Process $file_in\n") ;
-
-#  $in_hour  = IO::Uncompress::Gunzip->new ($file_in) || &Abort 
("IO::Uncompress::Gunzip failed for '$file_in': $GunzipError\n") ;
-#  binmode $in_hour ;
-#  open OUT, ">", $file_out ;
-#  binmode OUT ;
-
-#  my ($title, $title2) ;
-#  while ($line = <$in_hour>)
-#  {
-#    chomp ($line) ;
-#    ($lang,$title,$counts) = split (" ", $line) ;
-
-#    if ($lang ne $lang_prev) { print "$lang " ; }
-#    $lang_prev = $lang ;
-
-#    # test pagecounts-20080701_fd.gz
-#    # all records  424 Mib compressed (1984 uncompressed)
-#    # count > 1    212 Mib compressed ( 733 uncompressed)
-#    # count > 2    169 Mib compressed ( 551 uncompressed)
-#    next if $counts <= 1 ;
-
-#    $title =~ s/%([a-fA-F0-9]{2})/chr(hex($1))/seg;
-#    $title =~ s/\s/_/g;
-#    $lang  =~ s/\.z// ; # remove codes that were added to fix sort sequence
-#    $lang  =~ s/\.y/2/ ;
-
-#    print OUT "$lang $title $counts\n" ;
-#  }
-
-#  print "Close files\n" ;
-#  $in_hour  -> close () ;
-#  close (OUT) ;
-
-#  &Log ("Compress $file_out\n") ;
-
-#  unlink $file_7z ;
-#  $result = `$path_7z a $file_7z $file_out` ;
-#  &Log ("Compressed\n") ;
-#  &Log ("Result " . ($result+0) . " \n") ;
-#  if ((-e $file_7z) && (-s $file_7z > 0) && (($result == 0) || ($result == 
7)))
-#  { unlink $file_out ; }
-
-#  &Log ("Processed in " . (time-$time_start) . " seconds\n\n") ;
-## 0 No error
-## 1 Warning (Non fatal error(s)). For example, one or more files were locked 
by some other application, so they were not compressed.
-## 2 Fatal error
-## 7 Command line error
-## 8 Not enough memory for operation
-## 255 User stopped the process
-#}
-
-
-#sub RecompactVisitorStats3
-#{
-## http://www.7-zip.org/7z.html
-#  my $path_7z  = "D:/Wikipedia_Visitors/7z.exe" ;
-#  my $file_in  = 
"D:/Wikipedia_Visitors/full_day/2008-07-pagecounts/pagecounts-20080702_fd.gz" ;
-#  my $file_out ; ($file_out  = $file_in) =~ s/gz$/txt/ ;
-#  my $file_7z ;  ($file_7z  = $file_in) =~ s/gz$/7z/ ;
-## my $file_log = 
"D:/Wikipedia_Visitors/full_day/2008-07-pagecounts/pagecounts.log" ;
-
-#  $in_hour  = IO::Uncompress::Gunzip->new ($file_in) || &Abort 
("IO::Uncompress::Gunzip failed for '$file_in': $GunzipError\n") ;
-#  binmode $in_hour ;
-## $out_day = IO::Compress::Gzip->new ($file_out) || &Abort 
("IO::Compress::Gzip failed: $GzipError\n") ;
-## binmode $out_day ;
-#  open OUT, ">", $file_out ;
-#  binmode OUT ;
-## open LOG, ">", $file_log ;
-## binmode LOG ;
-
-#  my ($title, $title2) ;
-#  while ($line = <$in_hour>)
-#  {
-#    chomp ($line) ;
-#    ($lang,$title,$counts) = split (" ", $line) ;
-
-#    if ($lang ne $lang_prev) { print "$lang\n" ; }
-##   last if $lang gt "fs" ;
-#    $lang_prev = $lang ;
-
-#    # test pagecounts-20080701_fd.gz
-#    # all records  424 Mib compressed (1984 uncompressed)
-#    # count > 1    212 Mib compressed ( 733 uncompressed)
-#    # count > 2    169 Mib compressed ( 551 uncompressed)
-#    next if $counts <= 1 ;
-
-##   next if $lang !~ /^(?:ar|fr)/ ;
-
-#if ($false)
-#{
-#    $title1b = $title ;
-#    $title1b =~ s/(\%[A-Fa-f0-9]{2})/uc($1)/seg;
-#    $title1b =~ s/\%28/(/g ;
-#    $title1b =~ s/\%29/)/g ;
-#    $title1b =~ s/\%3A/:/g ;
-#    $title1b =~ s/\%2F/\//g ;
-#    $title1b =~ s/\%5C/\\/g ;
-#    $title1b =~ s/\%2A/*/g ;
-#    $title1b =~ s/\%21/!/g ;
-#    $title1b =~ s/\%5F/_/g ;
-#    $title1b =~ s/\%2C/,/g ;
-#    $title1b =~ s/\%2E/./g ;
-#    $title1b =~ s/\%2D/-/g ;
-#    $title1b =~ s/\%25/%/g ;
-#    $title1b =~ s/\%7E/~/g ;
-#    $title1b =~ s/\%27/'/g ;
-#    $title1b =~ s/\%3D/=/g ;
-#    $title1b =~ s/\%26/&/g ;
-#    $title1b =~ s/\%3B/;/g ;
-#    $title1b =~ s/\%3F/?/g ;
-#    $title1b =~ s/\%2B/+/g ;
-#    $title2 = $title1b ;
-#    $title2 =~ s/%([A-F0-9]{2})/chr(hex($1))/seg;
-
-#    if ($title1b ne $title2) # if changed anything at all
-#    {
-#      $title3 = uri_escape ($title2) ;
-#      $title3 =~ s/\%28/(/g ;
-#      $title3 =~ s/\%29/)/g ;
-#      $title3 =~ s/\%3A/:/g ;
-#      $title3 =~ s/\%2F/\//g ;
-#      $title3 =~ s/\%5C/\\/g ;
-#      $title3 =~ s/\%2A/\*/g ;
-#      $title3 =~ s/\%21/\!/g ;
-#      $title3 =~ s/\%5F/\_/g ;
-#      $title3 =~ s/\%2C/,/g ;
-#      $title3 =~ s/\%2E/./g ;
-#      $title3 =~ s/\%2D/-/g ;
-#      $title3 =~ s/\%25/%/g ;
-#      $title3 =~ s/\%7E/~/g ;
-#      $title3 =~ s/\%27/'/g ;
-#      $title3 =~ s/\%3D/=/g ;
-#      $title3 =~ s/\%26/&/g ;
-#      $title3 =~ s/\%3B/;/g ;
-#      $title3 =~ s/\%3F/?/g ;
-#      $title3 =~ s/\%2B/+/g ;
-
-#      if ($title1b eq $title3) # process reversible ?
-#      {
-#        $y++ ;
-#        $title2 =~ s/\s/_/g;
-#        $title = $title2 ;
-#      }
-#      else
-#      {
-#        $n++ ;
-#        print "Y $y N $n\n$title\n$title3\n\n" ;
-#        print LOG "Y $y N $n\n$title\n$title3\n\n" ;
-#      }
-#    }
-#}
-#    $title =~ s/%([a-fA-F0-9]{2})/chr(hex($1))/seg;
-#    $title =~ s/\s/_/g;
-#    $lang  =~ s/\.z// ; # remove codes that were added to fix sort sequence
-#    $lang  =~ s/\.y/2/ ;
-
-#  # print $out_day "$lang $title $counts\n" ;
-#    print OUT "$lang $title $counts\n" ;
-#  }
-
-#  print "Close files\n" ;
-#  $in_hour  -> close () ;
-## $out_day -> close () ;
-#  close (OUT) ;
-#  $result = `$path_7z a $file_out $file_txt` ;
-#  print $result ;
-#}
-
-
-
-# test (partial) reversibility of process
-#sub UncompactVisitorStats
-#{
-#  my $file_in = "out/2009-03/pagecounts-20090301_fdt" ;
-#  my $dir_out = "out" ;
-#  # $in_hour = IO::Uncompress::Gunzip->new ($file_in) || &Abort 
("IO::Uncompress::Gunzip failed for '$file_in': $GunzipError\n") ;
-#  open $in_hour, '<', $file_in ;
-#  binmode $in_hour ;
-
-#  for ($h=0 ; $h<=23 ; $h++)
-#  {
-#    $time = sprintf ("%02d",$h) . "0000" ;
-##   $file_out = "$dir_out/pagecounts-20090301-$time.gz" ;
-#    $file_out = "$dir_out/pagecounts-20090301-$time" ;
-#    open $out_day [$h], '>', $file_out ;
-##    $out_day [$h] = IO::Compress::Gzip->new ($file_out) ||  &Abort 
("IO::Compress::Gzip failed: $GzipError\n");
-#    binmode $out_day [$h] ;
-#  }
-
-#  while ($line = <$in_hour>)
-#  {
-#    next if $line =~ /^#/ ;
-#    next if $line =~ /^@/ ;
-#    chomp ($line) ;
-##   print "$line\n" ;
-#   if ($lines++ > 10000) { exit ; }
-#    ($lang,$title,$counts) = split (" ", $line) ;
-#    $lang =~ s/\.z// ;
-#    $lang =~ s/\.y/2/ ;
-#    $counts =~ s/^\d+// ; # remove (redundant) preceding total
-#    while ($counts ne "")
-#    {
-#      $letter = substr ($counts,0,1) ;
-#      $counts = substr ($counts,1) ;
-#      ($count = $counts) =~ s/^(\d+).*$/$1/ ;
-#      $counts =~ s/^\d+(.*)$/$1/ ;
-#      $h = ord ($letter) - ord ('A') ;
-#      $file = $out_day [$h] ;
-#      $writes {$h} ++ ;
-#      print $file "$lang $title $count\n" ;
-#    }
-
-#  }
-
-#  for ($h=0 ; $h<=23 ; $h++)
-#  {
-##   $out_day [$h] -> close () ;
-#    close $out_day [$h] ;
-#  }
-#}
-
-


_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

[MediaWiki-CVS] SVN: [109176] trunk/wikistats/dammit.lt/ DammitCompactHourlyOrDailyPageCountFiles.pl

Reply via email to