http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97361

Revision: 97361
Author:   ezachte
Date:     2011-09-17 02:30:24 +0000 (Sat, 17 Sep 2011)
Log Message:
-----------
extra validation checks + be smarter when to run reporting + rsync updated files

Modified Paths:
--------------
    trunk/wikistats/dumps/report.sh

Modified: trunk/wikistats/dumps/report.sh
===================================================================
--- trunk/wikistats/dumps/report.sh     2011-09-17 02:29:06 UTC (rev 97360)
+++ trunk/wikistats/dumps/report.sh     2011-09-17 02:30:24 UTC (rev 97361)
@@ -1,27 +1,144 @@
 #!/bin/bash
+
+# Update English reports for project $1 whenever input csv files are newer than html reports
+# Update reports for other 25+ languages at most once a month, to economize processing time
+# Whenever English reports have been updated run archive job
+
+interval=30  # only update non-English reports once per 'interval' days 
+force_run_report=1
+
+function echo2 {
+  echo $1
+  echo $1 >> log_report_sh.txt
+}
+
+clear 
+echo2 "---------------" 
+echo2 "Start report.sh $1 $2"
+date >> log_report_sh.txt
+
+# Validate project code
+if [ "$1" == "" ] ; then
+  echo2 "Project code missing! Specify as 1st argument one of 
wb,wk,wn,wp,wq,ws,wv,wx"
+  exit
+fi  
+ 
+# Abort when 2nd argument specifies a threshold in days, which is not met
+# This prevents costly reporting step when new month has just started and most counting still needs to be done
 abort_before=$2
 day_of_month=$(date +"%d")
-if [ $day_of_month -lt ${abort_before:=0} ]
-then     
-  echo report.sh: day of month $day_of_month lt $abort_before - exit
+if [ $day_of_month -lt ${abort_before:=0} ] ; then
+  echo2 "report.sh: day of month $day_of_month lt $abort_before - exit"
   exit
 fi
 
-echo day of month $day_of_month le $abort_before - continue
+if [ "$abort_before" != "" ] ; then
+  echo2 "Day of month $day_of_month le $abort_before - continue"
+fi  
 
-echo "\nStart report.sh $1" >> report.txt
-date >> report.txt
-
+# Once in a while update and cache language names in so many target languages
+# Sources are TranslateWiki and interwiki links on English Wikipedia 
 ./sync_language_files.sh 
 
-for x in en ast bg br ca cs da de eo es fr he hu id it ja nl nn pl pt ro ru sk 
sl sr sv wa zh ;
-#for  x in en ;
-do perl WikiReports.pl -m $1 -l $x -i /a/wikistats/csv_$1/ -o /a/out/out_$1 ;
+do_zip=0 # trigger archive step ?
+
+case "$1" in 
+  wb) project='Wikibooks' ;     dir='wikibooks' ;;
+  wk) project='Wiktionaries' ;  dir='wiktionary' ;;
+  wn) project='Wikinews' ;      dir='wikinews' ;;
+  wp) project='Wikipedias' ;    dir='.' ;;
+  wq) project='Wikiquotes' ;    dir='wikiquote' ;;
+  ws) project='Wikisources' ;   dir='wikisource' ;;
+  wv) project='Wikiversities' ; dir='wikiversity' ;;
+  wx) project='Wikispecial' ;   dir='wikispecial' ;;
+  *)  project='unknown' ;       dir='...' ;; 
+esac  
+echo2 "Generate and publish reports for project $project"
+
+for x in en bg br ca cs da de eo es fr he hu id it ja nl nn pl pt ro ru sk sl 
sr sv wa zh ;
+do
+
+  echo2 ""
+  echo2 "Language code $x"
+
+  # Get timestamp last reports for language x
+  x_upper=$( echo "$x" | tr '[:lower:]' '[:upper:]' )  
+  file="/a/out/out_$1/$x_upper/#index.html"    
+  now=`date +%s`
+  prevrun=`stat -c %Y $file`
+  let secs_out="$now - $prevrun" 
+  let days_out="$secs_out/86400"
+  echo2 "File $file generated $days_out days ago"
+  
+  # Get timestamp for most recent csv files 
+  file="/a/wikistats/csv_$1/StatisticsLog.csv" 
+  now=`date +%s`
+  prevrun=`stat -c %Y $file`
+  let secs_csv="$now - $prevrun" 
+  let days_csv="$secs_csv/86400"
+  echo2 "File $file generated $days_csv days ago"
+  
+  # Set source and destination paths for publishing reports
+  out=/a/out/out_$1/$x_upper/ 
+  htdocs=/mnt/htdocs/$dir/$x_upper
+
+  # Check if reports need to be run now for language x
+  run_report=0
+  if [ $force_run_report -ne 0 ] ; then
+    run_report=1               
+    do_zip=1 
+  else  
+    if [ "$secs_csv" -eq "$secs_out" ] ; then
+      if [ "$force_run_report" -eq 0  ] ; then
+        echo2 "Forced run of reports"                                  
+      else                                                     
+        echo2 "Csv files are newer than reports ... "
+      fi
+
+      if [ "$x" == "en" ] ; then
+        do_zip=1
+        run_report=1
+      else  
+        if [ $days_out -gt $interval ] ; then
+          run_report=1
+        else
+                                                       if [ 
"$force_run_report" -ne 0 ] ; then
+            echo2 "Skip reporting for non-English languages, only update these 
once every $interval days"
+          fi                                                   
+        fi     
+      fi  
+    else
+      echo2 "Reports for language code '$x' are up to date -> skip reporting"  
                
+    fi  
+  fi
+
+  # If reporting needed now, do it now 
+  if [ $run_report -eq 1 ] ; then
+    echo2 "Run reporting for language $x"
+    perl WikiReports.pl -m $1 -l $x -i /a/wikistats/csv_$1/ -o /a/out/out_$1
+
+    echo2 ""
+    echo2 "Copy new and updated files from $out -> $htdocs"
+    rsync -a $out/* $htdocs/ >> log_report_sh.txt
+    echo2 "List files from target folder older than a day"
+    rsync -a $out/* $htdocs/ >> log_report_sh.txt
+    find $htdocs/ -mtime +1 | xargs ls -l # rather than 'ls -l $htdocs'
+  fi
+
 done;
 
+# Generate category overviews (deactivated, reports became too large)
 # perl WikiReports.pl -c -m $1 -l en -i /a/wikistats/csv_$1/ -o /a/out/out_$1 
 
-./zip_out.sh $1
+echo2 ""
 
-echo "Ready" >> report.txt
-date >> report.txt
+# Archive English reports
+if [ $do_zip -eq 1 ] ; then
+  echo2 "Archive new English reports" 
+  ./zip_out.sh $1
+else
+  echo2 "No English reports built. Skip zip phase"                             
                        
+fi  
+
+echo2 ""
+echo2 "Ready" 


_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to