ArielGlenn has submitted this change and it was merged.

Change subject: Adopt dumpwikidatajson.sh to the new naming pattern
......................................................................


Adopt dumpwikidatajson.sh to the new naming pattern

Also some code style changes.

Bug: T72385
Change-Id: I5df824839e09c5237e22e1b8f4bea54aa6b9a255
---
M modules/snapshot/files/dumpwikidatajson.sh
1 file changed, 33 insertions(+), 9 deletions(-)

Approvals:
  ArielGlenn: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/modules/snapshot/files/dumpwikidatajson.sh b/modules/snapshot/files/dumpwikidatajson.sh
index e83c963..0ca7ed8 100644
--- a/modules/snapshot/files/dumpwikidatajson.sh
+++ b/modules/snapshot/files/dumpwikidatajson.sh
@@ -1,19 +1,31 @@
 #!/bin/bash
 #
 # Generate a json dump for Wikidata and remove old ones.
+# To be run weekly.
 #
 # @author Marius Hoch < h...@online.de >
 
-
 configfile="/srv/dumps/confs/wikidump.conf"
 
-apacheDir=`egrep "^dir=" "$configfile" | mawk -Fdir= '{ print $2 }'`
-targetDir=`egrep "^public=" "$configfile" | mawk -Fpublic= '{ print $2 }'`/other/wikidata
-tempDir=`egrep "^temp=" "$configfile" | mawk -Ftemp= '{ print $2 }'`
+today=`date +'%Y%m%d'`
+apacheDir=`awk -Fdir= '/^dir=/ { print $2 }' "$configfile"`
+targetDirBase=`awk -Fpublic= '/^public=/ { print $2 }' "$configfile"`/other/wikibase/wikidatawiki
+targetDir=$targetDirBase/$today
+legacyDirectory=`awk -Fpublic= '/^public=/ { print $2 }' "$configfile"`/other/wikidata
+tempDir=`awk -Ftemp= '/^temp=/ { print $2 }' "$configfile"`
+daysToKeep=70
+
+if [ -z "$targetDirBase" ]; then
+       echo "Empty \$targetDirBase"
+       exit 1
+fi
+
+# Create the dir for the day: This may or may not already exist, we don't care
+mkdir -p $targetDir
 
 multiversionscript="${apacheDir}/multiversion/MWScript.php"
 
-filename=`date +'%Y%m%d'`
+filename=wikidata-$today-all
 targetFile=$targetDir/$filename.json.gz
 
 i=0
@@ -26,11 +38,10 @@
 
 wait
 
-i=0
-
 # Open the json list
 echo '[' | gzip -f > $targetFile
 
+i=0
 while [ $i -lt $shards ]; do
        cat $tempDir/wikidataJson.$i.gz >> $targetFile
        rm $tempDir/wikidataJson.$i.gz
@@ -44,8 +55,21 @@
 # Close the json list
 echo -e '\n]' | gzip -f >> $targetFile
 
-# Remove dumps we no longer need (keep 10 => last 70 days)
-find $targetDir -name '20*.gz' -mtime +71 -delete
+# Remove dump-folders we no longer need (keep $daysToKeep days)
+cutOff=$(( `date +%s` - `expr $daysToKeep + 1` * 24 * 3600)) # Timestamp from $daysToKeep + 1 days ago
+foldersToDelete=`ls -d -r $targetDirBase/*` # $targetDirBase is known to be non-empty
+for folder in $foldersToDelete; do
+       # Try to get the unix time from the folder name, if this fails we'll just
+       # keep the folder (as it's not a valid date, thus hasn't been created by this script).
+       creationTime=$(date --utc --date="$(basename $folder)" +%s 2>/dev/null)
+       if [ -n "$creationTime" ] && [ "$cutOff" -gt "$creationTime" ]; then
+               rm -rf $folder
+       fi
+done
+
+# Legacy directory (with legacy naming scheme)
+ln -s $targetFile "$legacyDirectory/$today.json"
+find $legacyDirectory -name '*.json.gz' -mtime +`expr $daysToKeep + 1` -delete
 
 # Remove old logs (keep 5 => last 35 days)
 find /var/log/wikidatadump/ -name 'dumpwikidatajson-*-*.log' -mtime +36 -delete

-- 
To view, visit https://gerrit.wikimedia.org/r/201238
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I5df824839e09c5237e22e1b8f4bea54aa6b9a255
Gerrit-PatchSet: 6
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Hoo man <h...@online.de>
Gerrit-Reviewer: ArielGlenn <ar...@wikimedia.org>
Gerrit-Reviewer: Hoo man <h...@online.de>
Gerrit-Reviewer: Smalyshev <smalys...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to