Addshore has submitted this change and it was merged.

Change subject: Add and use retryingExternalCurlGet for php scripts
......................................................................


Add and use retryingExternalCurlGet for php scripts

Change-Id: I4bda7ab80a4e3a7020868c8ab3f7b9411744002f
---
M graphite/social/facebook.php
M graphite/social/googleplus.php
M graphite/social/identica.php
M graphite/social/irc.php
M graphite/social/newsletter.php
M graphite/social/twitter.php
M src/WikimediaCurl.php
7 files changed, 42 insertions(+), 24 deletions(-)

Approvals:
  Addshore: Verified; Looks good to me, approved
  jenkins-bot: Verified



diff --git a/graphite/social/facebook.php b/graphite/social/facebook.php
index 8e2b66b..525ca29 100644
--- a/graphite/social/facebook.php
+++ b/graphite/social/facebook.php
@@ -21,7 +21,7 @@
 
        private function getFacebookLikes() {
                $url = 'http://m.facebook.com/wikidata';
-               $response = WikimediaCurl::externalCurlGet( $url );
+               $response = WikimediaCurl::retryingExternalCurlGet( $url );
                preg_match( '/([\d,]+) people like this/i', $response, $matches );
                return str_replace( ',', '', $matches[1] );
        }
diff --git a/graphite/social/googleplus.php b/graphite/social/googleplus.php
index e533630..2310068 100644
--- a/graphite/social/googleplus.php
+++ b/graphite/social/googleplus.php
@@ -50,7 +50,7 @@
 
        private function getGooglePlusFollowers( $googlePlusKey ) {
                $url = 'https://www.googleapis.com/plus/v1/people/105776413863749545202?key=' . $googlePlusKey;
-               $response = WikimediaCurl::externalCurlGet( $url );
+               $response = WikimediaCurl::retryingExternalCurlGet( $url );
                return json_decode($response)->{'circledByCount'};
        }
 
diff --git a/graphite/social/identica.php b/graphite/social/identica.php
index bc7b1c7..4e06d7d 100644
--- a/graphite/social/identica.php
+++ b/graphite/social/identica.php
@@ -22,21 +22,7 @@
        private function getIdenticaFollowers() {
                $url = 'https://identi.ca/wikidata';
                $dom = new DomDocument();
-               $response = WikimediaCurl::externalCurlGet( $url );
-
-               /**
-                * identi.ca likes to be unreliable and give us nothing.
-                * So pause and retry once when trying to get the number!
-                */
-               if( empty( $response ) ) {
-                       echo "Got an empty response, retrying in 30 seconds.";
-                       sleep( 30 );
-                       $response = WikimediaCurl::externalCurlGet( $url );
-                       if( empty( $response ) ) {
-                               die( "Got 2 empty responses. Failed!" );
-                       }
-               }
-
+               $response = WikimediaCurl::retryingExternalCurlGet( $url );
                $dom->loadHTML( $response );
                $xpath = new DomXPath($dom);
                $nodes = $xpath->query( '//a[@href="/wikidata/followers"]/span[@class="label"]' );
diff --git a/graphite/social/irc.php b/graphite/social/irc.php
index 42c27d9..c4f5c84 100644
--- a/graphite/social/irc.php
+++ b/graphite/social/irc.php
@@ -26,7 +26,7 @@
        }
 
        private function getIrcChannelMembers() {
-               $data = WikimediaCurl::externalCurlGet( 'http://en.irc2go.com/webchat/?net=freenode&room=wikidata' );
+               $data = WikimediaCurl::retryingExternalCurlGet( 'http://en.irc2go.com/webchat/?net=freenode&room=wikidata' );
                preg_match_all( '/(\d+) users/', $data, $matches );
                return $matches[1][0];
        }
diff --git a/graphite/social/newsletter.php b/graphite/social/newsletter.php
index a7f1762..baa0c2c 100644
--- a/graphite/social/newsletter.php
+++ b/graphite/social/newsletter.php
@@ -20,7 +20,7 @@
 
        private function getNewsletterSubscribers() {
                $url = 'https://meta.wikimedia.org/wiki/Global_message_delivery/Targets/Wikidata?action=raw';
-               $raw = WikimediaCurl::externalCurlGet( $url );
+               $raw = WikimediaCurl::retryingExternalCurlGet( $url );
                return substr_count( $raw, '{{target' );
        }
 
diff --git a/graphite/social/twitter.php b/graphite/social/twitter.php
index a37b831..fa46d06 100644
--- a/graphite/social/twitter.php
+++ b/graphite/social/twitter.php
@@ -26,7 +26,7 @@
        private function getTwitterFollowers() {
                $dom = new DomDocument();
                $url = 'https://twitter.com/Wikidata';
-               $response = WikimediaCurl::externalCurlGet( $url );
+               $response = WikimediaCurl::retryingExternalCurlGet( $url );
                $dom->loadHTML( $response );
                $xpath = new DomXPath($dom);
                $nodes = $xpath->query( '//a[@data-nav="followers"]/span[@class="ProfileNav-value"]' );
diff --git a/src/WikimediaCurl.php b/src/WikimediaCurl.php
index c4934e4..ea3c57a 100644
--- a/src/WikimediaCurl.php
+++ b/src/WikimediaCurl.php
@@ -8,6 +8,42 @@
 class WikimediaCurl {
 
        /**
+        * Tries an external get at most 7 times with an exponential back off
+        * The final retry wait period will be 640 seconds = 10 mins
+        * Max execution time would thus be 22 mins
+        *
+        * @param string $url
+        *
+        * @return mixed
+        */
+       public static function retryingExternalCurlGet( $url ) {
+               $retriesLeft = 7;
+               $nextWait = 10;
+               $result = false;
+
+               while( $retriesLeft > 0 ) {
+                       $result = self::externalCurlGet( $url );
+                       if( $result !== false && !empty( $result ) ) {
+                               return $result;
+                       }
+
+                       if( $result === false ) {
+                               trigger_error( "curl request failed - sleeping for $nextWait seconds", E_WARNING );
+                       } elseif( empty( $result ) ) {
+                               trigger_error( "curl request returned empty - sleeping for $nextWait seconds", E_WARNING );
+                       } else {
+                               throw new LogicException( "Retrying request for unknown reason" );
+                       }
+
+                       sleep( $nextWait );
+                       $retriesLeft--;
+                       $nextWait = $nextWait * 2;
+               }
+
+               return $result;
+       }
+
+       /**
         * @param string $url
         *
         * @return mixed
@@ -22,10 +58,6 @@
                curl_setopt( $ch, CURLOPT_USERAGENT, "WMDE Wikidata metrics gathering" );
                $curl_scraped_page = curl_exec( $ch );
                curl_close( $ch );
-               if ( $curl_scraped_page === false ) {
-                       // TODO Throw exception?
-               }
-
                return $curl_scraped_page;
        }
 

-- 
To view, visit https://gerrit.wikimedia.org/r/254834
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I4bda7ab80a4e3a7020868c8ab3f7b9411744002f
Gerrit-PatchSet: 2
Gerrit-Project: analytics/limn-wikidata-data
Gerrit-Branch: master
Gerrit-Owner: Addshore <addshorew...@gmail.com>
Gerrit-Reviewer: Addshore <addshorew...@gmail.com>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to