Addshore has submitted this change and it was merged. Change subject: Add and use retryingExternalCurlGet for php scripts ......................................................................
Add and use retryingExternalCurlGet for php scripts Change-Id: I4bda7ab80a4e3a7020868c8ab3f7b9411744002f --- M graphite/social/facebook.php M graphite/social/googleplus.php M graphite/social/identica.php M graphite/social/irc.php M graphite/social/newsletter.php M graphite/social/twitter.php M src/WikimediaCurl.php 7 files changed, 42 insertions(+), 24 deletions(-) Approvals: Addshore: Verified; Looks good to me, approved jenkins-bot: Verified diff --git a/graphite/social/facebook.php b/graphite/social/facebook.php index 8e2b66b..525ca29 100644 --- a/graphite/social/facebook.php +++ b/graphite/social/facebook.php @@ -21,7 +21,7 @@ private function getFacebookLikes() { $url = 'http://m.facebook.com/wikidata'; - $response = WikimediaCurl::externalCurlGet( $url ); + $response = WikimediaCurl::retryingExternalCurlGet( $url ); preg_match( '/([\d,]+) people like this/i', $response, $matches ); return str_replace( ',', '', $matches[1] ); } diff --git a/graphite/social/googleplus.php b/graphite/social/googleplus.php index e533630..2310068 100644 --- a/graphite/social/googleplus.php +++ b/graphite/social/googleplus.php @@ -50,7 +50,7 @@ private function getGooglePlusFollowers( $googlePlusKey ) { $url = 'https://www.googleapis.com/plus/v1/people/105776413863749545202?key=' . $googlePlusKey; - $response = WikimediaCurl::externalCurlGet( $url ); + $response = WikimediaCurl::retryingExternalCurlGet( $url ); return json_decode($response)->{'circledByCount'}; } diff --git a/graphite/social/identica.php b/graphite/social/identica.php index bc7b1c7..4e06d7d 100644 --- a/graphite/social/identica.php +++ b/graphite/social/identica.php @@ -22,21 +22,7 @@ private function getIdenticaFollowers() { $url = 'https://identi.ca/wikidata'; $dom = new DomDocument(); - $response = WikimediaCurl::externalCurlGet( $url ); - - /** - * identi.ca likes to be unreliable and give us nothing. - * So pause and retry once when trying to get the number! 
- */ - if( empty( $response ) ) { - echo "Got an empty response, retrying in 30 seconds."; - sleep( 30 ); - $response = WikimediaCurl::externalCurlGet( $url ); - if( empty( $response ) ) { - die( "Got 2 empty responses. Failed!" ); - } - } - + $response = WikimediaCurl::retryingExternalCurlGet( $url ); $dom->loadHTML( $response ); $xpath = new DomXPath($dom); $nodes = $xpath->query( '//a[@href="/wikidata/followers"]/span[@class="label"]' ); diff --git a/graphite/social/irc.php b/graphite/social/irc.php index 42c27d9..c4f5c84 100644 --- a/graphite/social/irc.php +++ b/graphite/social/irc.php @@ -26,7 +26,7 @@ } private function getIrcChannelMembers() { - $data = WikimediaCurl::externalCurlGet( 'http://en.irc2go.com/webchat/?net=freenode&room=wikidata' ); + $data = WikimediaCurl::retryingExternalCurlGet( 'http://en.irc2go.com/webchat/?net=freenode&room=wikidata' ); preg_match_all( '/(\d+) users/', $data, $matches ); return $matches[1][0]; } diff --git a/graphite/social/newsletter.php b/graphite/social/newsletter.php index a7f1762..baa0c2c 100644 --- a/graphite/social/newsletter.php +++ b/graphite/social/newsletter.php @@ -20,7 +20,7 @@ private function getNewsletterSubscribers() { $url = 'https://meta.wikimedia.org/wiki/Global_message_delivery/Targets/Wikidata?action=raw'; - $raw = WikimediaCurl::externalCurlGet( $url ); + $raw = WikimediaCurl::retryingExternalCurlGet( $url ); return substr_count( $raw, '{{target' ); } diff --git a/graphite/social/twitter.php b/graphite/social/twitter.php index a37b831..fa46d06 100644 --- a/graphite/social/twitter.php +++ b/graphite/social/twitter.php @@ -26,7 +26,7 @@ private function getTwitterFollowers() { $dom = new DomDocument(); $url = 'https://twitter.com/Wikidata'; - $response = WikimediaCurl::externalCurlGet( $url ); + $response = WikimediaCurl::retryingExternalCurlGet( $url ); $dom->loadHTML( $response ); $xpath = new DomXPath($dom); $nodes = $xpath->query( '//a[@data-nav="followers"]/span[@class="ProfileNav-value"]' ); 
diff --git a/src/WikimediaCurl.php b/src/WikimediaCurl.php index c4934e4..ea3c57a 100644 --- a/src/WikimediaCurl.php +++ b/src/WikimediaCurl.php @@ -8,6 +8,42 @@ class WikimediaCurl { /** + * Retries an external get at most 7 times with an exponential back off + * The final retry wait period will be 640 seconds = 10 mins + * Max execution time would thus be 22 mins + * + * @param string $url + * + * @return mixed + */ + public static function retryingExternalCurlGet( $url ) { + $retriesLeft = 7; + $nextWait = 10; + $result = false; + + while( $retriesLeft > 0 ) { + $result = self::externalCurlGet( $url ); + if( $result !== false && !empty( $result ) ) { + return $result; + } + + if( $result === false ) { + trigger_error( "curl request failed - sleeping for $nextWait seconds", E_WARNING ); + } elseif( empty( $result ) ) { + trigger_error( "curl request returned empty - sleeping for $nextWait seconds", E_WARNING ); + } else { + throw new LogicException( "Retrying request for unknown reason" ); + } + + sleep( $nextWait ); + $retriesLeft--; + $nextWait = $nextWait * 2; + } + + return $result; + } + + /** * @param string $url * * @return mixed @@ -22,10 +58,6 @@ curl_setopt( $ch, CURLOPT_USERAGENT, "WMDE Wikidata metrics gathering" ); $curl_scraped_page = curl_exec( $ch ); curl_close( $ch ); - if ( $curl_scraped_page === false ) { - // TODO Throw exception? 
- } - return $curl_scraped_page; } -- To view, visit https://gerrit.wikimedia.org/r/254834 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I4bda7ab80a4e3a7020868c8ab3f7b9411744002f Gerrit-PatchSet: 2 Gerrit-Project: analytics/limn-wikidata-data Gerrit-Branch: master Gerrit-Owner: Addshore <addshorew...@gmail.com> Gerrit-Reviewer: Addshore <addshorew...@gmail.com> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits