Nikerabbit has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/118444

Change subject: Fetchers
......................................................................

Fetchers

LU rewrite part 3: classes which can fetch files from the local file
system or over the web. Since there is no standard way to fetch
directories over the web (a design decision made to be able to pick up
new languages when JSON files are used), I also implemented a special
fetcher for GitHub that downloads a list of files first and then
fetches only the files which exist.

Change-Id: Ib712553ea450ca1d8e0219e81dc5147fb53f2128
---
M Autoload.php
A fetcher/Fetcher.php
A fetcher/FetcherFactory.php
A fetcher/FileSystemFetcher.php
A fetcher/GitHubFetcher.php
A fetcher/HttpFetcher.php
6 files changed, 170 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/LocalisationUpdate refs/changes/44/118444/1

diff --git a/Autoload.php b/Autoload.php
index 238b509..25cb86d 100644
--- a/Autoload.php
+++ b/Autoload.php
@@ -12,6 +12,13 @@
        'LocalisationUpdate' => "$dir/LocalisationUpdate.class.php",
        'QuickArrayReader' => "$dir/QuickArrayReader.php",
 
+       # fetcher
+       'LU_Fetcher' => "$dir/fetcher/Fetcher.php",
+       'LU_FetcherFactory' => "$dir/fetcher/FetcherFactory.php",
+       'LU_FileSystemFetcher' => "$dir/fetcher/FileSystemFetcher.php",
+       'LU_GitHubFetcher' => "$dir/fetcher/GitHubFetcher.php",
+       'LU_HttpFetcher' => "$dir/fetcher/HttpFetcher.php",
+
        # finder
        'LU_Finder' => "$dir/finder/Finder.php",
 
diff --git a/fetcher/Fetcher.php b/fetcher/Fetcher.php
new file mode 100644
index 0000000..9c06c10
--- /dev/null
+++ b/fetcher/Fetcher.php
@@ -0,0 +1,26 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0+
+ */
+
+/**
+ * Interface for classes which fetch files over different protocols and ways.
+ *
+ * Implementations exist for the local file system, plain HTTP(S) and GitHub.
+ */
+interface LU_Fetcher {
+       /**
+        * Fetches a single resource.
+        *
+        * @param string $url Location of the resource to fetch.
+        * @return bool|string Contents of the resource. False on failure.
+        */
+       public function fetchFile( $url );
+
+       /**
+        * Fetch a list of resources. This has the benefit of being able to
+        * pick up new languages as they appear if languages are stored in
+        * separate files.
+        *
+        * @param string $pattern Location of the resources, where the file
+        *  name part contains a * wildcard.
+        * @return array Map of resource location => resource contents.
+        */
+       public function fetchDirectory( $pattern );
+}
diff --git a/fetcher/FetcherFactory.php b/fetcher/FetcherFactory.php
new file mode 100644
index 0000000..4e26dc1
--- /dev/null
+++ b/fetcher/FetcherFactory.php
@@ -0,0 +1,24 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0+
+ */
+
+/**
+ * Picks the fetcher implementation that matches a repository url.
+ */
+class LU_FetcherFactory {
+       /**
+        * Returns a fetcher suitable for the given location.
+        *
+        * @param string $path Repository url or local path.
+        * @return LU_Fetcher GitHub fetcher for raw.github.com urls, HTTP fetcher
+        *  for any other http(s) url, file system fetcher for everything else.
+        */
+       public function getFetcher( $path ) {
+               // Must be tested before the generic https case, which would also match
+               $isGitHub = strpos( $path, 'https://raw.github.com/' ) === 0;
+               if ( $isGitHub ) {
+                       return new LU_GitHubFetcher();
+               }
+
+               $isWeb = strpos( $path, 'http://' ) === 0 || strpos( $path, 'https://' ) === 0;
+               if ( $isWeb ) {
+                       return new LU_HttpFetcher();
+               }
+
+               return new LU_FileSystemFetcher();
+       }
+}
diff --git a/fetcher/FileSystemFetcher.php b/fetcher/FileSystemFetcher.php
new file mode 100644
index 0000000..240248f
--- /dev/null
+++ b/fetcher/FileSystemFetcher.php
@@ -0,0 +1,35 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0+
+ */
+
+/**
+ * Accesses file system directly.
+ *
+ * Locations may carry a leading file:// prefix, which is removed before
+ * the path is handed to the file system functions.
+ */
+class LU_FileSystemFetcher implements LU_Fetcher {
+       /**
+        * Reads a single local file.
+        *
+        * @param string $url Path to the file, optionally prefixed with file://
+        * @return bool|string File contents. False if the file cannot be read.
+        */
+       public function fetchFile( $url ) {
+               $path = $this->stripProtocol( $url );
+
+               if ( !is_readable( $path ) ) {
+                       return false;
+               }
+
+               return file_get_contents( $path );
+       }
+
+       /**
+        * Reads all readable local files matching a glob pattern.
+        *
+        * @param string $pattern Glob pattern, optionally prefixed with file://
+        * @return array Map of file://path => file contents.
+        */
+       public function fetchDirectory( $pattern ) {
+               $matches = glob( $this->stripProtocol( $pattern ) );
+               if ( $matches === false ) {
+                       // glob() returns false on error; treat it as "nothing found"
+                       // instead of feeding a non-array to foreach
+                       return array();
+               }
+
+               $data = array();
+               foreach ( $matches as $file ) {
+                       if ( !is_readable( $file ) ) {
+                               continue;
+                       }
+
+                       $contents = file_get_contents( $file );
+                       // Do not report a failed read as file contents
+                       if ( $contents !== false ) {
+                               $data["file://$file"] = $contents;
+                       }
+               }
+               return $data;
+       }
+
+       /**
+        * Removes the protocol prefix. Only a prefix at the very start is
+        * removed so that the rest of the path is never altered.
+        *
+        * @param string $location
+        * @return string
+        */
+       private function stripProtocol( $location ) {
+               return preg_replace( '~^file://~', '', $location );
+       }
+}
diff --git a/fetcher/GitHubFetcher.php b/fetcher/GitHubFetcher.php
new file mode 100644
index 0000000..7c5af32
--- /dev/null
+++ b/fetcher/GitHubFetcher.php
@@ -0,0 +1,38 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0+
+ */
+
+/**
+ * This class uses GitHub api to obtain a list of files present in a directory
+ * to avoid fetching files that don't exist.
+ *
+ * @todo Could use file hashes to 1) avoid fetching files with same hash as
+ * the source. 2) avoid fetching files which haven't changed since last check
+ * if we store them.
+ */
+class LU_GitHubFetcher extends LU_HttpFetcher {
+
+       /**
+        * Fetches every file listed by the GitHub api for the directory of the
+        * given pattern.
+        *
+        * @param string $pattern Url of the form
+        *  github.com/<org>/<repo>/<branch>/<path>/<file name pattern>
+        * @return array Map of file url => file contents for the files which
+        *  could be fetched.
+        * @throws MWException If the pattern is not a recognized GitHub url or
+        *  the directory listing cannot be retrieved or decoded.
+        */
+       public function fetchDirectory( $pattern ) {
+               $p = '~github\.com/(?P<org>[^/]+)/(?P<repo>[^/]+)/(?P<branch>[^/]+)/(?P<path>.+)/.+$~';
+               if ( !preg_match( $p, $pattern, $m ) ) {
+                       // Without a match there is nothing to build the api url from
+                       throw new MWException( "Unable to parse GitHub url $pattern" );
+               }
+
+               // Pass the branch along: without "ref" the api lists the default
+               // branch of the repository, which may differ from the one requested
+               $apiUrl = "https://api.github.com/repos/{$m['org']}/{$m['repo']}/contents/{$m['path']}"
+                       . '?ref=' . rawurlencode( $m['branch'] );
+
+               $json = Http::get( $apiUrl );
+               if ( !$json ) {
+                       throw new MWException( "Unable to get directory listing for {$m['org']}/{$m['repo']}" );
+               }
+
+               $listing = FormatJson::decode( $json, true );
+               if ( !is_array( $listing ) ) {
+                       throw new MWException( "Unable to get directory listing for {$m['org']}/{$m['repo']}" );
+               }
+
+               $files = array();
+               $baseUrl = dirname( $pattern );
+               foreach ( $listing as $fileinfo ) {
+                       // Ignore anything which does not look like a directory entry
+                       if ( !is_array( $fileinfo ) || !isset( $fileinfo['name'] ) ) {
+                               continue;
+                       }
+
+                       $fileurl = $baseUrl . '/' . $fileinfo['name'];
+                       $file = $this->fetchFile( $fileurl );
+                       if ( $file ) {
+                               $files[$fileurl] = $file;
+                       }
+               }
+               return $files;
+       }
+}
diff --git a/fetcher/HttpFetcher.php b/fetcher/HttpFetcher.php
new file mode 100644
index 0000000..9dfed8d
--- /dev/null
+++ b/fetcher/HttpFetcher.php
@@ -0,0 +1,40 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0+
+ */
+
+/**
+ * Fetcher which retrieves files over HTTP(s).
+ */
+class LU_HttpFetcher implements LU_Fetcher {
+       /**
+        * Retrieves a single url.
+        *
+        * @param string $url
+        * @return bool|string Whatever Http::get returns for the url.
+        */
+       public function fetchFile( $url ) {
+               return Http::get( $url );
+       }
+
+       /**
+        * Tries the pattern once for every language code, which is horribly
+        * inefficient. Subclasses have more efficient implementation of this.
+        *
+        * @param string $pattern Url in which * stands for the language code.
+        * @return array Map of url => contents for the urls which returned
+        *  something.
+        */
+       public function fetchDirectory( $pattern ) {
+               // Hack for core: its message files are named like MessagesPt_br.php
+               $isCore = strpos( $pattern, 'Messages*.php' ) !== false;
+
+               $found = array();
+               foreach ( Language::fetchLanguageNames( null, 'mwfile' ) as $code => $name ) {
+                       $part = $isCore ? ucfirst( strtr( $code, '-', '_' ) ) : $code;
+
+                       $target = str_replace( '*', $part, $pattern );
+                       $contents = $this->fetchFile( $target );
+                       if ( !$contents ) {
+                               continue;
+                       }
+
+                       $found[$target] = $contents;
+               }
+
+               return $found;
+       }
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/118444
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib712553ea450ca1d8e0219e81dc5147fb53f2128
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/LocalisationUpdate
Gerrit-Branch: json-rewrite
Gerrit-Owner: Nikerabbit <niklas.laxst...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to