Nikerabbit has uploaded a new change for review. https://gerrit.wikimedia.org/r/118444
Change subject: Fetchers ...................................................................... Fetchers LU rewrite part 3: classes which can fetch files from local file system or over the web. Since there is no standard way to fetch directories over the web (design decision to be able to pick up new languages when json files are used) I also implemented a special fetcher for GitHub that downloads a list of files first and then only fetches the files which exist. Change-Id: Ib712553ea450ca1d8e0219e81dc5147fb53f2128 --- M Autoload.php A fetcher/Fetcher.php A fetcher/FetcherFactory.php A fetcher/FileSystemFetcher.php A fetcher/GitHubFetcher.php A fetcher/HttpFetcher.php 6 files changed, 170 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/LocalisationUpdate refs/changes/44/118444/1 diff --git a/Autoload.php b/Autoload.php index 238b509..25cb86d 100644 --- a/Autoload.php +++ b/Autoload.php @@ -12,6 +12,13 @@ 'LocalisationUpdate' => "$dir/LocalisationUpdate.class.php", 'QuickArrayReader' => "$dir/QuickArrayReader.php", + # fetcher + 'LU_Fetcher' => "$dir/fetcher/Fetcher.php", + 'LU_FetcherFactory' => "$dir/fetcher/FetcherFactory.php", + 'LU_FileSystemFetcher' => "$dir/fetcher/FileSystemFetcher.php", + 'LU_GitHubFetcher' => "$dir/fetcher/GitHubFetcher.php", + 'LU_HttpFetcher' => "$dir/fetcher/HttpFetcher.php", + # finder 'LU_Finder' => "$dir/finder/Finder.php", diff --git a/fetcher/Fetcher.php b/fetcher/Fetcher.php new file mode 100644 index 0000000..9c06c10 --- /dev/null +++ b/fetcher/Fetcher.php @@ -0,0 +1,26 @@ +<?php +/** + * @file + * @author Niklas Laxström + * @license GPL-2.0+ + */ + +/** + * Interface for classes which fetch files over different protocols and ways. + */ +interface LU_Fetcher { + /** + * Fetches a single resource. + * + * @return bool|string False on failure. + */ + public function fetchFile( $url ); + + /** + * Fetch a list of resources. This has the benefit of being able to pick up + * new languages as they appear if languages are stored in separate files. + * + * @return array + */ + public function fetchDirectory( $pattern ); +} diff --git a/fetcher/FetcherFactory.php b/fetcher/FetcherFactory.php new file mode 100644 index 0000000..4e26dc1 --- /dev/null +++ b/fetcher/FetcherFactory.php @@ -0,0 +1,24 @@ +<?php +/** + * @file + * @author Niklas Laxström + * @license GPL-2.0+ + */ + +/** + * Constructs fetchers based on the repository urls. + */ +class LU_FetcherFactory { + public function getFetcher( $path ) { + + if ( strpos( $path, 'https://raw.github.com/' ) === 0 ) { + return new LU_GitHubFetcher(); + } elseif ( strpos( $path, 'http://' ) === 0 ) { + return new LU_HttpFetcher(); + } elseif ( strpos( $path, 'https://' ) === 0 ) { + return new LU_HttpFetcher(); + } else { + return new LU_FileSystemFetcher(); + } + } +} diff --git a/fetcher/FileSystemFetcher.php b/fetcher/FileSystemFetcher.php new file mode 100644 index 0000000..240248f --- /dev/null +++ b/fetcher/FileSystemFetcher.php @@ -0,0 +1,35 @@ +<?php +/** + * @file + * @author Niklas Laxström + * @license GPL-2.0+ + */ + +/** + * Accesses file system directly. + */ +class LU_FileSystemFetcher implements LU_Fetcher { + public function fetchFile( $url ) { + // Remove the protocol prefix + $url = preg_replace( '~file://~', '', $url ); + + if ( !is_readable( $url ) ) { + return false; + } + + return file_get_contents( $url ); + } + + public function fetchDirectory( $pattern ) { + // Remove the protocol prefix + $pattern = preg_replace( '~file://~', '', $pattern ); + + $data = array(); + foreach ( glob( $pattern ) as $file ) { + if ( is_readable( $file ) ) { + $data["file://$file"] = file_get_contents( $file ); + } + } + return $data; + } +} diff --git a/fetcher/GitHubFetcher.php b/fetcher/GitHubFetcher.php new file mode 100644 index 0000000..7c5af32 --- /dev/null +++ b/fetcher/GitHubFetcher.php @@ -0,0 +1,38 @@ +<?php +/** + * @file + * @author Niklas Laxström + * @license GPL-2.0+ + */ + +/** + * This class uses GitHub api to obtain a list of files present in a directory + * to avoid fetching files that don't exist. + * + * @todo Could use file hashes to 1) avoid fetching files with same hash as + * the source. 2) avoid fetching files which haven't changed since last check + * if we store them. + */ +class LU_GitHubFetcher extends LU_HttpFetcher { + + public function fetchDirectory( $pattern ) { + $p = '~github\.com/(?P<org>[^/]+)/(?P<repo>[^/]+)/(?P<branch>[^/]+)/(?P<path>.+)/.+$~'; + preg_match( $p, $pattern, $m ); + + $json = Http::get( "https://api.github.com/repos/{$m['org']}/{$m['repo']}/contents/{$m['path']}" ); + if ( !$json ) { + throw new MWException( "Unable to get directory listing for {$m['org']}/{$m['repo']}" ); + } + + $files = array(); + $json = FormatJson::decode( $json, true ); + foreach ( $json as $fileinfo ) { + $fileurl = dirname( $pattern ) . '/' . $fileinfo['name']; + $file = $this->fetchFile( $fileurl ); + if ( $file ) { + $files[$fileurl] = $file; + } + } + return $files; + } +} diff --git a/fetcher/HttpFetcher.php b/fetcher/HttpFetcher.php new file mode 100644 index 0000000..9dfed8d --- /dev/null +++ b/fetcher/HttpFetcher.php @@ -0,0 +1,40 @@ +<?php +/** + * @file + * @author Niklas Laxström + * @license GPL-2.0+ + */ + +/** + * Fetches files over HTTP(s). + */ +class LU_HttpFetcher implements LU_Fetcher { + public function fetchFile( $url ) { + return Http::get( $url ); + } + + /** + * This is horribly inefficient. Subclasses have more efficient + * implementation of this. + */ + public function fetchDirectory( $pattern ) { + $files = array(); + + $languages = Language::fetchLanguageNames( null, 'mwfile' ); + + foreach( array_keys( $languages ) as $code ) { + // Hack for core + if ( strpos( $pattern, 'Messages*.php' ) !== false ) { + $code = ucfirst( strtr( $code, '-', '_' ) ); + } + + $url = str_replace( '*', $code, $pattern ); + $file = $this->fetchFile( $url ); + if ( $file ) { + $files[$url] = $file; + } + } + + return $files; + } +} -- To view, visit https://gerrit.wikimedia.org/r/118444 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ib712553ea450ca1d8e0219e81dc5147fb53f2128 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/LocalisationUpdate Gerrit-Branch: json-rewrite Gerrit-Owner: Nikerabbit <niklas.laxst...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits