jenkins-bot has submitted this change and it was merged. Change subject: Add tracking categories for files with attribution problems ......................................................................
Add tracking categories for files with attribution problems Adds a tracking category to file pages for each of these problems: * no description * no author * no source * no license Needs to be enabled with $wgCommonsMetadataSetTrackingCategories. Depends on core changes I685b285fcc772382993116f7822a832eeecc0681 and I89d9ea1db3a4e6486e77eee940bd438f7753b776. Change-Id: I43ed79b6a54cd31820ecae8139e29c5880f5dd1b Mingle: https://wikimedia.mingle.thoughtworks.com/projects/multimedia/cards/859 --- M CommonsMetadata.php M DataCollector.php M HookHandler.php M i18n/en.json M i18n/qqq.json 5 files changed, 117 insertions(+), 6 deletions(-) Approvals: Gergő Tisza: Looks good to me, approved jenkins-bot: Verified diff --git a/CommonsMetadata.php b/CommonsMetadata.php index 5ffa245..5db1c83 100755 --- a/CommonsMetadata.php +++ b/CommonsMetadata.php @@ -25,6 +25,12 @@ 'descriptionmsg' => 'commonsmetadata-desc', ); +/** + * Set tracking categories on file pages where description, author, source or license cannot be parsed + * @var bool + */ +$wgCommonsMetadataSetTrackingCategories = false; + $wgAutoloadClasses['CommonsMetadata\HookHandler'] = __DIR__ . '/HookHandler.php'; $wgAutoloadClasses['CommonsMetadata\DataCollector'] = __DIR__ . '/DataCollector.php'; $wgAutoloadClasses['CommonsMetadata\DomNavigator'] = __DIR__ . 
'/DomNavigator.php'; @@ -36,4 +42,11 @@ $wgHooks['GetExtendedMetadata'][] = 'CommonsMetadata\HookHandler::onGetExtendedMetadata'; $wgHooks['ValidateExtendedMetadataCache'][] = 'CommonsMetadata\HookHandler::onValidateExtendedMetadataCache'; +$wgHooks['ContentAlterParserOutput'][] = 'CommonsMetadata\HookHandler::onContentAlterParserOutput'; $wgHooks['UnitTestsList'][] = 'CommonsMetadata\HookHandler::onUnitTestsList'; + + +$wgTrackingCategories[] = 'commonsmetadata-trackingcategory-no-license'; +$wgTrackingCategories[] = 'commonsmetadata-trackingcategory-no-description'; +$wgTrackingCategories[] = 'commonsmetadata-trackingcategory-no-author'; +$wgTrackingCategories[] = 'commonsmetadata-trackingcategory-no-source'; diff --git a/DataCollector.php b/DataCollector.php index 373d38d..b8d7e0d 100755 --- a/DataCollector.php +++ b/DataCollector.php @@ -99,6 +99,38 @@ } /** + * Checks for the presence of metadata needed for attributing the file (author, source, license) + * and returns a list of keys corresponding to problems. 
+ * @param string $descriptionText HTML code of the file description + * @return array one or more of the following keys: + * - no-license - failed to detect a license + * - no-description - failed to detect any image description + * - no-author - failed to detect author name or a custom attribution text + * - no-source - failed to detect the source of the image or a custom attribution text + */ + public function verifyAttributionMetadata( $descriptionText ) { + $problems = array(); + $templateData = $this->templateParser->parsePage( $descriptionText ); + $licenseData = $this->selectLicense( $templateData[TemplateParser::LICENSES_KEY] ); + $informationData = $this->selectInformationTemplate( $templateData[TemplateParser::INFORMATION_FIELDS_KEY] ); + + if ( !$licenseData || empty( $licenseData['LicenseShortName'] ) ) { + $problems[] = 'no-license'; + } + if ( !$informationData || empty( $informationData['ImageDescription'] ) ) { + $problems[] = 'no-description'; + } + if ( !$informationData || empty( $informationData['Artist'] ) && empty( $informationData['Attribution'] ) ) { + $problems[] = 'no-author'; + } + if ( !$informationData || empty( $informationData['Credit'] ) && empty( $informationData['Attribution'] ) ) { + $problems[] = 'no-source'; + } + + return $problems; + } + + /** * @param array $categories * @return array */ diff --git a/HookHandler.php b/HookHandler.php index 75c323e..84f235a 100755 --- a/HookHandler.php +++ b/HookHandler.php @@ -84,6 +84,36 @@ } /** + * Check HTML output of a file page to see if it has all the basic metadata, and add tracking categories + * if it does not. + * @param \Content $content + * @param \Title $title + * @param \ParserOutput $parserOutput + * @return bool this hook handler always returns true. 
+ */ + public static function onContentAlterParserOutput( $content, $title, $parserOutput ) { + global $wgCommonsMetadataSetTrackingCategories; + + if ( + !$wgCommonsMetadataSetTrackingCategories + || !$title->inNamespace( NS_FILE ) + || $content->getModel() !== CONTENT_MODEL_WIKITEXT + ) { + return true; + } + + $language = $content->getContentHandler()->getPageViewLanguage( $title, $content ); + $dataCollector = self::getDataCollector( $language, true ); + + $categoryKeys = $dataCollector->verifyAttributionMetadata( $parserOutput->getText() ); + foreach ( $categoryKeys as $key ) { + $parserOutput->addTrackingCategory( 'commonsmetadata-trackingcategory-' . $key, $title ); + } + + return true; + } + + /** * Hook to add unit tests * @param array $files * @return bool @@ -93,4 +123,24 @@ $files = array_merge( $files, glob( $testDir . DIRECTORY_SEPARATOR . '*Test.php' ) ); return true; } + + /** + * @param Language $lang + * @param bool $singleLang + */ + private static function getDataCollector( Language $lang, $singleLang ) { + $templateParser = new TemplateParser(); + $templateParser->setMultiLanguage( !$singleLang ); + $fallbacks = Language::getFallbacksFor( $lang->getCode() ); + array_unshift( $fallbacks, $lang->getCode() ); + $templateParser->setPriorityLanguages( $fallbacks ); + + $dataCollector = new DataCollector(); + $dataCollector->setLanguage( $lang ); + $dataCollector->setMultiLang( !$singleLang ); + $dataCollector->setTemplateParser( $templateParser ); + $dataCollector->setLicenseParser( new LicenseParser() ); + + return $dataCollector; + } } diff --git a/i18n/en.json b/i18n/en.json index 1b9c42c..273f0e2 100644 --- a/i18n/en.json +++ b/i18n/en.json @@ -1,6 +1,14 @@ { - "@metadata": { - "authors": [] - }, - "commonsmetadata-desc": "Extends the \"extmetadata\" property of the image information API module to include information stored in image description pages that use the templates commonly used on Wikimedia Commons" -} \ No newline at end of file + 
"@metadata": { + "authors": [] + }, + "commonsmetadata-desc": "Extends the \"extmetadata\" property of the image information API module to include information stored in image description pages that use the templates commonly used on Wikimedia Commons", + "commonsmetadata-trackingcategory-no-license": "Files with no machine-readable license", + "commonsmetadata-trackingcategory-no-license-desc": "The file does not have any [https://commons.wikimedia.org/wiki/Commons:Machine-readable_data machine-readable] license template.", + "commonsmetadata-trackingcategory-no-description": "Files with no machine-readable description", + "commonsmetadata-trackingcategory-no-description-desc": "The file does not have a [https://commons.wikimedia.org/wiki/Commons:Machine-readable_data machine-readable] information template, or its description field is not filled out.", + "commonsmetadata-trackingcategory-no-author": "Files with no machine-readable author", + "commonsmetadata-trackingcategory-no-author-desc": "The file does not have a [https://commons.wikimedia.org/wiki/Commons:Machine-readable_data machine-readable] information template, or its author field is not filled out.", + "commonsmetadata-trackingcategory-no-source": "Files with no machine-readable source", + "commonsmetadata-trackingcategory-no-source-desc": "The file does not have a [https://commons.wikimedia.org/wiki/Commons:Machine-readable_data machine-readable] information template, or its source field is not filled out." 
+} diff --git a/i18n/qqq.json b/i18n/qqq.json index d09eaab..3f93512 100644 --- a/i18n/qqq.json +++ b/i18n/qqq.json @@ -4,5 +4,13 @@ "Bawolff" ] }, - "commonsmetadata-desc": "{{desc}}" + "commonsmetadata-desc": "{{desc}}", + "commonsmetadata-trackingcategory-no-license": "Name of the tracking category for files with no machine-readable license", + "commonsmetadata-trackingcategory-no-license-desc": "Description of the inclusion criteria for the tracking category for files with no machine-readable license", + "commonsmetadata-trackingcategory-no-description": "Name of the tracking category for files with no machine-readable description", + "commonsmetadata-trackingcategory-no-description-desc": "Description of the inclusion criteria for the tracking category for files with no machine-readable description", + "commonsmetadata-trackingcategory-no-author": "Name of the tracking category for files with no machine-readable author", + "commonsmetadata-trackingcategory-no-author-desc": "Description of the inclusion criteria for the tracking category for files with no machine-readable author", + "commonsmetadata-trackingcategory-no-source": "Name of the tracking category for files with no machine-readable source", + "commonsmetadata-trackingcategory-no-source-desc": "Description of the inclusion criteria for the tracking category for files with no machine-readable source" } -- To view, visit https://gerrit.wikimedia.org/r/160580 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I43ed79b6a54cd31820ecae8139e29c5880f5dd1b Gerrit-PatchSet: 6 Gerrit-Project: mediawiki/extensions/CommonsMetadata Gerrit-Branch: master Gerrit-Owner: Gergő Tisza <[email protected]> Gerrit-Reviewer: Brian Wolff <[email protected]> Gerrit-Reviewer: Gergő Tisza <[email protected]> Gerrit-Reviewer: Gilles <[email protected]> Gerrit-Reviewer: Guillom <[email protected]> Gerrit-Reviewer: Siebrand <[email protected]> Gerrit-Reviewer: jenkins-bot <> 
_______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
