jenkins-bot has submitted this change and it was merged.

Change subject: Add tracking categories for files with attribution problems
......................................................................


Add tracking categories for files with attribution problems

Adds a tracking category to file pages for each of these problems:
* no description
* no author
* no source
* no license

Needs to be enabled with $wgCommonsMetadataSetTrackingCategories.

Depends on core changes I685b285fcc772382993116f7822a832eeecc0681
and I89d9ea1db3a4e6486e77eee940bd438f7753b776.

Change-Id: I43ed79b6a54cd31820ecae8139e29c5880f5dd1b
Mingle: https://wikimedia.mingle.thoughtworks.com/projects/multimedia/cards/859
---
M CommonsMetadata.php
M DataCollector.php
M HookHandler.php
M i18n/en.json
M i18n/qqq.json
5 files changed, 117 insertions(+), 6 deletions(-)

Approvals:
  Gergő Tisza: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/CommonsMetadata.php b/CommonsMetadata.php
index 5ffa245..5db1c83 100755
--- a/CommonsMetadata.php
+++ b/CommonsMetadata.php
@@ -25,6 +25,12 @@
        'descriptionmsg' => 'commonsmetadata-desc',
 );
 
+/**
+ * Set tracking categories on file pages where description, author, source or 
license cannot be parsed
+ * @var bool
+ */
+$wgCommonsMetadataSetTrackingCategories = false;
+
 $wgAutoloadClasses['CommonsMetadata\HookHandler'] = __DIR__ . 
'/HookHandler.php';
 $wgAutoloadClasses['CommonsMetadata\DataCollector'] = __DIR__ . 
'/DataCollector.php';
 $wgAutoloadClasses['CommonsMetadata\DomNavigator'] = __DIR__ . 
'/DomNavigator.php';
@@ -36,4 +42,11 @@
 
 $wgHooks['GetExtendedMetadata'][] = 
'CommonsMetadata\HookHandler::onGetExtendedMetadata';
 $wgHooks['ValidateExtendedMetadataCache'][] = 
'CommonsMetadata\HookHandler::onValidateExtendedMetadataCache';
+$wgHooks['ContentAlterParserOutput'][] = 
'CommonsMetadata\HookHandler::onContentAlterParserOutput';
 $wgHooks['UnitTestsList'][] = 'CommonsMetadata\HookHandler::onUnitTestsList';
+
+
+$wgTrackingCategories[] = 'commonsmetadata-trackingcategory-no-license';
+$wgTrackingCategories[] = 'commonsmetadata-trackingcategory-no-description';
+$wgTrackingCategories[] = 'commonsmetadata-trackingcategory-no-author';
+$wgTrackingCategories[] = 'commonsmetadata-trackingcategory-no-source';
diff --git a/DataCollector.php b/DataCollector.php
index 373d38d..b8d7e0d 100755
--- a/DataCollector.php
+++ b/DataCollector.php
@@ -99,6 +99,38 @@
        }
 
        /**
+        * Checks for the presence of metadata needed for attributing the file 
(author, source, license)
+        * and returns a list of keys corresponding to problems.
+        * @param string $descriptionText HTML code of the file description
+        * @return array zero or more of the following keys:
+        *  - no-license - failed to detect a license
+        *  - no-description - failed to detect any image description
+        *  - no-author - failed to detect author name or a custom attribution 
text
+        *  - no-source - failed to detect the source of the image or a custom 
attribution text
+        */
+       public function verifyAttributionMetadata( $descriptionText ) {
+               $problems = array();
+               $templateData = $this->templateParser->parsePage( 
$descriptionText );
+               $licenseData = $this->selectLicense( 
$templateData[TemplateParser::LICENSES_KEY] );
+               $informationData = $this->selectInformationTemplate( 
$templateData[TemplateParser::INFORMATION_FIELDS_KEY] );
+
+               if ( !$licenseData || empty( $licenseData['LicenseShortName'] ) 
) {
+                       $problems[] = 'no-license';
+               }
+               if ( !$informationData || empty( 
$informationData['ImageDescription'] ) ) {
+                       $problems[] = 'no-description';
+               }
+               if ( !$informationData || empty( $informationData['Artist'] ) 
&& empty( $informationData['Attribution'] ) ) {
+                       $problems[] = 'no-author';
+               }
+               if ( !$informationData || empty( $informationData['Credit'] ) 
&& empty( $informationData['Attribution'] ) ) {
+                       $problems[] = 'no-source';
+               }
+
+               return $problems;
+       }
+
+       /**
         * @param array $categories
         * @return array
         */
diff --git a/HookHandler.php b/HookHandler.php
index 75c323e..84f235a 100755
--- a/HookHandler.php
+++ b/HookHandler.php
@@ -84,6 +84,36 @@
        }
 
        /**
+        * Check HTML output of a file page to see if it has all the basic 
metadata, and add tracking categories
+        * if it does not.
+        * @param \Content $content
+        * @param \Title $title
+        * @param \ParserOutput $parserOutput
+        * @return bool this hook handler always returns true.
+        */
+       public static function onContentAlterParserOutput( $content, $title, 
$parserOutput ) {
+               global $wgCommonsMetadataSetTrackingCategories;
+
+               if (
+                       !$wgCommonsMetadataSetTrackingCategories
+                       || !$title->inNamespace( NS_FILE )
+                       || $content->getModel() !== CONTENT_MODEL_WIKITEXT
+               ) {
+                       return true;
+               }
+
+               $language = $content->getContentHandler()->getPageViewLanguage( 
$title, $content );
+               $dataCollector = self::getDataCollector( $language, true );
+
+               $categoryKeys = $dataCollector->verifyAttributionMetadata( 
$parserOutput->getText() );
+               foreach ( $categoryKeys as $key ) {
+                       $parserOutput->addTrackingCategory( 
'commonsmetadata-trackingcategory-' . $key, $title );
+               }
+
+               return true;
+       }
+
+       /**
         * Hook to add unit tests
         * @param array $files
         * @return bool
@@ -93,4 +123,24 @@
                $files = array_merge( $files, glob( $testDir . 
DIRECTORY_SEPARATOR . '*Test.php' ) );
                return true;
        }
+
+       /**
+        * @param Language $lang
+        * @param bool $singleLang
+        */
+       private static function getDataCollector( Language $lang, $singleLang ) 
{
+               $templateParser = new TemplateParser();
+               $templateParser->setMultiLanguage( !$singleLang );
+               $fallbacks = Language::getFallbacksFor( $lang->getCode() );
+               array_unshift( $fallbacks, $lang->getCode() );
+               $templateParser->setPriorityLanguages( $fallbacks );
+
+               $dataCollector = new DataCollector();
+               $dataCollector->setLanguage( $lang );
+               $dataCollector->setMultiLang( !$singleLang );
+               $dataCollector->setTemplateParser( $templateParser );
+               $dataCollector->setLicenseParser( new LicenseParser() );
+
+               return $dataCollector;
+       }
 }
diff --git a/i18n/en.json b/i18n/en.json
index 1b9c42c..273f0e2 100644
--- a/i18n/en.json
+++ b/i18n/en.json
@@ -1,6 +1,14 @@
 {
-    "@metadata": {
-        "authors": []
-    },
-    "commonsmetadata-desc": "Extends the \"extmetadata\" property of the image 
information API module to include information stored in image description pages 
that use the templates commonly used on Wikimedia Commons"
-}
\ No newline at end of file
+       "@metadata": {
+               "authors": []
+       },
+       "commonsmetadata-desc": "Extends the \"extmetadata\" property of the 
image information API module to include information stored in image description 
pages that use the templates commonly used on Wikimedia Commons",
+       "commonsmetadata-trackingcategory-no-license": "Files with no 
machine-readable license",
+       "commonsmetadata-trackingcategory-no-license-desc": "The file does not 
have any [https://commons.wikimedia.org/wiki/Commons:Machine-readable_data 
machine-readable] license template.",
+       "commonsmetadata-trackingcategory-no-description": "Files with no 
machine-readable description",
+       "commonsmetadata-trackingcategory-no-description-desc": "The file does 
not have a [https://commons.wikimedia.org/wiki/Commons:Machine-readable_data 
machine-readable] information template, or its description field is not filled 
out.",
+       "commonsmetadata-trackingcategory-no-author": "Files with no 
machine-readable author",
+       "commonsmetadata-trackingcategory-no-author-desc": "The file does not 
have a [https://commons.wikimedia.org/wiki/Commons:Machine-readable_data 
machine-readable] information template, or its author field is not filled out.",
+       "commonsmetadata-trackingcategory-no-source": "Files with no 
machine-readable source",
+       "commonsmetadata-trackingcategory-no-source-desc": "The file does not 
have a [https://commons.wikimedia.org/wiki/Commons:Machine-readable_data 
machine-readable] information template, or its source field is not filled out."
+}
diff --git a/i18n/qqq.json b/i18n/qqq.json
index d09eaab..3f93512 100644
--- a/i18n/qqq.json
+++ b/i18n/qqq.json
@@ -4,5 +4,13 @@
                        "Bawolff"
                ]
        },
-       "commonsmetadata-desc": "{{desc}}"
+       "commonsmetadata-desc": "{{desc}}",
+       "commonsmetadata-trackingcategory-no-license": "Name of the tracking 
category for files with no machine-readable license",
+       "commonsmetadata-trackingcategory-no-license-desc": "Description of the 
inclusion criteria for the tracking category for files with no machine-readable 
license",
+       "commonsmetadata-trackingcategory-no-description": "Name of the 
tracking category for files with no machine-readable description",
+       "commonsmetadata-trackingcategory-no-description-desc": "Description of 
the inclusion criteria for the tracking category for files with no 
machine-readable description",
+       "commonsmetadata-trackingcategory-no-author": "Name of the tracking 
category for files with no machine-readable author",
+       "commonsmetadata-trackingcategory-no-author-desc": "Description of the 
inclusion criteria for the tracking category for files with no machine-readable 
author",
+       "commonsmetadata-trackingcategory-no-source": "Name of the tracking 
category for files with no machine-readable source",
+       "commonsmetadata-trackingcategory-no-source-desc": "Description of the 
inclusion criteria for the tracking category for files with no machine-readable 
source"
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/160580
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I43ed79b6a54cd31820ecae8139e29c5880f5dd1b
Gerrit-PatchSet: 6
Gerrit-Project: mediawiki/extensions/CommonsMetadata
Gerrit-Branch: master
Gerrit-Owner: Gergő Tisza <[email protected]>
Gerrit-Reviewer: Brian Wolff <[email protected]>
Gerrit-Reviewer: Gergő Tisza <[email protected]>
Gerrit-Reviewer: Gilles <[email protected]>
Gerrit-Reviewer: Guillom <[email protected]>
Gerrit-Reviewer: Siebrand <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to