jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/348496 )
Change subject: Make filters thresholds more configurable ...................................................................... Make filters thresholds more configurable * Supports numerical values, test_stats entries, or 'false' to disable a level * Introduced very likely badfaith * Reword messages to omit specific numerical targets * Set better default values Bug: T162760 Change-Id: I6b839a6a8644637cfff6fbc83bb18952d97ad9f2 --- M extension.json M i18n/en.json M i18n/qqq.json M includes/Hooks.php M includes/Stats.php M tests/phpunit/includes/StatsTest.php 6 files changed, 283 insertions(+), 408 deletions(-) Approvals: Catrope: Looks good to me, approved jenkins-bot: Verified diff --git a/extension.json b/extension.json index 325ab6c..fd688c3 100644 --- a/extension.json +++ b/extension.json @@ -138,16 +138,18 @@ }, "OresFiltersThresholds": { "damaging": { - "likelygood": { "min": 0, "max": 0.55 }, - "maybebad": { "min": 0.16, "max": 1 }, - "likelybad": { "min": 0.75, "max": 1 }, - "verylikelybad": { "min": 0.92, "max": 1 } + "likelygood": { "min": 0, "max": "recall_at_precision(min_precision=0.995)" }, + "maybebad": { "min": "filter_rate_at_recall(min_recall=0.9)", "max": 1 }, + "likelybad": { "min": "recall_at_precision(min_precision=0.6)", "max": 1 }, + "verylikelybad": { "min": "recall_at_precision(min_precision=0.9)", "max": 1 } }, "goodfaith": { - "good": { "min": 0.35, "max": 1 }, - "maybebad": { "min": 0, "max": 0.65 }, - "bad": { "min": 0, "max": 0.15 } - } + "likelygood": { "min": "recall_at_precision(min_precision=0.995)", "max": 1 }, + "maybebad": { "min": 0, "max": "filter_rate_at_recall(min_recall=0.9)" }, + "likelybad": { "min": 0, "max": "recall_at_precision(min_precision=0.6)" }, + "verylikelybad": false + }, + "_merge_strategy": "array_plus_2d" }, "OresEnabledNamespaces": {}, "OresWikiId": null, diff --git a/i18n/en.json b/i18n/en.json index f8904b3..e5012ec 100644 --- a/i18n/en.json +++ b/i18n/en.json @@ -28,9 +28,9 @@ "ores-rcfilters-damaging-maybebad-label": "May have problems", "ores-rcfilters-damaging-maybebad-desc": "Finds most flawed or damaging edits but with lower accuracy.", "ores-rcfilters-damaging-likelybad-label": "Likely have problems", - "ores-rcfilters-damaging-likelybad-desc": "Finds half of flawed or damaging edits with medium accuracy.", + "ores-rcfilters-damaging-likelybad-desc": "With medium accuracy, finds more problem edits than the \"Very Likely\" filter but fewer than \"May.\"", "ores-rcfilters-damaging-verylikelybad-label": "Very likely have problems", - "ores-rcfilters-damaging-verylikelybad-desc": "Highly accurate at finding the most obvious 10% of flawed or damaging edits.", + "ores-rcfilters-damaging-verylikelybad-desc": "Highly accurate at finding the most obvious flawed or damaging edits.", "ores-rcfilters-goodfaith-title": "User intent predictions", "ores-rcfilters-goodfaith-whats-this-header": "About user intent predictions", "ores-rcfilters-goodfaith-whats-this-body": "These predictions about users' good faith are made by a machine-learning service trained on a large set of edits scored by human editors. Stricter, more accurate filters find fewer false positives but miss more of their target. Less accurate filters find more of their target, but they also find more false positives.", @@ -40,7 +40,9 @@ "ores-rcfilters-goodfaith-maybebad-label": "May be bad faith", "ores-rcfilters-goodfaith-maybebad-desc": "Finds most bad-faith edits but with a lower accuracy.", "ores-rcfilters-goodfaith-bad-label": "Likely bad faith", - "ores-rcfilters-goodfaith-bad-desc": "With medium accuracy, finds the most obvious 25% of bad-faith edits.", + "ores-rcfilters-goodfaith-bad-desc": "With medium accuracy, finds more bad-faith edits than the \"Very likely\" filter but fewer than \"May.\"", + "ores-rcfilters-goodfaith-verylikelybad-label": "Very likely bad faith", + "ores-rcfilters-goodfaith-verylikelybad-desc": "Highly accurate at finding the most obvious bad faith edits.", "ores-pref-highlight": "Highlight likely problem edits with colors and an \"{{int:ores-damaging-letter}}\" for \"needs review\"", "ores-pref-rc-hidenondamaging": "Show only likely problem edits (and hide probably good edits)", "ores-pref-watchlist-hidenondamaging": "Show only likely problem edits (and hide probably good edits)", diff --git a/i18n/qqq.json b/i18n/qqq.json index f965d13..91b7aa7 100644 --- a/i18n/qqq.json +++ b/i18n/qqq.json @@ -47,6 +47,8 @@ "ores-rcfilters-goodfaith-maybebad-desc": "Description for the filter for showing edits that are somewhat likely to be bad faith.", "ores-rcfilters-goodfaith-bad-label": "Label for the filter for showing edits that are likely to be bad faith.", "ores-rcfilters-goodfaith-bad-desc": "Description for the filter for showing edits that are likely to be bad faith.", + "ores-rcfilters-goodfaith-verylikelybad-label": "Label for the filter for showing edits that are very likely to be bad faith.", + "ores-rcfilters-goodfaith-verylikelybad-desc": "Description for the filter for showing edits that are very likely to be bad faith.", "ores-pref-highlight": "Display message for user preference to enable highlighting of damaging edits.", "ores-pref-rc-hidenondamaging": "Display message for user preferences to make hidenondamaging default in recent changes", "ores-pref-watchlist-hidenondamaging": "Display message for user preferences to make hidenondamaging default in the watchlist", diff --git a/includes/Hooks.php b/includes/Hooks.php index a136e51..f017a38 100644 --- a/includes/Hooks.php +++ b/includes/Hooks.php @@ -98,90 +98,110 @@ if ( self::isModelEnabled( 'damaging' ) ) { $damagingLevels = $stats->getThresholds( 'damaging' ); - $newDamagingGroup = new ChangesListStringOptionsFilterGroup( [ - 'name' => 'damaging', - 'title' => 'ores-rcfilters-damaging-title', - 'whatsThisHeader' => 'ores-rcfilters-damaging-whats-this-header', - 'whatsThisBody' => 'ores-rcfilters-damaging-whats-this-body', - 'whatsThisUrl' => 'https://www.mediawiki.org/wiki/' . - 'Special:MyLanguage/Help:New_filters_for_edit_review/Quality_and_Intent_Filters', - 'whatsThisLinkText' => 'ores-rcfilters-whats-this-link-text', - 'priority' => 2, - 'filters' => [ - [ - 'name' => 'likelygood', - 'label' => 'ores-rcfilters-damaging-likelygood-label', - 'description' => 'ores-rcfilters-damaging-likelygood-desc', - 'cssClassSuffix' => 'damaging-likelygood', - 'isRowApplicableCallable' => self::makeApplicableCallback( - 'damaging', - $damagingLevels['likelygood'] - ), - ], - [ - 'name' => 'maybebad', - 'label' => 'ores-rcfilters-damaging-maybebad-label', - 'description' => 'ores-rcfilters-damaging-maybebad-desc', - 'cssClassSuffix' => 'damaging-maybebad', - 'isRowApplicableCallable' => self::makeApplicableCallback( - 'damaging', - $damagingLevels['maybebad'] - ), - ], - [ - 'name' => 'likelybad', - 'label' => 'ores-rcfilters-damaging-likelybad-label', - 'description' => 'ores-rcfilters-damaging-likelybad-desc', - 'cssClassSuffix' => 'damaging-likelybad', - 'isRowApplicableCallable' => self::makeApplicableCallback( - 'damaging', - $damagingLevels['likelybad'] - ), - ], - [ - 'name' => 'verylikelybad', - 'label' => 'ores-rcfilters-damaging-verylikelybad-label', - 'description' => 'ores-rcfilters-damaging-verylikelybad-desc', - 'cssClassSuffix' => 'damaging-verylikelybad', - 'isRowApplicableCallable' => self::makeApplicableCallback( - 'damaging', - $damagingLevels['verylikelybad'] - ), - ], - ], - 'default' => ChangesListStringOptionsFilterGroup::NONE, - 'isFullCoverage' => false, - 'queryCallable' => function ( $specialClassName, $ctx, $dbr, &$tables, &$fields, - &$conds, &$query_options, &$join_conds, $selectedValues ) { - $condition = self::buildRangeFilter( 'damaging', $selectedValues ); - if ( $condition ) { - $conds[] = $condition; - $join_conds['ores_damaging_mdl'][0] = 'INNER JOIN'; - $join_conds['ores_damaging_cls'][0] = 'INNER JOIN'; - // Performance hack: add STRAIGHT_JOIN (146111) - $query_options[] = 'STRAIGHT_JOIN'; - } - }, - ] ); + $filters = []; + if ( isset( $damagingLevels[ 'likelygood' ] ) ) { + $filters[] = [ + 'name' => 'likelygood', + 'label' => 'ores-rcfilters-damaging-likelygood-label', + 'description' => 'ores-rcfilters-damaging-likelygood-desc', + 'cssClassSuffix' => 'damaging-likelygood', + 'isRowApplicableCallable' => self::makeApplicableCallback( + 'damaging', + $damagingLevels['likelygood'] + ), + ]; + } + if ( isset( $damagingLevels[ 'maybebad' ] ) ) { + $filters[] = [ + 'name' => 'maybebad', + 'label' => 'ores-rcfilters-damaging-maybebad-label', + 'description' => 'ores-rcfilters-damaging-maybebad-desc', + 'cssClassSuffix' => 'damaging-maybebad', + 'isRowApplicableCallable' => self::makeApplicableCallback( + 'damaging', + $damagingLevels['maybebad'] + ), + ]; + } + if ( isset( $damagingLevels[ 'likelybad' ] ) ) { + $filters[] = [ + 'name' => 'likelybad', + 'label' => 'ores-rcfilters-damaging-likelybad-label', + 'description' => 'ores-rcfilters-damaging-likelybad-desc', + 'cssClassSuffix' => 'damaging-likelybad', + 'isRowApplicableCallable' => self::makeApplicableCallback( + 'damaging', + $damagingLevels['likelybad'] + ), + ]; + } + if ( isset( $damagingLevels[ 'verylikelybad' ] ) ) { + $filters[] = [ + 'name' => 'verylikelybad', + 'label' => 'ores-rcfilters-damaging-verylikelybad-label', + 'description' => 'ores-rcfilters-damaging-verylikelybad-desc', + 'cssClassSuffix' => 'damaging-verylikelybad', + 'isRowApplicableCallable' => self::makeApplicableCallback( + 'damaging', + $damagingLevels['verylikelybad'] + ), + ]; + } - $newDamagingGroup->conflictsWith( - $logFilter, - 'ores-rcfilters-ores-conflicts-logactions-global', - 'ores-rcfilters-damaging-conflicts-logactions', - 'ores-rcfilters-logactions-conflicts-ores' - ); + if ( $filters ) { + $newDamagingGroup = new ChangesListStringOptionsFilterGroup( [ + 'name' => 'damaging', + 'title' => 'ores-rcfilters-damaging-title', + 'whatsThisHeader' => 'ores-rcfilters-damaging-whats-this-header', + 'whatsThisBody' => 'ores-rcfilters-damaging-whats-this-body', + 'whatsThisUrl' => 'https://www.mediawiki.org/wiki/' . + 'Special:MyLanguage/Help:New_filters_for_edit_review/Quality_and_Intent_Filters', + 'whatsThisLinkText' => 'ores-rcfilters-whats-this-link-text', + 'priority' => 2, + 'filters' => $filters, + 'default' => ChangesListStringOptionsFilterGroup::NONE, + 'isFullCoverage' => false, + 'queryCallable' => function ( $specialClassName, $ctx, $dbr, &$tables, &$fields, + &$conds, &$query_options, &$join_conds, $selectedValues ) { + $condition = self::buildRangeFilter( 'damaging', $selectedValues ); + if ( $condition ) { + $conds[] = $condition; + $join_conds['ores_damaging_mdl'][0] = 'INNER JOIN'; + $join_conds['ores_damaging_cls'][0] = 'INNER JOIN'; + // Performance hack: add STRAIGHT_JOIN (146111) + $query_options[] = 'STRAIGHT_JOIN'; + } + }, + ] ); - $newDamagingGroup->getFilter( 'maybebad' )->setAsSupersetOf( - $newDamagingGroup->getFilter( 'likelybad' ) - ); - $newDamagingGroup->getFilter( 'likelybad' )->setAsSupersetOf( - $newDamagingGroup->getFilter( 'verylikelybad' ) - ); - // Transitive closure - $newDamagingGroup->getFilter( 'maybebad' )->setAsSupersetOf( - $newDamagingGroup->getFilter( 'verylikelybad' ) - ); - $clsp->registerFilterGroup( $newDamagingGroup ); + $newDamagingGroup->conflictsWith( + $logFilter, + 'ores-rcfilters-ores-conflicts-logactions-global', + 'ores-rcfilters-damaging-conflicts-logactions', + 'ores-rcfilters-logactions-conflicts-ores' + ); + + if ( isset( $filters[ 'maybebad' ] ) && isset( $filters[ 'likelybad' ] ) ) { + $newDamagingGroup->getFilter( 'maybebad' )->setAsSupersetOf( + $newDamagingGroup->getFilter( 'likelybad' ) + ); + } + + if ( isset( $filters[ 'likelybad' ] ) && isset( $filters[ 'verylikelybad' ] ) ) { + $newDamagingGroup->getFilter( 'likelybad' )->setAsSupersetOf( + $newDamagingGroup->getFilter( 'verylikelybad' ) + ); + } + + // Transitive closure + if ( isset( $filters[ 'maybebad' ] ) && isset( $filters[ 'verylikelybad' ] ) ) { + $newDamagingGroup->getFilter( 'maybebad' )->setAsSupersetOf( + $newDamagingGroup->getFilter( 'verylikelybad' ) + ); + } + + $clsp->registerFilterGroup( $newDamagingGroup ); + } if ( $clsp instanceof SpecialRecentChanges ) { $damagingDefault = $clsp->getUser()->getOption( 'oresRCHideNonDamaging' ); @@ -220,73 +240,109 @@ } if ( self::isModelEnabled( 'goodfaith' ) ) { $goodfaithLevels = $stats->getThresholds( 'goodfaith' ); - $goodfaithGroup = new ChangesListStringOptionsFilterGroup( [ - 'name' => 'goodfaith', - 'title' => 'ores-rcfilters-goodfaith-title', - 'whatsThisHeader' => 'ores-rcfilters-goodfaith-whats-this-header', - 'whatsThisBody' => 'ores-rcfilters-goodfaith-whats-this-body', - 'whatsThisUrl' => 'https://www.mediawiki.org/wiki/' . - 'Special:MyLanguage/Help:New_filters_for_edit_review/Quality_and_Intent_Filters', - 'whatsThisLinkText' => 'ores-rcfilters-whats-this-link-text', - 'priority' => 1, - 'filters' => [ - [ - 'name' => 'good', - 'label' => 'ores-rcfilters-goodfaith-good-label', - 'description' => 'ores-rcfilters-goodfaith-good-desc', - 'cssClassSuffix' => 'goodfaith-good', - 'isRowApplicableCallable' => self::makeApplicableCallback( - 'goodfaith', - $goodfaithLevels['good'] - ), - ], - [ - 'name' => 'maybebad', - 'label' => 'ores-rcfilters-goodfaith-maybebad-label', - 'description' => 'ores-rcfilters-goodfaith-maybebad-desc', - 'cssClassSuffix' => 'goodfaith-maybebad', - 'isRowApplicableCallable' => self::makeApplicableCallback( - 'goodfaith', - $goodfaithLevels['maybebad'] - ), - ], - [ - 'name' => 'bad', - 'label' => 'ores-rcfilters-goodfaith-bad-label', - 'description' => 'ores-rcfilters-goodfaith-bad-desc', - 'cssClassSuffix' => 'goodfaith-bad', - 'isRowApplicableCallable' => self::makeApplicableCallback( - 'goodfaith', - $goodfaithLevels['bad'] - ), - ], - ], - 'default' => ChangesListStringOptionsFilterGroup::NONE, - 'isFullCoverage' => false, - 'queryCallable' => function ( $specialClassName, $ctx, $dbr, &$tables, &$fields, + $filters = []; + if ( isset( $goodfaithLevels['likelygood'] ) ) { + $filters[] = [ + 'name' => 'likelygood', + 'label' => 'ores-rcfilters-goodfaith-good-label', + 'description' => 'ores-rcfilters-goodfaith-good-desc', + 'cssClassSuffix' => 'goodfaith-good', + 'isRowApplicableCallable' => self::makeApplicableCallback( + 'goodfaith', + $goodfaithLevels['likelygood'] + ), + ]; + } + if ( isset( $goodfaithLevels['maybebad'] ) ) { + $filters[] = [ + 'name' => 'maybebad', + 'label' => 'ores-rcfilters-goodfaith-maybebad-label', + 'description' => 'ores-rcfilters-goodfaith-maybebad-desc', + 'cssClassSuffix' => 'goodfaith-maybebad', + 'isRowApplicableCallable' => self::makeApplicableCallback( + 'goodfaith', + $goodfaithLevels['maybebad'] + ), + ]; + } + if ( isset( $goodfaithLevels['likelybad'] ) ) { + $filters[] = [ + 'name' => 'likelybad', + 'label' => 'ores-rcfilters-goodfaith-bad-label', + 'description' => 'ores-rcfilters-goodfaith-bad-desc', + 'cssClassSuffix' => 'goodfaith-bad', + 'isRowApplicableCallable' => self::makeApplicableCallback( + 'goodfaith', + $goodfaithLevels['likelybad'] + ), + ]; + } + if ( isset( $goodfaithLevels['verylikelybad'] ) ) { + $filters[] = [ + 'name' => 'verylikelybad', + 'label' => 'ores-rcfilters-goodfaith-verylikelybad-label', + 'description' => 'ores-rcfilters-goodfaith-verylikelybad-desc', + 'cssClassSuffix' => 'goodfaith-verylikelybad', + 'isRowApplicableCallable' => self::makeApplicableCallback( + 'goodfaith', + $goodfaithLevels['verylikelybad'] + ), + ]; + } + + if ( $filters ) { + $goodfaithGroup = new ChangesListStringOptionsFilterGroup( [ + 'name' => 'goodfaith', + 'title' => 'ores-rcfilters-goodfaith-title', + 'whatsThisHeader' => 'ores-rcfilters-goodfaith-whats-this-header', + 'whatsThisBody' => 'ores-rcfilters-goodfaith-whats-this-body', + 'whatsThisUrl' => 'https://www.mediawiki.org/wiki/' . + 'Special:MyLanguage/Help:New_filters_for_edit_review/Quality_and_Intent_Filters', + 'whatsThisLinkText' => 'ores-rcfilters-whats-this-link-text', + 'priority' => 1, + 'filters' => $filters, + 'default' => ChangesListStringOptionsFilterGroup::NONE, + 'isFullCoverage' => false, + 'queryCallable' => function ( $specialClassName, $ctx, $dbr, &$tables, &$fields, &$conds, &$query_options, &$join_conds, $selectedValues ) { - $condition = self::buildRangeFilter( 'goodfaith', $selectedValues ); - if ( $condition ) { - $conds[] = $condition; - $join_conds['ores_goodfaith_mdl'][0] = 'INNER JOIN'; - $join_conds['ores_goodfaith_cls'][0] = 'INNER JOIN'; - // Performance hack: add STRAIGHT_JOIN (146111) - $query_options[] = 'STRAIGHT_JOIN'; - } - }, - ] ); - $goodfaithGroup->getFilter( 'maybebad' )->setAsSupersetOf( - $goodfaithGroup->getFilter( 'bad' ) - ); + $condition = self::buildRangeFilter( 'goodfaith', $selectedValues ); + if ( $condition ) { + $conds[] = $condition; + $join_conds['ores_goodfaith_mdl'][0] = 'INNER JOIN'; + $join_conds['ores_goodfaith_cls'][0] = 'INNER JOIN'; + // Performance hack: add STRAIGHT_JOIN (146111) + $query_options[] = 'STRAIGHT_JOIN'; + } + }, + ] ); - $goodfaithGroup->conflictsWith( - $logFilter, - 'ores-rcfilters-ores-conflicts-logactions-global', - 'ores-rcfilters-goodfaith-conflicts-logactions', - 'ores-rcfilters-logactions-conflicts-ores' - ); + if ( isset( $filters['maybebad'] ) && isset( $filters['likelybad'] ) ) { + $goodfaithGroup->getFilter( 'maybebad' )->setAsSupersetOf( + $goodfaithGroup->getFilter( 'likelybad' ) + ); + } - $clsp->registerFilterGroup( $goodfaithGroup ); + if ( isset( $filters['likelybad'] ) && isset( $filters['verylikelybad'] ) ) { + $goodfaithGroup->getFilter( 'likelybad' )->setAsSupersetOf( + $goodfaithGroup->getFilter( 'verylikelybad' ) + ); + } + + if ( isset( $filters['maybebad'] ) && isset( $filters['verylikelybad'] ) ) { + $goodfaithGroup->getFilter( 'maybebad' )->setAsSupersetOf( + $goodfaithGroup->getFilter( 'verylikelybad' ) + ); + } + + $goodfaithGroup->conflictsWith( + $logFilter, + 'ores-rcfilters-ores-conflicts-logactions-global', + 'ores-rcfilters-goodfaith-conflicts-logactions', + 'ores-rcfilters-logactions-conflicts-ores' + ); + + $clsp->registerFilterGroup( $goodfaithGroup ); + } } } diff --git a/includes/Stats.php b/includes/Stats.php index 5192e07..8ce5442 100644 --- a/includes/Stats.php +++ b/includes/Stats.php @@ -23,69 +23,6 @@ */ private $logger; - private $thresholdsConfig = [ - 'damaging' => [ - 'likelygood' => [ - 'min' => 0, - 'max' => [ - 'stat' => 'recall_at_precision(min_precision=0.98)', - 'outcome' => 'false', - 'default' => 0.55, - ] - ], - 'maybebad' => [ - 'min' => [ - 'stat' => 'recall_at_precision(min_precision=0.15)', - 'outcome' => 'true', - 'default' => 0.16, - ], - 'max' => 1, - ], - 'likelybad' => [ - 'min' => [ - 'stat' => 'recall_at_precision(min_precision=0.45)', - 'outcome' => 'true', - 'default' => 0.75, - ], - 'max' => 1, - ], - 'verylikelybad' => [ - 'min' => [ - 'stat' => 'recall_at_precision(min_precision=0.9)', - 'outcome' => 'true', - 'default' => 0.92, - ], - 'max' => 1, - ], - ], - 'goodfaith' => [ - 'good' => [ - 'min' => [ - 'stat' => 'recall_at_precision(min_precision=0.98)', - 'outcome' => 'true', - 'default' => 0.35, - ], - 'max' => 1, - ], - 'maybebad' => [ - 'min' => 0, - 'max' => [ - 'stat' => 'recall_at_precision(min_precision=0.15)', - 'outcome' => 'false', - 'default' => 0.65, - ], - ], - 'bad' => [ - 'min' => 0, - 'max' => [ - 'stat' => 'recall_at_precision(min_precision=0.45)', - 'outcome' => 'false', - 'default' => 0.15, - ], - ], - ], - ]; - /** * @param Api $api * @param \WANObjectCache $cache @@ -98,17 +35,16 @@ } public function getThresholds( $model, $fromCache = true ) { - global $wgOresFiltersThresholds; - - if ( isset( $wgOresFiltersThresholds[ $model ] ) ) { - return $wgOresFiltersThresholds[ $model ]; - } - - if ( isset( $this->thresholdsConfig[ $model ] ) ) { + if ( $this->getFiltersConfig( $model ) ) { return $this->parseThresholds( $this->fetchStats( $model, $fromCache ), $model ); } else { return []; } + } + + private function getFiltersConfig( $model ) { + global $wgOresFiltersThresholds; + return isset( $wgOresFiltersThresholds[ $model ] ) ? $wgOresFiltersThresholds[ $model ] : false; } private function fetchStats( $model, $fromCache ) { @@ -135,7 +71,12 @@ private function parseThresholds( $statsData, $model ) { $thresholds = []; - foreach ( $this->thresholdsConfig[ $model ] as $levelName => $config ) { + foreach ( $this->getFiltersConfig( $model ) as $levelName => $config ) { + if ( $config === false ) { + // level is disabled + continue; + } + $min = $this->extractBoundValue( $model, $levelName, @@ -152,10 +93,12 @@ $statsData ); - $thresholds[ $levelName ] = [ - 'min' => $min, - 'max' => $max, - ]; + if ( is_numeric( $min ) && is_numeric( $max ) ) { + $thresholds[$levelName] = [ + 'min' => $min, + 'max' => $max, + ]; + } } return $thresholds; } @@ -165,10 +108,10 @@ return $config; } - $stat = $config[ 'stat' ]; - $outcome = $config[ 'outcome' ]; - if ( isset( $statsData[ $stat ][ $outcome ][ 'threshold' ] ) ) { - $threshold = $statsData[ $stat ][ $outcome ][ 'threshold' ]; + $stat = $config; + $outcome = $bound === 'min' ? 'true' : 'false'; + if ( isset( $statsData[$stat][$outcome]['threshold'] ) ) { + $threshold = $statsData[$stat][$outcome]['threshold']; // Thresholds reported for "false" outcomes apply to "false" scores, but we always // apply thresholds against "true" scores, so we need to invert "false" thresholds here. return $outcome === 'false' ? 1 - $threshold : $threshold; @@ -184,8 +127,6 @@ 'statsData' => print_r( $statsData, true ), ] ); - - return $config[ 'default' ]; } public static function newFromGlobalState() { diff --git a/tests/phpunit/includes/StatsTest.php b/tests/phpunit/includes/StatsTest.php index c43bd57..e5ca2a1 100644 --- a/tests/phpunit/includes/StatsTest.php +++ b/tests/phpunit/includes/StatsTest.php @@ -36,141 +36,6 @@ ->getMock(); } - public function testGetThresholds_damaging() { - $api = $this->getMockBuilder( 'ORES\Api' )->getMock(); - $api->method( 'request' ) - ->with( [ 'model_info' => 'test_stats' ], 'damaging' ) - ->willReturn( [ - 'test_stats' => [ - 'recall_at_precision(min_precision=0.15)' => [ - 'true' => [ 'threshold' => 0.281 ], // maybebad min - ], - 'recall_at_precision(min_precision=0.45)' => [ - 'true' => [ 'threshold' => 0.831 ], // likelybad min - ], - 'recall_at_precision(min_precision=0.9)' => [ - 'true' => [ 'threshold' => 0.945 ], // verylikelybad min - ], - 'recall_at_precision(min_precision=0.98)' => [ - 'false' => [ 'threshold' => 0.259 ], // likelygood max - ], - ], - ] ); - - $stats = new ORES\Stats( - $api, - WANObjectCache::newEmpty(), - LoggerFactory::getInstance( 'test' ) - ); - - $thresholds = $stats->getThresholds( 'damaging' ); - - $this->assertEquals( - $thresholds, - [ - 'likelygood' => [ - 'min' => 0, - 'max' => 0.741, // 1-0.259 - ], - 'maybebad' => [ - 'min' => 0.281, - 'max' => 1, - ], - 'likelybad' => [ - 'min' => 0.831, - 'max' => 1, - ], - 'verylikelybad' => [ - 'min' => 0.945, - 'max' => 1, - ], - ] - ); - } - - public function testGetThresholds_goodfaith() { - $api = $this->getMockBuilder( 'ORES\Api' )->getMock(); - $api->method( 'request' ) - ->with( [ 'model_info' => 'test_stats' ], 'goodfaith' ) - ->willReturn( [ - 'test_stats' => [ - 'recall_at_precision(min_precision=0.15)' => [ - 'false' => [ 'threshold' => 0.322 ], // maybebad max - ], - 'recall_at_precision(min_precision=0.45)' => [ - 'false' => [ 'threshold' => 0.808 ], // bad max - ], - 'recall_at_precision(min_precision=0.98)' => [ - 'true' => [ 'threshold' => 0.24 ], // good min - ], - ], - ] ); - - $stats = new ORES\Stats( - $api, - WANObjectCache::newEmpty(), - LoggerFactory::getInstance( 'test' ) - ); - - $thresholds = $stats->getThresholds( 'goodfaith' ); - - $this->assertEquals( - $thresholds, - [ - 'good' => [ - 'min' => 0.24, - 'max' => 1, - ], - 'maybebad' => [ - 'min' => 0, - 'max' => 0.678, // 1-0.322 - ], - 'bad' => [ - 'min' => 0, - 'max' => 0.192, // 1-0.808 - ] - ] - ); - } - - public function testGetThresholds_statNotFound() { - $api = $this->getMockBuilder( 'ORES\Api' )->getMock(); - $api->method( 'request' ) - ->with( [ 'model_info' => 'test_stats' ], 'goodfaith' ) - ->willReturn( [ - 'test_stats' => [ - 'some_other_stats' => [ - 'false' => [ 'recall' => 0.1234 ], - ], - ], - ] ); - - $logger = $this->getLoggerMock(); - $logger->expects( $this->exactly( 3 ) )->method( 'warning' ); - - $stats = new ORES\Stats( $api, WANObjectCache::newEmpty(), $logger ); - - $thresholds = $stats->getThresholds( 'goodfaith' ); - - $this->assertEquals( - $thresholds, - [ - 'good' => [ - 'min' => 0.35, - 'max' => 1, - ], - 'maybebad' => [ - 'min' => 0, - 'max' => 0.65, - ], - 'bad' => [ - 'min' => 0, - 'max' => 0.15, - ] - ] - ); - } - public function testGetThresholds_modelConfigNotFound() { $api = $this->getMockBuilder( 'ORES\Api' )->getMock(); $logger = $this->getLoggerMock(); @@ -190,8 +55,16 @@ ->with( [ 'model_info' => 'test_stats' ], 'goodfaith' ) ->willReturn( 'this is not the stat object you were expecting...' ); + $this->setMwGlobals( [ + 'wgOresFiltersThresholds' => [ + 'goodfaith' => [ + 'level1' => [ 'min' => 'some_stat', 'max' => 'some_other_stat' ], + ] + ], + ] ); + $logger = $this->getLoggerMock(); - $logger->expects( $this->exactly( 3 ) )->method( 'warning' ); + $logger->expects( $this->exactly( 2 ) )->method( 'warning' ); $stats = new ORES\Stats( $api, WANObjectCache::newEmpty(), $logger ); @@ -199,60 +72,59 @@ $this->assertEquals( $thresholds, - [ - 'good' => [ - 'min' => 0.35, - 'max' => 1, - ], - 'maybebad' => [ - 'min' => 0, - 'max' => 0.65, - ], - 'bad' => [ - 'min' => 0, - 'max' => 0.15, - ] - ] + [] ); } - public function testGetThresholds_everythingWouldHaveGoneWrong() { + public function testGetThresholds_filtersConfig() { $api = $this->getMockBuilder( 'ORES\Api' )->getMock(); $api->method( 'request' ) - ->with( [ 'model_info' => 'test_stats' ], 'goodfaith' ) - ->willReturn( 'this is not the stat object you were expecting...' ); - - $logger = $this->getLoggerMock(); + ->with( [ 'model_info' => 'test_stats' ], 'damaging' ) + ->willReturn( [ + 'test_stats' => [ + 'recall_at_precision(min_precision=0.9)' => [ + 'true' => [ 'threshold' => 0.945 ], // verylikelybad min + ], + 'recall_at_precision(min_precision=0.98)' => [ + 'false' => [ 'threshold' => 0.259 ], // verylikelygood max + ], + ], + ] ); $this->setMwGlobals( [ 'wgOresFiltersThresholds' => [ - "goodfaith" => [ - "good" => [ "min" => 0.7, "max" => 1 ], - "maybebad" => [ "min" => 0, "max" => 0.69 ], - "bad" => [ "min" => 0, "max" => 0.25 ], + 'damaging' => [ + 'verylikelygood' => [ 'min' => 0, 'max' => 'recall_at_precision(min_precision=0.98)' ], + 'maybebad' => false, + 'likelybad' => [ 'min' => 0.831, 'max' => 1 ], + 'verylikelybad' => [ 'min' => 'recall_at_precision(min_precision=0.9)', 'max' => 1 ], ], ], ] ); - $stats = new ORES\Stats( $api, WANObjectCache::newEmpty(), $logger ); + $stats = new ORES\Stats( + $api, + WANObjectCache::newEmpty(), + LoggerFactory::getInstance( 'test' ) + ); - $thresholds = $stats->getThresholds( 'goodfaith', false ); + $thresholds = $stats->getThresholds( 'damaging' ); $this->assertEquals( $thresholds, [ - 'good' => [ - 'min' => 0.7, + 'verylikelygood' => [ + 'min' => 0, + 'max' => 0.741, // 1-0.259 + ], + 'likelybad' => [ + 'min' => 0.831, 'max' => 1, ], - 'maybebad' => [ - 'min' => 0, - 'max' => 0.69, + 'verylikelybad' => [ + 'min' => 0.945, + 'max' => 1, ], - 'bad' => [ - 'min' => 0, - 'max' => 0.25, - ] ] ); } -- To view, visit https://gerrit.wikimedia.org/r/348496 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I6b839a6a8644637cfff6fbc83bb18952d97ad9f2 Gerrit-PatchSet: 3 Gerrit-Project: mediawiki/extensions/ORES Gerrit-Branch: master Gerrit-Owner: Sbisson <sbis...@wikimedia.org> Gerrit-Reviewer: Catrope <r...@wikimedia.org> Gerrit-Reviewer: Sbisson <sbis...@wikimedia.org> Gerrit-Reviewer: Siebrand <siebr...@kitano.nl> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits