jenkins-bot has submitted this change and it was merged. Change subject: Introducing pp_sortkey. ......................................................................
Introducing pp_sortkey. This adds the pp_sortkey column to the page_props table. pp_sortkey allows for top-k queries for pages, e.g. the 100 pages with the most language links, etc. It is also possible to query for exact values. For now, pp_sortkey will contain pp_value's numeric value if the value was set to a float, int or boolean. Associated tasks: * create a maintenance script for populating pp_sortkey. Tricky, because when reading from the database, all values are strings. * create an API module for querying pages by property value. bug: 58032 Change-Id: I217c42656fb877ff35a36eb446a22bdaf119faac --- M RELEASE-NOTES-1.23 M includes/DefaultSettings.php M includes/deferred/LinksUpdate.php M includes/installer/MysqlUpdater.php M includes/installer/PostgresUpdater.php M includes/installer/SqliteUpdater.php M includes/parser/ParserOutput.php A maintenance/archives/patch-pp_sortkey.sql M maintenance/tables.sql M tests/phpunit/includes/LinksUpdateTest.php 10 files changed, 127 insertions(+), 12 deletions(-) Approvals: Chad: Looks good to me, approved Manybubbles: Looks good to me, but someone else must approve jenkins-bot: Verified diff --git a/RELEASE-NOTES-1.23 b/RELEASE-NOTES-1.23 index ff7b5f9..58e4963 100644 --- a/RELEASE-NOTES-1.23 +++ b/RELEASE-NOTES-1.23 @@ -9,6 +9,9 @@ production. === Configuration changes in 1.23 === +* Introduced $wgPagePropsHaveSortkey as a backwards-compatibility switch, + for using the old schema of the page_props table, in case the respective + schema update was not applied. * When $wgJobRunRate is higher that zero, jobs are now executed via an asynchronous HTTP request to a MediaWiki entry point. This may require increasing the number of server worker threads. @@ -245,6 +248,9 @@ * Support was added for Northern Luri (lrc). === Other changes in 1.23 === +* Added pp_sortkey column to page_props table, so pages can be efficiently + queried and sorted by property value (bug 58032). 
+ See $wgPagePropsHaveSortkey if you want to postpone the schema change. * The rc_type field in the recentchanges table has been superseded by a new rc_source field. The rc_source field is a string representation of the change type where rc_type was a numeric constant. This field is not yet diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 4326eca..f0917f9 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -7086,6 +7086,13 @@ $wgCompiledFiles = array(); /** + * Whether the page_props table has a pp_sortkey column. Set to false in case + * the respective database schema change was not applied. + * @since 1.23 + */ +$wgPagePropsHaveSortkey = true; + +/** * For really cool vim folding this needs to be at the end: * vim: foldmarker=@{,@} foldmethod=marker * @} diff --git a/includes/deferred/LinksUpdate.php b/includes/deferred/LinksUpdate.php index 79232e5..65c04d6 100644 --- a/includes/deferred/LinksUpdate.php +++ b/includes/deferred/LinksUpdate.php @@ -502,19 +502,70 @@ */ function getPropertyInsertions( $existing = array() ) { $diffs = array_diff_assoc( $this->mProperties, $existing ); + $arr = array(); - foreach ( $diffs as $name => $value ) { - $arr[] = array( - 'pp_page' => $this->mId, - 'pp_propname' => $name, - 'pp_value' => $value, - ); + foreach ( array_keys( $diffs ) as $name ) { + $arr[] = $this->getPagePropRowData( $name ); } return $arr; } /** + * Returns an associative array to be used for inserting a row into + * the page_props table. Besides the given property name, this will + * include the page id from $this->mId and any property value from + * $this->mProperties. + * + * The array returned will include the pp_sortkey field if this + * is present in the database (as indicated by $wgPagePropsHaveSortkey). + * The sortkey value is currently determined by getPropertySortKeyValue(). + * + * @note: this assumes that $this->mProperties[$prop] is defined. 
+ * + * @param string $prop The name of the property. + * + * @return array + */ + private function getPagePropRowData( $prop ) { + global $wgPagePropsHaveSortkey; + + $value = $this->mProperties[$prop]; + + $row = array( + 'pp_page' => $this->mId, + 'pp_propname' => $prop, + 'pp_value' => $value, + ); + + if ( $wgPagePropsHaveSortkey ) { + $row['pp_sortkey'] = $this->getPropertySortKeyValue( $value ); + } + + return $row; + } + + /** + * Determines the sort key for the given property value. + * This will return $value if it is a float or int, + * 1 or resp. 0 if it is a bool, and null otherwise. + * + * @note: In the future, we may allow the sortkey to be specified explicitly + * in ParserOutput::setProperty. + * + * @param mixed $value + * + * @return float|null + */ + private function getPropertySortKeyValue( $value ) { + if ( is_int( $value ) || is_float( $value ) || is_bool( $value ) ) { + return floatval( $value ); + } + + return null; + } + + /** * Get an array of interwiki insertions for passing to the DB * Skips the titles specified by the 2-D array $existing * @param array $existing diff --git a/includes/installer/MysqlUpdater.php b/includes/installer/MysqlUpdater.php index bcd50b3..be1c8a7 100644 --- a/includes/installer/MysqlUpdater.php +++ b/includes/installer/MysqlUpdater.php @@ -250,6 +250,7 @@ array( 'addIndex', 'logging', 'log_user_text_time', 'patch-logging_user_text_time_index.sql' ), array( 'addField', 'page', 'page_links_updated', 'patch-page_links_updated.sql' ), array( 'addField', 'user', 'user_password_expires', 'patch-user_password_expire.sql' ), + array( 'addField', 'page_props', 'pp_sortkey', 'patch-pp_sortkey.sql' ), ); } diff --git a/includes/installer/PostgresUpdater.php b/includes/installer/PostgresUpdater.php index c5c10b3..4f74bfa 100644 --- a/includes/installer/PostgresUpdater.php +++ b/includes/installer/PostgresUpdater.php @@ -405,6 +405,9 @@ array( 'addPgField', 'recentchanges', 'rc_source', "TEXT NOT NULL DEFAULT ''" ), array( 
'addPgField', 'page', 'page_links_updated', "TIMESTAMPTZ NULL" ), array( 'addPgField', 'mwuser', 'user_password_expires', 'TIMESTAMPTZ NULL' ), + array( 'addPgField', 'page_props', 'pp_sortkey', 'float NULL' ), + array( 'addPgIndex', 'page_props', 'pp_propname_sortkey_page', + '( pp_propname, pp_sortkey, pp_page ) WHERE ( pp_sortkey NOT NULL )' ), ); } diff --git a/includes/installer/SqliteUpdater.php b/includes/installer/SqliteUpdater.php index ea19efa..15f8160 100644 --- a/includes/installer/SqliteUpdater.php +++ b/includes/installer/SqliteUpdater.php @@ -128,6 +128,7 @@ array( 'addIndex', 'logging', 'log_user_text_time', 'patch-logging_user_text_time_index.sql' ), array( 'addField', 'page', 'page_links_updated', 'patch-page_links_updated.sql' ), array( 'addField', 'user', 'user_password_expires', 'patch-user_password_expire.sql' ), + array( 'addField', 'page_props', 'pp_sortkey', 'patch-pp_sortkey.sql' ), ); } diff --git a/includes/parser/ParserOutput.php b/includes/parser/ParserOutput.php index d1e3e58..cfee3cf 100644 --- a/includes/parser/ParserOutput.php +++ b/includes/parser/ParserOutput.php @@ -398,6 +398,9 @@ * retrieved given the page ID or via a DB join when given the page * title. * + * Since 1.23, page_props are also indexed by numeric value, to allow + * for efficient "top k" queries of pages wrt a given property. + * * setProperty() is thus used to propagate properties from the parsed * page to request contexts other than a page view of the currently parsed * article. diff --git a/maintenance/archives/patch-pp_sortkey.sql b/maintenance/archives/patch-pp_sortkey.sql new file mode 100644 index 0000000..b13b605 --- /dev/null +++ b/maintenance/archives/patch-pp_sortkey.sql @@ -0,0 +1,8 @@ +-- Add a 'sortkey' field to page_props so pages can be efficiently +-- queried by the numeric value of a property. 
+ +ALTER TABLE /*_*/page_props + ADD pp_sortkey float DEFAULT NULL; + +CREATE UNIQUE INDEX /*i*/pp_propname_sortkey_page + ON /*_*/page_props ( pp_propname, pp_sortkey, pp_page ); diff --git a/maintenance/tables.sql b/maintenance/tables.sql index ca610fc..d39ee87 100644 --- a/maintenance/tables.sql +++ b/maintenance/tables.sql @@ -1429,12 +1429,13 @@ CREATE TABLE /*_*/page_props ( pp_page int NOT NULL, pp_propname varbinary(60) NOT NULL, - pp_value blob NOT NULL + pp_value blob NOT NULL, + pp_sortkey float DEFAULT NULL ) /*$wgDBTableOptions*/; CREATE UNIQUE INDEX /*i*/pp_page_propname ON /*_*/page_props (pp_page,pp_propname); CREATE UNIQUE INDEX /*i*/pp_propname_page ON /*_*/page_props (pp_propname,pp_page); - +CREATE UNIQUE INDEX /*i*/pp_propname_sortkey_page ON /*_*/page_props (pp_propname,pp_sortkey,pp_page); -- A table to log updates, one text key row per update. CREATE TABLE /*_*/updatelog ( diff --git a/tests/phpunit/includes/LinksUpdateTest.php b/tests/phpunit/includes/LinksUpdateTest.php index 33643ac..ddb521b 100644 --- a/tests/phpunit/includes/LinksUpdateTest.php +++ b/tests/phpunit/includes/LinksUpdateTest.php @@ -180,14 +180,48 @@ * @covers ParserOutput::setProperty */ public function testUpdate_page_props() { + global $wgPagePropsHaveSortkey; + /** @var ParserOutput $po */ list( $t, $po ) = $this->makeTitleAndParserOutput( "Testing", 111 ); - $po->setProperty( "foo", "bar" ); + $fields = array( 'pp_propname', 'pp_value' ); + $expected = array(); - $this->assertLinksUpdate( $t, $po, 'page_props', 'pp_propname, pp_value', 'pp_page = 111', array( - array( 'foo', 'bar' ), - ) ); + $po->setProperty( "bool", true ); + $expected[] = array( "bool", true ); + + $po->setProperty( "float", 4.0 + 1.0/4.0 ); + $expected[] = array( "float", 4.0 + 1.0/4.0 ); + + $po->setProperty( "int", -7 ); + $expected[] = array( "int", -7 ); + + $po->setProperty( "string", "33 bar" ); + $expected[] = array( "string", "33 bar" ); + + // compute expected sortkey values + if ( 
$wgPagePropsHaveSortkey ) { + $fields[] = 'pp_sortkey'; + + foreach ( $expected as &$row ) { + $value = $row[1]; + + if ( is_int( $value ) || is_float( $value ) || is_bool( $value ) ) { + $row[] = floatval( $value ); + } else { + $row[] = null; + } + } + } + + $this->assertLinksUpdate( $t, $po, 'page_props', $fields, 'pp_page = 111', $expected ); + } + + public function testUpdate_page_props_without_sortkey() { + $this->setMwGlobals( 'wgPagePropsHaveSortkey', false ); + + $this->testUpdate_page_props(); } // @todo test recursive, too! -- To view, visit https://gerrit.wikimedia.org/r/122349 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I217c42656fb877ff35a36eb446a22bdaf119faac Gerrit-PatchSet: 5 Gerrit-Project: mediawiki/core Gerrit-Branch: master Gerrit-Owner: Daniel Kinzler <daniel.kinz...@wikimedia.de> Gerrit-Reviewer: Aaron Schulz <asch...@wikimedia.org> Gerrit-Reviewer: Aude <aude.w...@gmail.com> Gerrit-Reviewer: Catrope <roan.katt...@gmail.com> Gerrit-Reviewer: Chad <ch...@wikimedia.org> Gerrit-Reviewer: Cscott <canan...@wikimedia.org> Gerrit-Reviewer: Daniel Friesen <dan...@nadir-seen-fire.com> Gerrit-Reviewer: Hoo man <h...@online.de> Gerrit-Reviewer: Manybubbles <never...@wikimedia.org> Gerrit-Reviewer: Parent5446 <tylerro...@gmail.com> Gerrit-Reviewer: Reedy <re...@wikimedia.org> Gerrit-Reviewer: Springle <sprin...@wikimedia.org> Gerrit-Reviewer: Tim Starling <tstarl...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits