Yaron Koren has uploaded a new change for review. https://gerrit.wikimedia.org/r/278046
Change subject: Added handling for Cargo page data DB table(s) ...................................................................... Added handling for Cargo page data DB table(s) Change-Id: Ib7d76a38e1e8bbf589e0f784f2f7f1275e885bf5 --- M Cargo.hooks.php M Cargo.php A CargoPageData.php M extension.json A maintenance/setCargoPageData.php M specials/CargoPageValues.php M specials/CargoTables.php 7 files changed, 235 insertions(+), 5 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Cargo refs/changes/46/278046/1 diff --git a/Cargo.hooks.php b/Cargo.hooks.php index fb2625d..dd9a275 100644 --- a/Cargo.hooks.php +++ b/Cargo.hooks.php @@ -137,6 +137,7 @@ // Now, delete from the "main" table. $cdb->delete( $curMainTable, array( '_pageID' => $pageID ) ); } + $cdb->delete( '_pageData', array( '_pageID' => $pageID ) ); // Finally, delete from cargo_pages. $dbw->delete( 'cargo_pages', array( 'page_id' => $pageID ) ); @@ -177,6 +178,9 @@ CargoStore::$settings['origin'] = 'page save'; CargoUtils::parsePageForStorage( $article->getTitle(), $content->getNativeData() ); + // Also, save the "page data". + CargoPageData::storeValuesForPage( $article->getTitle() ); + return true; } diff --git a/Cargo.php b/Cargo.php index 6a32edd..f2d9e15 100644 --- a/Cargo.php +++ b/Cargo.php @@ -11,6 +11,13 @@ // extensions to determine whether Cargo is installed. define( 'CARGO_VERSION', '0.11' ); +define( 'CARGO_STORE_CREATION_DATE', 1 ); +define( 'CARGO_STORE_MODIFICATION_DATE', 2 ); +define( 'CARGO_STORE_CREATOR', 3 ); +define( 'CARGO_STORE_FULL_TEXT', 4 ); +define( 'CARGO_STORE_CATEGORIES', 5 ); +define( 'CARGO_STORE_NUM_REVISIONS', 6 ); + // There are bugs in MW 1.25 and 1.26 that make extension.json // unusable for Cargo - for simplicity's sake, don't load extensions // unless we're at version 1.27 or higher. @@ -89,6 +96,7 @@ $wgAutoloadClasses['CargoCompoundQuery'] = $dir . '/parserfunctions/CargoCompoundQuery.php'; $wgAutoloadClasses['CargoSQLQuery'] = $dir . '/CargoSQLQuery.php'; $wgAutoloadClasses['CargoQueryDisplayer'] = $dir . '/CargoQueryDisplayer.php'; +$wgAutoloadClasses['CargoPageData'] = $dir . '/CargoPageData.php'; $wgAutoloadClasses['CargoRecurringEvent'] = $dir . '/parserfunctions/CargoRecurringEvent.php'; $wgAutoloadClasses['CargoDisplayMap'] = $dir . '/parserfunctions/CargoDisplayMap.php'; $wgAutoloadClasses['CargoPopulateTableJob'] = $dir . '/CargoPopulateTableJob.php'; @@ -317,3 +325,5 @@ $wgCargoDrilldownLargestFontSize = -1; $wgCargoDrilldownMinValuesForComboBox = 40; $wgCargoDrilldownNumRangesForNumbers = 5; + +$wgCargoPageDataColumns = array(); diff --git a/CargoPageData.php b/CargoPageData.php new file mode 100644 index 0000000..04b4227 --- /dev/null +++ b/CargoPageData.php @@ -0,0 +1,114 @@ +<?php + +/** + * Static functions for dealing with the "_pageData" table. + * + * @author Yaron Koren + */ +class CargoPageData { + + static function getTableSchema() { + global $wgCargoPageDataColumns; + + $fieldTypes = array(); + + if ( in_array( CARGO_STORE_CREATION_DATE, $wgCargoPageDataColumns ) ) { + $fieldTypes['_creationDate'] = array( 'Date', false ); + } + if ( in_array( CARGO_STORE_MODIFICATION_DATE, $wgCargoPageDataColumns ) ) { + $fieldTypes['_modificationDate'] = array( 'Date', false ); + } + if ( in_array( CARGO_STORE_CREATOR, $wgCargoPageDataColumns ) ) { + $fieldTypes['_creator'] = array( 'String', false ); + } + if ( in_array( CARGO_STORE_FULL_TEXT, $wgCargoPageDataColumns ) ) { + $fieldTypes['_fullText'] = array( 'Searchtext', false ); + } + if ( in_array( CARGO_STORE_CATEGORIES, $wgCargoPageDataColumns ) ) { + $fieldTypes['_categories'] = array( 'String', true ); + } + if ( in_array( CARGO_STORE_NUM_REVISIONS, $wgCargoPageDataColumns ) ) { + $fieldTypes['_numRevisions'] = array( 'Integer', false ); + } + + $tableSchema = new CargoTableSchema(); + foreach ( $fieldTypes as $field => $fieldVals ) { + list ( $type, $isList ) = $fieldVals; + $fieldDesc = new CargoFieldDescription(); + $fieldDesc->mType = $type; + if ( $isList ) { + $fieldDesc->mIsList = true; + $fieldDesc->setDelimiter( '|' ); + } + $tableSchema->mFieldDescriptions[$field] = $fieldDesc; + } + + return $tableSchema; + } + + public static function storeValuesForPage( $title ) { + global $wgCargoPageDataColumns; + + if ( $title == null ) { + return; + } + + $wikiPage = WikiPage::factory( $title ); + $pageDataValues = array(); + + if ( in_array( CARGO_STORE_CREATION_DATE, $wgCargoPageDataColumns ) ) { + $firstRevision = $title->getFirstRevision(); + if ( $firstRevision == null ) { + // This can sometimes happen. + $pageDataValues['_creationDate'] = null; + } else { + $pageDataValues['_creationDate'] = $firstRevision->getTimestamp(); + } + } + if ( in_array( CARGO_STORE_MODIFICATION_DATE, $wgCargoPageDataColumns ) ) { + $pageDataValues['_modificationDate'] = $wikiPage->getTimestamp(); + } + if ( in_array( CARGO_STORE_CREATOR, $wgCargoPageDataColumns ) ) { + $pageDataValues['_creator'] = $wikiPage->getCreator(); + } + if ( in_array( CARGO_STORE_FULL_TEXT, $wgCargoPageDataColumns ) ) { + $article = new Article( $title ); + $pageDataValues['_fullText'] = $article->getContent(); + } + if ( in_array( CARGO_STORE_CATEGORIES, $wgCargoPageDataColumns ) ) { + $pageCategories = array(); + $dbr = wfGetDB( DB_SLAVE ); + $res = $dbr->select( + 'categorylinks', + 'cl_to', + array( 'cl_from' => $title->getArticleID() ), + __METHOD__ + ); + foreach ( $res as $row ) { + $pageCategories[] = $row->cl_to; + } + + $pageCategoriesString = implode( '|', $pageCategories ); + $pageDataValues['_categories'] = $pageCategoriesString; + } + if ( in_array( CARGO_STORE_NUM_REVISIONS, $wgCargoPageDataColumns ) ) { + $dbr = wfGetDB( DB_SLAVE ); + $res = $dbr->select( + 'revision', + 'COUNT(*)', + array( 'rev_page' => $title->getArticleID() ), + __METHOD__ + ); + $row = $dbr->fetchRow( $res ); + $pageDataValues['_numRevisions'] = $row[0]; + } + + $tableSchemas = CargoUtils::getTableSchemas( array( '_pageData' ) ); + if ( !array_key_exists( '_pageData', $tableSchemas ) ) { + return false; + } + + CargoStore::storeAllData( $title, '_pageData', $pageDataValues, $tableSchemas['_pageData'] ); + } + +} diff --git a/extension.json b/extension.json index cc95a96..906d12a 100644 --- a/extension.json +++ b/extension.json @@ -51,6 +51,7 @@ "CargoCompoundQuery": "parserfunctions/CargoCompoundQuery.php", "CargoSQLQuery": "CargoSQLQuery.php", "CargoQueryDisplayer": "CargoQueryDisplayer.php", + "CargoPageData": "CargoPageData.php", "CargoRecurringEvent": "parserfunctions/CargoRecurringEvent.php", "CargoDisplayMap": "parserfunctions/CargoDisplayMap.php", "CargoPopulateTableJob": "CargoPopulateTableJob.php", @@ -284,7 +285,8 @@ "CargoDrilldownSmallestFontSize": -1, "CargoDrilldownLargestFontSize": -1, "CargoDrilldownMinValuesForComboBox": 40, - "CargoDrilldownNumRangesForNumbers": 5 + "CargoDrilldownNumRangesForNumbers": 5, + "CargoPageDataColumns": [] }, "manifest_version": 1 } diff --git a/maintenance/setCargoPageData.php b/maintenance/setCargoPageData.php new file mode 100644 index 0000000..6fa4236 --- /dev/null +++ b/maintenance/setCargoPageData.php @@ -0,0 +1,73 @@ +<?php + +/** + * This script populates the Cargo _pageData DB table (and possibly other + * auxiliary tables) for all pages in the wiki. + * + * Usage: + * no parameters + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @author Yaron Koren + * @ingroup Maintenance + */ + +require_once( dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' ); + +$maintClass = "SetCargoPageData"; + +class SetCargoPageData extends Maintenance { + + public function __construct() { + parent::__construct(); + + $this->mDescription = "Stores a set of data each page in the wiki in one or more database tables, for use within Cargo queries."; + } + + public function execute() { + global $wgCargoPageDataColumns; + + $cdb = CargoUtils::getDB(); + $dbw = wfGetDB( DB_MASTER ); + + $cdb->dropTable( '_pageData' ); + $dbw->delete( 'cargo_tables', array( 'main_table' => '_pageData' ) ); + + $tableSchema = CargoPageData::getTableSchema(); + $tableSchemaString = $tableSchema->toDBString(); + + CargoUtils::createCargoTableOrTables( $cdb, $dbw, '_pageData', $tableSchema, $tableSchemaString, 0 ); + + $dbr = wfGetDB( DB_SLAVE ); + + $pages = $dbr->select( 'page', array( 'page_id' ) ); + + while ( $page = $pages->fetchObject() ) { + $title = Title::newFromID( $page->page_id ); + if ( $title == null ) { + continue; + } + CargoPageData::storeValuesForPage( $title ); + $this->output( wfTimestamp( TS_DB ) . ' Stored page data for page "' . $title->getFullText() . "\".\n" ); + } + + $this->output( "\n Finished populating page data table(s).\n" ); + } + +} + +require_once( DO_MAINTENANCE ); diff --git a/specials/CargoPageValues.php b/specials/CargoPageValues.php index 473b298..1211295 100644 --- a/specials/CargoPageValues.php +++ b/specials/CargoPageValues.php @@ -17,6 +17,8 @@ } function execute( $subpage = null ) { + global $wgCargoPageDataColumns; + if ( $subpage ) { // Allow inclusion with e.g. {{Special:PageValues/Book}} $this->mTitle = Title::newFromText( $subpage ); @@ -37,11 +39,28 @@ $text = ''; $dbw = wfGetDB( DB_MASTER ); + + $tableNames = array(); + + // Make _pageData the first table, if it holds any real data. + if ( count( $wgCargoPageDataColumns ) > 0 ) { + $tableNames[] = '_pageData'; + } + $res = $dbw->select( 'cargo_pages', 'table_name', array( 'page_id' => $this->mTitle->getArticleID() ) ); while ( $row = $dbw->fetchRow( $res ) ) { - $tableName = $row['table_name']; - $queryResults = $this->getRowsForPageInTable( $tableName ); + $tableNames[] = $row['table_name']; + } + + foreach ( $tableNames as $tableName ) { + try { + $queryResults = $this->getRowsForPageInTable( $tableName ); + } catch ( Exception $e ) { + // Most likely this is because the _pageData + // table doesn't exist. + continue; + } $text .= Html::element( 'h2', null, $this->msg( 'cargo-pagevalues-tablevalues', $tableName )->text() ) . "\n"; foreach ( $queryResults as $rowValues ) { @@ -75,7 +94,8 @@ // @TODO - do some custom formatting } - $fieldAlias = str_replace( '_', ' ', $fieldName ); + //$fieldAlias = str_replace( '_', ' ', $fieldName ); + $fieldAlias = $fieldName; if ( $fieldDescription->mIsList ) { $aliasedFieldNames[$fieldAlias] = $fieldName . '__full'; @@ -101,6 +121,9 @@ * Based on MediaWiki's InfoAction::addRow() */ function printRow( $name, $value ) { + if ( $name == '_fullText' && strlen( $value ) > 300 ) { + $value = substr( $value, 0, 300 ) . ' ...'; + } return Html::rawElement( 'tr', array(), Html::rawElement( 'td', array( 'style' => 'vertical-align: top;' ), $name ) . Html::rawElement( 'td', array(), $value ) diff --git a/specials/CargoTables.php b/specials/CargoTables.php index 5f541ef..70ae150 100644 --- a/specials/CargoTables.php +++ b/specials/CargoTables.php @@ -55,7 +55,11 @@ continue; } - $fieldAlias = str_replace( '_', ' ', $fieldName ); + if ( $fieldName[0] != '_' ) { + $fieldAlias = str_replace( '_', ' ', $fieldName ); + } else { + $fieldAlias = $fieldName; + } $fieldType = $fieldDescription->mType; // Special handling for URLs, to avoid them // overwhelming the page. -- To view, visit https://gerrit.wikimedia.org/r/278046 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ib7d76a38e1e8bbf589e0f784f2f7f1275e885bf5 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Cargo Gerrit-Branch: master Gerrit-Owner: Yaron Koren <yaro...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits