MaxSem has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/134851

Change subject: Kill Solr support
......................................................................

Kill Solr support

Change-Id: I29fc406ba085795db68911487ee074b3eae6e8a2
---
M GeoData.body.php
M GeoData.php
M GeoDataHooks.php
D api/ApiQueryGeoSearchSolr.php
D solr/SolrGeoData.php
D solr/SolrUpdateJob.php
D solr/SolrUpdateWork.php
D solr/schema.xml
D solrupdate.php
A sql/drop-updates-killlist.sql
M sql/externally-backed.sql
D sql/wmfFixTables.sql
12 files changed, 5 insertions(+), 854 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/GeoData 
refs/changes/51/134851/1

diff --git a/GeoData.body.php b/GeoData.body.php
index 5c088c7..71f3856 100644
--- a/GeoData.body.php
+++ b/GeoData.body.php
@@ -180,15 +180,4 @@
        public static function pickRandom( $weights ) {
                return ArrayUtils::pickRandom( $weights );
        }
-
-       /**
-        * Adds an update job if needed
-        */
-       public static function maybeUpdate() {
-               global $wgGeoDataBackend, $wgGeoDataUpdatesViaJob;
-
-               if ( $wgGeoDataBackend == 'solr' && $wgGeoDataUpdatesViaJob ) {
-                       JobQueueGroup::singleton()->push( new SolrUpdateJob( 
null ) );
-               }
-       }
 }
diff --git a/GeoData.php b/GeoData.php
index 919c11f..913f7b7 100644
--- a/GeoData.php
+++ b/GeoData.php
@@ -17,7 +17,6 @@
 $wgAutoloadClasses['ApiQueryGeoSearch'] = "$dir/api/ApiQueryGeoSearch.php";
 $wgAutoloadClasses['ApiQueryGeoSearchDb'] = "$dir/api/ApiQueryGeoSearchDb.php";
 $wgAutoloadClasses['ApiQueryGeoSearchElastic'] = 
"$dir/api/ApiQueryGeoSearchElastic.php";
-$wgAutoloadClasses['ApiQueryGeoSearchSolr'] = 
"$dir/api/ApiQueryGeoSearchSolr.php";
 $wgAutoloadClasses['ApiQueryAllPages_GeoData'] = 
"$dir/api/ApiQueryAllPages_GeoData.php";
 $wgAutoloadClasses['ApiQueryCategoryMembers_GeoData'] = 
"$dir/api/ApiQueryCategoryMembers_GeoData.php";
 $wgAutoloadClasses['GeoDataQueryExtender'] = 
"$dir/api/GeoDataQueryExtender.php";
@@ -28,11 +27,6 @@
 $wgAutoloadClasses['GeoData'] = "$dir/GeoData.body.php";
 $wgAutoloadClasses['GeoDataHooks'] = "$dir/GeoDataHooks.php";
 $wgAutoloadClasses['GeoDataMath'] = "$dir/GeoDataMath.php";
-$wgAutoloadClasses['SolrUpdate'] = "$dir/solrupdate.php";
-$wgAutoloadClasses['SolrUpdateJob'] = "$dir/solr/SolrUpdateJob.php";
-$wgAutoloadClasses['SolrUpdateWork'] = "$dir/solr/SolrUpdateWork.php";
-
-$wgAutoloadClasses['SolrGeoData'] = "$dir/solr/SolrGeoData.php";
 
 $wgMessagesDirs['GeoData'] = __DIR__ . '/i18n';
 $wgExtensionMessagesFiles['GeoData'] = "$dir/GeoData.i18n.php";
@@ -62,8 +56,6 @@
                $wgAPIListModules['geosearch'] = 'ApiQueryGeoSearch' . ucfirst( 
$wgGeoDataBackend );
        }
 }
-
-$wgJobClasses['solrUpdate'] = 'SolrUpdateJob';
 
 // Tracking categories for Special:TrackingCategories
 $wgTrackingCategories[] = 'geodata-broken-tags-category';
@@ -190,48 +182,9 @@
 $wgGeoDataIndexGranularity = 10;
 
 /**
- * Which backend should be used by spatial searhces: 'db', 'solr' or 'elastic'
+ * Which backend should be used by spatial searches: 'db' or 'elastic'
  */
 $wgGeoDataBackend = 'db';
-
-
-// Solr-specific settings
-
-/**
- * Generic Solr connection options, see Solarium docs.
- * Note: host must be set in $wgGeoDataSolrHosts for load-balancicng.
- */
-$wgGeoDataSolrOptions = array(
-       'adapteroptions' => array(
-               //'host' => '127.0.0.1',
-               'port' => 8983,
-               'path' => '/solr/',
-       ),
-);
-
-/**
- * @var string|array: Solr host, string "hostname" or array( 'host1' => 
weight1, 'host2' => weight2 ... )
- */
-$wgGeoDataSolrHosts = 'localhost';
-
-/**
- * @var string: Solr master used for updates
- */
-$wgGeoDataSolrMaster = 'localhost';
-
-/**
- * @var int|string: Commit policy
- * Possible values:
- * - 'never': Never commit explicitly, let Solr decide on its own.
- * - 'immediate': Commit after every change.
- * - (some number): Commit within this number of milliseconds.
- */
-$wgGeoDataSolrCommitPolicy = 'immediate';
-
-/**
- * Whether search index should be updated via jobs. Supported only for Solr.
- */
-$wgGeoDataUpdatesViaJob = false;
 
 /**
  * Specifies which information about page's primary coordinate is added to 
global JS variable wgCoordinates.
diff --git a/GeoDataHooks.php b/GeoDataHooks.php
index 3f3181f..a98902e 100644
--- a/GeoDataHooks.php
+++ b/GeoDataHooks.php
@@ -20,6 +20,7 @@
                        case 'mysql':
                                if ( $wgGeoDataBackend != 'db' ) {
                                        $updater->addExtensionTable( 
'geo_tags', dirname( __FILE__ ) . '/sql/externally-backed.sql' );
+                                       $updater->dropExtensionTable( 
'geo_killlist', dirname( __FILE__ ) . '/sql/drop-updates-killlist.sql' );
                                } else {
                                        $updater->addExtensionTable( 
'geo_tags', dirname( __FILE__ ) . '/sql/db-backed.sql' );
                                }
@@ -77,22 +78,10 @@
         * @return bool
         */
        public static function onArticleDeleteComplete( &$article, User &$user, 
$reason, $id ) {
-               global $wgGeoDataBackend;
 
                wfProfileIn( __METHOD__ );
                $dbw = wfGetDB( DB_MASTER );
-               if ( $wgGeoDataBackend == 'solr' ) {
-                       $res = $dbw->select( 'geo_tags', 'gt_id', array( 
'gt_page_id' => $id ), __METHOD__ );
-                       $killlist = array();
-                       foreach ( $res as $row ) {
-                               $killlist[] = array( 'gk_killed_id' => 
$row->gt_id );
-                       }
-                       if ( $killlist ) {
-                               $dbw->insert( 'geo_killlist', $killlist, 
__METHOD__ );
-                       }
-               }
                $dbw->delete( 'geo_tags', array( 'gt_page_id' => $id ), 
__METHOD__ );
-               GeoData::maybeUpdate();
                wfProfileOut( __METHOD__ );
 
                return true;
@@ -128,7 +117,6 @@
                } else {
                        self::doSmartUpdate( $data, $linksUpdate->mId );
                }
-               GeoData::maybeUpdate();
                wfProfileOut( __METHOD__ );
 
                return true;
@@ -218,12 +206,6 @@
                if ( count( $delete ) ) {
                        $deleteIds = array_keys( $delete );
                        $dbw->delete( 'geo_tags', array( 'gt_id' => $deleteIds 
), __METHOD__ );
-                       if ( $wgGeoDataBackend != 'db' ) {
-                               $rows = array_map( function( $id ) {
-                                       return array( 'gk_killed_id' => $id );
-                               }, $deleteIds );
-                               $dbw->insert( 'geo_killlist', $rows, __METHOD__ 
);
-                       }
                }
                if ( count( $add ) ) {
                        $dbw->insert( 'geo_tags', $add, __METHOD__ );
diff --git a/api/ApiQueryGeoSearchSolr.php b/api/ApiQueryGeoSearchSolr.php
deleted file mode 100644
index 8fc1820..0000000
--- a/api/ApiQueryGeoSearchSolr.php
+++ /dev/null
@@ -1,114 +0,0 @@
-<?php
-
-class ApiQueryGeoSearchSolr extends ApiQueryGeoSearch {
-       public function __construct( $query, $moduleName ) {
-               parent::__construct( $query, $moduleName );
-       }
-
-       /**
-        * @param ApiPageSet $resultPageSet
-        */
-       protected function run( $resultPageSet = null ) {
-               global $wgDefaultGlobe;
-
-               wfProfileIn( __METHOD__ );
-               parent::run( $resultPageSet );
-
-               try {
-                       $params = $this->extractRequestParams();
-
-                       $solr = SolrGeoData::newClient();
-                       $query = $solr->createSelect();
-                       $helper = $query->getHelper();
-
-                       // @todo: props
-                       $query->setQueryDefaultOperator( 'AND' );
-                       $query->createFilterQuery( 'wiki' )->setQuery( 'wiki:' 
. wfWikiID() ); // Only Earth is supported
-                       $query->createFilterQuery( 'globe' )->setQuery( 
'globe:earth' ); // Only Earth is supported
-                       if ( isset( $params['maxdim'] ) ) {
-                               $query->addFilterQuery( "dim:[* TO 
{$params['maxdim']}]" );
-                       }
-                       $primary = $params['primary'];
-                       if ( $primary !== 'all' ) {
-                               $query->createFilterQuery( 'primary' 
)->setQuery( 'primary:' . intval( $primary === 'primary' ) );
-                       }
-                       $query->createFilterQuery( 'coord' )->setQuery( 
$helper->geofilt( $this->lat, $this->lon, 'coord', $this->radius / 1000 ) );
-                       $query->addSort( $helper->geodist( $this->lat, 
$this->lon, 'coord' ), Solarium_Query_Select::SORT_ASC );
-
-                       $limit = $params['limit'];
-                       $query->setRows( $limit + ( $this->idToExclude ? 1 : 0 
) ); // +1 in case we need to exclude a page
-
-                       wfProfileIn( __METHOD__ . '-solr' );
-                       $docs = $solr->select( $query );
-                       wfProfileOut( __METHOD__ . '-solr' );
-                       $mapping = array();
-                       foreach ( $docs as $doc ) {
-                               $id = $doc->page_id;
-                               if ( !isset( $mapping[$id] ) && $id != 
$this->idToExclude ) {
-                                       $mapping[$id] = $doc;
-                               }
-                       }
-
-                       if ( !count( $mapping ) ) {
-                               wfProfileOut( __METHOD__ );
-                               return; // No results, no point in doing 
anything else
-                       }
-                       $this->addWhere( array( 'page_id' => array_keys( 
$mapping ) ) );
-
-                       wfProfileIn( __METHOD__ . '-sql' );
-                       $res = $this->select( __METHOD__ );
-                       wfProfileOut( __METHOD__ . '-sql' );
-
-                       $result = $this->getResult();
-                       $rows = array();
-                       foreach ( $res as $row ) {
-                               $rows[$row->page_id] = $row;
-                       }
-
-                       foreach ( $mapping as $id => $doc ) {
-                               if ( !$limit-- ) {
-                                       break;
-                               }
-                               if ( !isset( $rows[$id] ) ) {
-                                       continue;
-                               }
-                               $row = $rows[$id];
-                               if ( is_null( $resultPageSet ) ) {
-                                       $title = Title::newFromRow( $row );
-                                       list( $lat, $lon ) = explode( ',', 
$doc->coord );
-                                       $vals = array(
-                                               'pageid' => intval( 
$row->page_id ),
-                                               'ns' => intval( 
$title->getNamespace() ),
-                                               'title' => 
$title->getPrefixedText(),
-                                               'lat' => floatval( $lat ),
-                                               'lon' => floatval( $lon ),
-                                               'dist' => round( 
GeoDataMath::distance( $lat, $lon, $this->lat, $this->lon ), 1 ),
-                                       );
-
-                                       if ( $doc->primary ) {
-                                               $vals['primary'] = '';
-                                       }
-                                       foreach( $params['prop'] as $prop ) {
-                                               // Don't output default globe
-                                               if ( !( $prop === 'globe' && 
$doc->$prop === $wgDefaultGlobe ) ) {
-                                                       $vals[$prop] = 
$doc->$prop;
-                                               }
-                                       }
-                                       $fit = $result->addValue( array( 
'query', $this->getModuleName() ), null, $vals );
-                                       if ( !$fit ) {
-                                               break;
-                                       }
-                               } else {
-                                       $resultPageSet->processDbRow( $row );
-                               }
-                       }
-               } catch ( Solarium_Exception $e ) {
-                       throw new MWException( get_class( $e ) . " at 
{$e->getFile()}, line {$e->getLine()}: {$e->getMessage()}", 0, $e );
-               }
-               wfProfileOut( __METHOD__ );
-       }
-
-       public function getVersion() {
-               return __CLASS__ . ': $Id$';
-       }
-}
diff --git a/solr/SolrGeoData.php b/solr/SolrGeoData.php
deleted file mode 100644
index 3e71975..0000000
--- a/solr/SolrGeoData.php
+++ /dev/null
@@ -1,21 +0,0 @@
-<?php
-
-class SolrGeoData {
-       /**
-        * @param bool $master
-        *
-        * @return Solarium_Client
-        */
-       public static function newClient( $master = false ) {
-               global $wgGeoDataSolrOptions, $wgGeoDataSolrHosts, 
$wgGeoDataSolrMaster;
-
-               $options = $wgGeoDataSolrOptions;
-               if ( $master ) {
-                       $options['adapteroptions']['host'] = 
$wgGeoDataSolrMaster;
-               } else {
-                       $options['adapteroptions']['host'] = 
GeoData::pickRandom( $wgGeoDataSolrHosts );
-               }
-
-               return new Solarium_Client( $options );
-       }
-}
diff --git a/solr/SolrUpdateJob.php b/solr/SolrUpdateJob.php
deleted file mode 100644
index 8a857a2..0000000
--- a/solr/SolrUpdateJob.php
+++ /dev/null
@@ -1,25 +0,0 @@
-<?php
-
-class SolrUpdateJob extends Job {
-
-       public function __construct( $title, $params = array(), $id = 0 ) {
-               parent::__construct( 'solrUpdate', Title::newMainPage(), 
$params, $id );
-               $this->removeDuplicates = true;
-       }
-
-       /**
-        * Run the job
-        * @return boolean success
-        */
-       public function run() {
-               global $wgGeoDataUpdatesViaJob;
-
-               // Allow disabling jobs on the fly
-               if ( $wgGeoDataUpdatesViaJob ) {
-                       $maint = new SolrUpdate();
-                       $maint->enableJobMode();
-                       $maint->execute();
-               }
-               return true;
-       }
-}
diff --git a/solr/SolrUpdateWork.php b/solr/SolrUpdateWork.php
deleted file mode 100644
index 83de0ba..0000000
--- a/solr/SolrUpdateWork.php
+++ /dev/null
@@ -1,15 +0,0 @@
-<?php
-
-class SolrUpdateWork extends PoolCounterWork {
-       private $maint;
-
-       public function __construct( SolrUpdate $maint ) {
-               parent::__construct( 'solrUpdate', '*' );
-               $this->maint = $maint;
-       }
-
-       function doWork() {
-               $this->maint->safeExecute();
-               return true;
-       }
-}
diff --git a/solr/schema.xml b/solr/schema.xml
deleted file mode 100644
index 3d6e56d..0000000
--- a/solr/schema.xml
+++ /dev/null
@@ -1,330 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!--  
- This is the Solr schema file. This file should be named "schema.xml" and
- should be in the conf directory under the solr home
- (i.e. ./solr/conf/schema.xml by default) 
- or located where the classloader for the Solr webapp can find it.
-
- This example schema is the recommended starting point for users.
- It should be kept correct and concise, usable out-of-the-box.
-
- For more information, on how to customize this file, please see
- http://wiki.apache.org/solr/SchemaXml
-
- PERFORMANCE NOTE: this schema includes many optional features and should not
- be used for benchmarking.  To improve performance one could
-  - set stored="false" for all fields possible (esp large fields) when you
-    only need to search on the field but don't need to return the original
-    value.
-  - set indexed="false" if you don't need to search on the field, but only
-    return the field as a result of searching on other indexed fields.
-  - remove all unneeded copyField statements
-  - for best index size and searching performance, set "index" to false
-    for all general text fields, use copyField to copy them to the
-    catchall "text" field, and use that for searching.
-  - For maximum indexing performance, use the StreamingUpdateSolrServer
-    java client.
-  - Remember to run the JVM in server mode, and use a higher logging level
-    that avoids logging every request
--->
-
-<schema name="geodata" version="1.5">
-
- <fields>
-   <!-- Valid attributes for fields:
-     name: mandatory - the name for the field
-     type: mandatory - the name of a field type from the 
-       <types> fieldType section
-     indexed: true if this field should be indexed (searchable or sortable)
-     stored: true if this field should be retrievable
-     multiValued: true if this field may contain multiple values per document
-     omitNorms: (expert) set to true to omit the norms associated with
-       this field (this disables length normalization and index-time
-       boosting for the field, and saves some memory).  Only full-text
-       fields or fields that need an index-time boost need norms.
-       Norms are omitted for primitive (non-analyzed) types by default.
-     termVectors: [false] set to true to store the term vector for a
-       given field.
-       When using MoreLikeThis, fields used for similarity should be
-       stored for best performance.
-     termPositions: Store position information with the term vector.  
-       This will increase storage costs.
-     termOffsets: Store offset information with the term vector. This 
-       will increase storage costs.
-     required: The field is required.  It will throw an error if the
-       value does not exist
-     default: a value that should be used if no value is specified
-       when adding a document.
-   -->
-
-   <!-- field names should consist of alphanumeric or underscore characters 
only and
-      not start with a digit.  This is not currently strictly enforced,
-      but other field names will not have first class support from all 
components
-      and back compatibility is not guaranteed.  Names with both leading and
-      trailing underscores (e.g. _version_) are reserved.
-   -->
-        
-   <field name="id" type="string" indexed="true" stored="true" required="true" 
/> 
-   <field name="wiki" type="string" indexed="true" stored="false" 
required="true" omitNorms="true"/> 
-   <field name="coord" type="location" indexed="true" stored="true" 
required="true"/>
-   <field name="page_id" type="tlong" indexed="true" stored="true" 
required="true"/>
-   <field name="globe" type="string" indexed="true" stored="true" 
omitNorms="true" required="true"/>
-   <field name="primary" type="boolean" indexed="true" stored="true" 
required="true"/>
-   <field name="dim" type="float" indexed="true" stored="true" 
required="false"/>
-   <field name="type" type="string" indexed="false" stored="true" 
omitNorms="true"/>
-   <field name="name" type="string" indexed="false" stored="true" 
omitNorms="true"/>
-   <field name="country" type="string" indexed="true" stored="true" 
omitNorms="true"/>
-   <field name="region" type="string" indexed="true" stored="true" 
omitNorms="true"/>
-
-   <!-- Dynamic field definitions allow using convention over configuration
-       for fields via the specification of patterns to match field names. 
-       EXAMPLE:  name="*_i" will match any field ending in _i (like myid_i, 
z_i)
-       RESTRICTION: the glob-like pattern in the name attribute must have
-       a "*" only at the start or the end.  -->
-   
-   <!-- Type used to index the lat and lon components for the "location" 
FieldType -->
-   <dynamicField name="*_coordinate"  type="tdouble" indexed="true"  
stored="false" />
-
-   <!-- Uncommenting the following will create a "timestamp" field using
-        a default value of "NOW" to indicate when each document was indexed.
-     -->
-   <!--
-   <field name="timestamp" type="date" indexed="true" stored="true" 
default="NOW" multiValued="false"/>
-     -->
-
-   <!-- uncomment the following to ignore any fields that don't already match 
an existing 
-        field name or dynamic field, rather than reporting them as an error. 
-        alternately, change the type="ignored" to some other type e.g. "text" 
if you want 
-        unknown fields indexed and/or stored by default --> 
-   <!--dynamicField name="*" type="ignored" multiValued="true" /-->
-   
- </fields>
-
- <!-- Field to use to determine and enforce document uniqueness. 
-      Unless this field is marked with required="false", it will be a required 
field
-   -->
- <uniqueKey>id</uniqueKey>
-
- <!-- DEPRECATED: The defaultOperator (AND|OR) is consulted by various query 
parsers
-  when parsing a query string to determine if a clause of the query should be 
marked as
-  required or optional, assuming the clause isn't already marked by some 
operator.
-  The default is OR, which is generally assumed so it is not a good idea to 
change it
-  globally here.  The "q.op" request parameter takes precedence over this.
- <solrQueryParser defaultOperator="OR"/> -->
-
-
-  <types>
-    <!-- field type definitions. The "name" attribute is
-       just a label to be used by field definitions.  The "class"
-       attribute and any other attributes determine the real
-       behavior of the fieldType.
-         Class names starting with "solr" refer to java classes in a
-       standard package such as org.apache.solr.analysis
-    -->
-
-    <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
-    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
-
-    <!-- boolean type: "true" or "false" -->
-    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
-
-    <!-- sortMissingLast and sortMissingFirst attributes are optional 
attributes are
-         currently supported on types that are sorted internally as strings
-         and on numeric types.
-            This includes "string","boolean", and, as of 3.5 (and 4.x),
-            int, float, long, date, double, including the "Trie" variants.
-       - If sortMissingLast="true", then a sort on this field will cause 
documents
-         without the field to come after documents with the field,
-         regardless of the requested sort order (asc or desc).
-       - If sortMissingFirst="true", then a sort on this field will cause 
documents
-         without the field to come before documents with the field,
-         regardless of the requested sort order.
-       - If sortMissingLast="false" and sortMissingFirst="false" (the default),
-         then default lucene sorting will be used which places docs without the
-         field first in an ascending sort and last in a descending sort.
-    -->    
-
-    <!--
-      Default numeric field types. For faster range queries, consider the 
tint/tfloat/tlong/tdouble types.
-    -->
-    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" 
positionIncrementGap="0"/>
-    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" 
positionIncrementGap="0"/>
-    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" 
positionIncrementGap="0"/>
-    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" 
positionIncrementGap="0"/>
-
-    <!--
-     Numeric field types that index each value at various levels of precision
-     to accelerate range queries when the number of values between the range
-     endpoints is large. See the javadoc for NumericRangeQuery for internal
-     implementation details.
-
-     Smaller precisionStep values (specified in bits) will lead to more tokens
-     indexed per value, slightly larger index size, and faster range queries.
-     A precisionStep of 0 disables indexing at different precision levels.
-    -->
-    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" 
positionIncrementGap="0"/>
-    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" 
positionIncrementGap="0"/>
-    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" 
positionIncrementGap="0"/>
-    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" 
positionIncrementGap="0"/>
-
-    <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, 
and
-         is a more restricted form of the canonical representation of dateTime
-         http://www.w3.org/TR/xmlschema-2/#dateTime    
-         The trailing "Z" designates UTC time and is mandatory.
-         Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
-         All other components are mandatory.
-
-         Expressions can also be used to denote calculations that should be
-         performed relative to "NOW" to determine the value, ie...
-
-               NOW/HOUR
-                  ... Round to the start of the current hour
-               NOW-1DAY
-                  ... Exactly 1 day prior to now
-               NOW/DAY+6MONTHS+3DAYS
-                  ... 6 months and 3 days in the future from the start of
-                      the current day
-                      
-         Consult the DateField javadocs for more information.
-
-         Note: For faster range queries, consider the tdate type
-      -->
-    <fieldType name="date" class="solr.TrieDateField" precisionStep="0" 
positionIncrementGap="0"/>
-
-    <!-- A Trie based date field for faster date range queries and date 
faceting. -->
-    <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" 
positionIncrementGap="0"/>
-
-
-    <!--Binary data type. The data should be sent/retrieved in as Base64 
encoded Strings -->
-    <fieldtype name="binary" class="solr.BinaryField"/>
-
-    <!--
-      Note:
-      These should only be used for compatibility with existing indexes 
(created with lucene or older Solr versions).
-      Use Trie based fields instead. As of Solr 3.5 and 4.x, Trie based fields 
support sortMissingFirst/Last
-      
-      Plain numeric field types that store and index the text
-      value verbatim (and hence don't correctly support range queries, since 
the
-      lexicographic ordering isn't equal to the numeric ordering)
-    -->
-    <fieldType name="pint" class="solr.IntField"/>
-    <fieldType name="plong" class="solr.LongField"/>
-    <fieldType name="pfloat" class="solr.FloatField"/>
-    <fieldType name="pdouble" class="solr.DoubleField"/>
-    <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>
-
-    <!-- The "RandomSortField" is not used to store or search any
-         data.  You can declare fields of this type it in your schema
-         to generate pseudo-random orderings of your docs for sorting 
-         or function purposes.  The ordering is generated based on the field
-         name and the version of the index. As long as the index version
-         remains unchanged, and the same field name is reused,
-         the ordering of the docs will be consistent.  
-         If you want different psuedo-random orderings of documents,
-         for the same version of the index, use a dynamicField and
-         change the field name in the request.
-     -->
-    <fieldType name="random" class="solr.RandomSortField" indexed="true" />
-
-    <!-- solr.TextField allows the specification of custom text analyzers
-         specified as a tokenizer and a list of token filters. Different
-         analyzers may be specified for indexing and querying.
-
-         The optional positionIncrementGap puts space between multiple fields 
of
-         this type on the same document, with the purpose of preventing false 
phrase
-         matching across fields.
-
-         For more info on customizing your analyzer chain, please see
-         http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
-     -->
-
-    <!-- One can also specify an existing Analyzer class that has a
-         default constructor via the class attribute on the analyzer element.
-         Example:
-    <fieldType name="text_greek" class="solr.TextField">
-      <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
-    </fieldType>
-    -->
-
-    <!-- A text field that only splits on whitespace for exact matching of 
words -->
-    <fieldType name="text_ws" class="solr.TextField" 
positionIncrementGap="100">
-      <analyzer>
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-      </analyzer>
-    </fieldType>
-
-
-
-    <!-- charFilter + WhitespaceTokenizer  -->
-    <!--
-    <fieldType name="text_char_norm" class="solr.TextField" 
positionIncrementGap="100" >
-      <analyzer>
-        <charFilter class="solr.MappingCharFilterFactory" 
mapping="mapping-ISOLatin1Accent.txt"/>
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-      </analyzer>
-    </fieldType>
-    -->
-
-    <!-- lowercases the entire field value, keeping it as a single token.  -->
-    <fieldType name="lowercase" class="solr.TextField" 
positionIncrementGap="100">
-      <analyzer>
-        <tokenizer class="solr.KeywordTokenizerFactory"/>
-        <filter class="solr.LowerCaseFilterFactory" />
-      </analyzer>
-    </fieldType>
-
-    <fieldType name="text_path" class="solr.TextField" 
positionIncrementGap="100">
-      <analyzer>
-        <tokenizer class="solr.PathHierarchyTokenizerFactory"/>
-      </analyzer>
-    </fieldType>
-
-
-    <!-- since fields of this type are by default not stored or indexed,
-         any data added to them will be ignored outright.  --> 
-    <fieldtype name="ignored" stored="false" indexed="false" 
multiValued="true" class="solr.StrField" />
-
-    <!-- This point type indexes the coordinates as separate fields (subFields)
-      If subFieldType is defined, it references a type, and a dynamic field
-      definition is created matching *___<typename>.  Alternately, if 
-      subFieldSuffix is defined, that is used to create the subFields.
-      Example: if subFieldType="double", then the coordinates would be
-        indexed in fields myloc_0___double,myloc_1___double.
-      Example: if subFieldSuffix="_d" then the coordinates would be indexed
-        in fields myloc_0_d,myloc_1_d
-      The subFields are an implementation detail of the fieldType, and end
-      users normally should not need to know about them.
-     -->
-    <fieldType name="point" class="solr.PointType" dimension="2" 
subFieldSuffix="_d"/>
-
-    <!-- A specialized field for geospatial search. If indexed, this fieldType 
must not be multivalued. -->
-    <fieldType name="location" class="solr.LatLonType" 
subFieldSuffix="_coordinate"/>
-
-   <!--
-    A Geohash is a compact representation of a latitude longitude pair in a 
single field.
-    See http://wiki.apache.org/solr/SpatialSearch
-   -->
-    <fieldtype name="geohash" class="solr.GeoHashField"/>
-
- </types>
-
-
-
-</schema>
diff --git a/solrupdate.php b/solrupdate.php
deleted file mode 100644
index 8bf4716..0000000
--- a/solrupdate.php
+++ /dev/null
@@ -1,243 +0,0 @@
-<?php
-
-$IP = getenv( 'MW_INSTALL_PATH' );
-if ( $IP === false ) {
-       $IP = dirname( __FILE__ ) . '/../..';
-}
-require_once( "$IP/maintenance/Maintenance.php" );
-
-class SolrUpdate extends Maintenance {
-       const WRITE_BATCH_SIZE = 500;
-       const READ_BATCH_SIZE = 1000;
-       const READ_DELAY = 0; // In microseconds
-
-       private $jobMode = false;
-
-       public function __construct() {
-               $this->mDescription = 'Performs updates and other operations with Solr index';
-               $this->addOption( 'reset', 'Reset last update timestamp (next feed will return whole database)' );
-               $this->addOption( 'clear-killlist', 'Purge killlist entries older than this value (in days)', false, true );
-               $this->addOption( 'noindex', 'Don\'t update index' );
-       }
-
-       public function enableJobMode() {
-               $this->mQuiet = true;
-               $this->jobMode = true;
-       }
-
-       public function execute() {
-               // Make sure that the index is being updated only once
-               $work = new SolrUpdateWork( $this );
-               if ( !$work->execute() ) {
-                       $this->error( __METHOD__ . '(): PoolCounter error!', true );
-               }
-       }
-
-       /**
-        * Called internally
-        */
-       public function safeExecute() {
-               global $wgGeoDataBackend, $wgGeoDataSolrCommitPolicy;
-               if ( $wgGeoDataBackend != 'solr' ) {
-                       $this->error( "This script is only for wikis with Solr GeoData backend", true );
-               }
-
-               $dbr = $this->getDB( DB_SLAVE );
-               $dbw = $this->getDB( DB_MASTER );
-
-               $wikiId = wfWikiID();
-
-               if ( $this->hasOption( 'reset' ) ) {
-                       $this->output( "Resetting update tracking...\n" );
-                       $dbw->delete( 'geo_updates', array( 'gu_wiki' => $wikiId ), __METHOD__ );
-                       $this->output( "Truncating killlist...\n" );
-                       $table = $dbw->tableName( 'geo_killlist' );
-                       $dbw->query( "TRUNCATE TABLE $table", __METHOD__ );
-                       $cutoffKilllist = false;
-               } else {
-                       $cutoffKilllist = $dbr->selectField( 'geo_killlist', 'MAX( gk_killed_id )', '', __METHOD__ );
-               }
-               $cutoffTags = $dbr->selectField( 'geo_tags', 'MAX( gt_id )', '', __METHOD__ );
-
-               if ( $this->hasOption( 'clear-killlist' ) ) {
-                       $days = intval( $this->getOption( 'clear-killlist' ) );
-                       if ( $days <= 0 ) {
-                               $this->error( '--clear-killlist: please specify a positive integer number of days', true );
-                       }
-                       $this->output( "Deleting killlist entries older than $days days...\n" );
-                       $timestamp = $dbw->addQuotes( wfTimestamp( TS_DB, strtotime( "$days days ago" ) ) );
-                       $table = $dbr->tableName( 'geo_killlist' );
-                       $count = 0;
-                       do {
-                               $sql = "DELETE FROM $table WHERE gk_touched < $timestamp LIMIT "
-                                       . self::WRITE_BATCH_SIZE;
-                               $dbw->query( $sql, __METHOD__ );
-                               $deleted = $dbw->affectedRows();
-                               $count += $deleted;
-                               if ( $deleted ) {
-                                       wfWaitForSlaves();
-                                       $this->output( "  $count\n" );
-                               }
-                       } while ( $deleted > 0 );
-               }
-
-               if ( $this->hasOption( 'noindex' ) ) {
-                       return;
-               }
-               $res = $dbr->select( 'geo_updates',
-                       array( 'gu_last_tag', 'gu_last_kill' ),
-                       array( 'gu_wiki' => $wikiId ),
-                       __METHOD__
-               );
-               if ( !$res || !( $row = $res->fetchObject() ) ) {
-                       $lastTag = $lastKill = 0;
-               } else {
-                       $lastTag = $row->gu_last_tag;
-                       $lastKill = $row->gu_last_kill;
-               }
-
-               $solr = SolrGeoData::newClient( 'master' );
-
-               $fields = Coord::getFieldMapping();
-               $fields['page_id'] = 'gt_page_id';
-
-               if ( $cutoffTags ) {
-                       $this->output( "Indexing new documents...\n" );
-                       $count = 0;
-                       do {
-                               $conds = array(
-                                       "gt_id <= $cutoffTags",
-                                       'gt_globe' => 'earth',
-                               );
-                               if ( $lastTag ) {
-                                       $conds[] = "gt_id > $lastTag";
-                               }
-                               $res = $dbr->select( 'geo_tags',
-                                       array_values( $fields ),
-                                       $conds,
-                                       __METHOD__,
-                                       array( 'LIMIT' => self::READ_BATCH_SIZE, 'ORDER BY' => 'gt_id' )
-                               );
-                               $docs = array();
-                               $update = $solr->createUpdate();
-                               foreach ( $res as $row ) {
-                                       $lastTag = $row->gt_id;
-                                       $doc = $update->createDocument();
-                                       $row->gt_id = $wikiId . '-' . $row->gt_id;
-                                       foreach( $fields as $solrField => $dbField ) {
-                                               if ( $solrField != 'lat' && $solrField != 'lon' ) {
-                                                       $doc->addField( $solrField, $row->$dbField );
-                                               }
-                                       }
-                                       $doc->addField( 'wiki', $wikiId );
-                                       $doc->addField( 'coord', "{$row->gt_lat},{$row->gt_lon}" );
-                                       $docs[] = $doc;
-                               }
-                               if ( $docs ) {
-                                       $update->addDocuments( $docs, null, $this->commitWithin() );
-                                       $this->addCommit( $update );
-                                       $solr->update( $update );
-
-                                       $count += count( $docs );
-                                       $this->output( "   $count\n" );
-                                       usleep( self::READ_DELAY );
-                               }
-                       } while ( $res->numRows() > 0 );
-               }
-
-               if ( $cutoffKilllist ) {
-                       $this->output( "Deleting old documents...\n" );
-                       $count = 0;
-                       do {
-                               $conds = array(
-                                       "gk_killed_id <= $cutoffKilllist",
-                               );
-                               if ( $lastKill ) {
-                                       $conds[] = "gk_killed_id > $lastKill";
-                               }
-                               $res = $dbr->select( 'geo_killlist',
-                                       array( 'gk_killed_id' ),
-                                       $conds,
-                                       __METHOD__,
-                                       array( 'LIMIT' => self::READ_BATCH_SIZE, 'ORDER BY' => 'gk_killed_id' )
-                               );
-                               $killedIds = array();
-                               $update = $solr->createUpdate();
-                               foreach ( $res as $row ) {
-                                       $lastKill = $row->gk_killed_id;
-                                       $killedIds[] = $wikiId . '-' . $row->gk_killed_id;
-                               }
-                               if ( $killedIds ) {
-                                       $update->addDeleteByIds( $killedIds );
-                                       if ( $wgGeoDataSolrCommitPolicy === 'immediate' ) {
-                                               $update->addCommit();
-                                       }
-                                       $solrResult = $solr->update( $update );
-                                       wfDebugLog( 'geodata', "Deleting " . count( $killedIds ) . " docs, response: {$solrResult->getResponse()->getBody()}" );
-
-                                       $count += count( $killedIds );
-                                       $this->output( "   $count\n" );
-                                       usleep( self::READ_DELAY );
-                               }
-                       } while ( $res->numRows() > 0 );
-                       // delete queries don't support commitWithin, so if we're in commitWithin mode,
-                       // just commit after we're done deleting
-                       if ( $count && is_int( $wgGeoDataSolrCommitPolicy ) ) {
-                               $update = $solr->createUpdate();
-                               $update->addCommit();
-                               $solr->update( $update );
-                       }
-               }
-
-               $dbw->replace( 'geo_updates',
-                       array( 'gu_wiki' ),
-                       array( 'gu_wiki' => $wikiId, 'gu_last_tag' => $lastTag, 'gu_last_kill' => $lastKill ),
-                       __METHOD__
-               );
-       }
-
-       /**
-        * @param Solarium_Query_Update $update
-        */
-       private function addCommit( $update ) {
-               global $wgGeoDataSolrCommitPolicy;
-
-               if ( $wgGeoDataSolrCommitPolicy === 'immediate' ) {
-                       $update->addCommit();
-               } elseif ( !( is_int( $wgGeoDataSolrCommitPolicy ) && $wgGeoDataSolrCommitPolicy > 0 )
-                       && $wgGeoDataSolrCommitPolicy !== 'never' ) {
-                       throw new MWException( "'$wgGeoDataSolrCommitPolicy' is not a valid \$wgGeoDataSolrCommitPolicy value" );
-               }
-       }
-
-       /**
-        * @return int|null: Number of milliseconds to commit within or null if not applicable
-        */
-       private function commitWithin() {
-               global $wgGeoDataSolrCommitPolicy;
-
-               if ( is_int( $wgGeoDataSolrCommitPolicy ) ) {
-                       return $wgGeoDataSolrCommitPolicy;
-               }
-               return null;
-       }
-
-       /**
-        * Overrides Maintenace::error() to throw exceptions instead of writing to stderr when called from a job
-        * @param String $err
-        * @param int $die
-        */
-       protected function error( $err, $die = 0 ) {
-               if ( $this->jobMode ) {
-                       if ( $die ) {
-                               throw new MWException( $err );
-                       } else {
-                               wfDebug( "$err\n" );
-                       }
-               }
-               parent::error( $err, $die );
-       }
-}
-
-$maintClass = 'SolrUpdate';
-require_once( DO_MAINTENANCE );
diff --git a/sql/drop-updates-killlist.sql b/sql/drop-updates-killlist.sql
new file mode 100644
index 0000000..b2ac148
--- /dev/null
+++ b/sql/drop-updates-killlist.sql
@@ -0,0 +1,2 @@
+DROP TABLE /*_*/geo_killlist;
+DROP TABLE /*_*/geo_updates;
\ No newline at end of file
diff --git a/sql/externally-backed.sql b/sql/externally-backed.sql
index a279930..2993a65 100644
--- a/sql/externally-backed.sql
+++ b/sql/externally-backed.sql
@@ -1,4 +1,4 @@
--- SQL schema for GeoData extension, Solr-aware
+-- SQL schema for GeoData extension, Elasticsearch backend
 
 -- Stores information about geographical coordinates in articles
 CREATE TABLE /*_*/geo_tags (
@@ -29,22 +29,3 @@
 
 CREATE INDEX /*i*/gt_page_primary ON /*_*/geo_tags ( gt_page_id, gt_primary );
 CREATE INDEX /*i*/gt_page_id_id ON /*_*/geo_tags ( gt_page_id, gt_id );
-
--- Stores kill-list (ids of records deleted from geo_tags that need to be deleted from the Solr index)
-CREATE TABLE /*_*/geo_killlist (
-       -- Row ID
-       gk_id int unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT,
-       -- gt_id of a row deleted from geo_tags
-       gk_killed_id int unsigned NOT NULL,
-       -- Last change timestamp
-       gk_touched timestamp NOT NULL default CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
-)/*$wgDBTableOptions*/;
-
-CREATE INDEX /*i*/gk_touched ON /*_*/geo_killlist ( gk_touched );
-
--- Stores information about the last index update time
-CREATE TABLE /*_*/geo_updates (
-       gu_wiki varchar(64) NOT NULL PRIMARY KEY,
-       gu_last_tag int NOT NULL,
-       gu_last_kill int NOT NULL
-)/*$wgDBTableOptions*/;
diff --git a/sql/wmfFixTables.sql b/sql/wmfFixTables.sql
deleted file mode 100644
index 72f9177..0000000
--- a/sql/wmfFixTables.sql
+++ /dev/null
@@ -1,8 +0,0 @@
--- Some tables were created on WMF using MyISAM
--- This script fixes them
-
-TRUNCATE TABLE /*_*/geo_killlist;
-
-ALTER TABLE /*_*/geo_killlist ENGINE=InnoDB;
-
-ALTER TABLE /*_*/geo_updates ENGINE=InnoDB;

-- 
To view, visit https://gerrit.wikimedia.org/r/134851
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I29fc406ba085795db68911487ee074b3eae6e8a2
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/GeoData
Gerrit-Branch: master
Gerrit-Owner: MaxSem <maxsem.w...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to