Github user anujgandharv commented on a diff in the pull request:
https://github.com/apache/jena/pull/227#discussion_r106869941
--- Diff:
jena-text/src/main/java/org/apache/jena/query/text/TextIndexES.java ---
@@ -0,0 +1,394 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import org.apache.jena.graph.Node;
+import org.apache.jena.graph.NodeFactory;
+import org.apache.jena.sparql.util.NodeFactoryExtra;
+import
org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest;
+import
org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsResponse;
+import org.elasticsearch.action.get.GetResponse;
+import org.elasticsearch.action.index.IndexRequest;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.action.update.UpdateRequest;
+import org.elasticsearch.action.update.UpdateResponse;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.client.transport.TransportClient;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.transport.InetSocketTransportAddress;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.script.Script;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.transport.client.PreBuiltTransportClient;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.net.InetAddress;
+import java.util.*;
+
+import static
org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+
+/**
+ * Elastic Search Implementation of {@link TextIndex}
+ *
+ */
+public class TextIndexES implements TextIndex {
+
+ /**
+ * The definition of the entity being indexed, obtained from the
+ * {@link TextIndexConfig} handed to the constructor.
+ */
+ private final EntityDefinition docDef ;
+
+ /**
+ * ElasticSearch Java client used to perform index operations.
+ * This field is static, so a single client is shared by every instance of
+ * this class.
+ * NOTE(review): the original comment describes the client as thread safe;
+ * confirm against the ElasticSearch client documentation.
+ */
+ private static Client client;
+
+ /**
+ * The name of the index this instance operates on.
+ * NOTE(review): the original comment said the name defaults to 'test'; no
+ * default is applied in this class, so confirm where that default comes from.
+ */
+ private final String indexName;
+
+ static final String CLUSTER_NAME_PARAM = "cluster.name";
+
+ static final String NUM_OF_SHARDS_PARAM = "number_of_shards";
+
+ static final String NUM_OF_REPLICAS_PARAM = "number_of_replicas";
+
+ /**
+ * Maximum number of results to return when no limit is specified on the
+ * search operation.
+ */
+ static final Integer MAX_RESULTS = 10000;
+
+ private boolean isMultilingual ;
+
+ private static final Logger LOGGER =
LoggerFactory.getLogger(TextIndexES.class) ;
+
+ /**
+  * Creates an ElasticSearch-backed text index.
+  * <p>
+  * The shared static client is created from {@code esSettings} only when it
+  * is still {@code null}; otherwise the existing client is reused and the
+  * cluster name and host settings in {@code esSettings} are not consulted.
+  * If the index named by {@code esSettings} does not exist yet, it is created
+  * with the configured number of shards and replicas.
+  * <p>
+  * NOTE(review): the {@code client == null} check is not synchronized.
+  *
+  * @param config     text index configuration; supplies the entity definition
+  *                   and the multilingual flag
+  * @param esSettings ElasticSearch settings (index name, cluster name,
+  *                   host-to-port mapping, shards, replicas)
+  * @throws TextIndexException if anything fails while creating the client or
+  *                            while checking for / creating the index
+  */
+ public TextIndexES(TextIndexConfig config, ESSettings esSettings) {
+
+     this.indexName = esSettings.getIndexName();
+     this.docDef = config.getEntDef();
+
+     this.isMultilingual = config.isMultilingualSupport();
+     if (this.isMultilingual && config.getEntDef().getLangField() == null) {
+         //multilingual index cannot work without lang field
+         docDef.setLangField("lang");
+     }
+     try {
+         if (client == null) {
+
+             // Placeholders avoid building the message when debug logging is off.
+             LOGGER.debug("Initializing the Elastic Search Java Client with settings: {}", esSettings);
+             Settings settings = Settings.builder()
+                     .put(CLUSTER_NAME_PARAM, esSettings.getClusterName())
+                     .build();
+             List<InetSocketTransportAddress> addresses = new ArrayList<>();
+             for (String host : esSettings.getHostToPortMapping().keySet()) {
+                 InetSocketTransportAddress addr = new InetSocketTransportAddress(
+                         InetAddress.getByName(host),
+                         esSettings.getHostToPortMapping().get(host));
+                 addresses.add(addr);
+             }
+
+             // toArray allocates a correctly sized array itself; no need to pre-size one.
+             client = new PreBuiltTransportClient(settings)
+                     .addTransportAddresses(addresses.toArray(new InetSocketTransportAddress[0]));
+             LOGGER.debug("Successfully initialized the client");
+         }
+
+         IndicesExistsResponse exists =
+                 client.admin().indices().exists(new IndicesExistsRequest(indexName)).get();
+         if (!exists.isExists()) {
+             Settings indexSettings = Settings.builder()
+                     .put(NUM_OF_SHARDS_PARAM, esSettings.getShards())
+                     .put(NUM_OF_REPLICAS_PARAM, esSettings.getReplicas())
+                     .build();
+             LOGGER.debug("Index with name {} does not exist yet. Creating one with settings: {}",
+                     indexName, indexSettings);
+             client.admin().indices().prepareCreate(indexName).setSettings(indexSettings).get();
+         }
+     } catch (Exception e) {
+         if (e instanceof InterruptedException) {
+             // The blocking get() calls can be interrupted; restore the flag so
+             // callers further up the stack can still observe the interruption.
+             Thread.currentThread().interrupt();
+         }
+         throw new TextIndexException("Exception occured while instantiating ElasticSearch Text Index", e);
+     }
+ }
+
+
+ /**
+  * Constructor used mainly for performing integration tests.
+  * <p>
+  * Multilingual support is always enabled by this constructor, regardless of
+  * {@code config}. The supplied client is stored in the static client field
+  * and therefore replaces the client used by every instance of this class.
+  * This constructor does not check for or create the index.
+  *
+  * @param config    an instance of {@link TextIndexConfig}; supplies the entity definition
+  * @param client    an instance of {@link TransportClient}. The client should
+  *                  already have been initialized with an index
+  * @param indexName the name of the index this instance operates on
+  */
+ public TextIndexES(TextIndexConfig config, Client client, String indexName) {
+     this.docDef = config.getEntDef();
+     this.isMultilingual = true;
+     // client is a static field: qualify it with the class name rather than
+     // 'this' so it is obvious that shared state is being overwritten.
+     TextIndexES.client = client;
+     this.indexName = indexName;
+ }
+
+ /**
+ * No-op: this implementation has no work to perform before a commit.
+ */
+ @Override
+ public void prepareCommit() {
+ // Intentionally empty.
+
+ }
+
+ /**
+ * No-op: nothing is done at commit time.
+ * Per the original author's note, changes are applied by the individual
+ * get/add/delete operations rather than here.
+ */
+ @Override
+ public void commit() {
+ // Intentionally empty.
+ }
+
+ /**
+ * No-op: this implementation does not roll anything back.
+ */
+ @Override
+ public void rollback() {
+ // Intentionally empty.
+
+ }
+
+ /**
+ * No-op: this method releases no resources explicitly. In particular, the
+ * static ElasticSearch client is not closed here.
+ */
+ @Override
+ public void close() {
+ // Intentionally empty.
+
+ }
+
+ /**
+ * Updates an entity by delegating to {@link #addEntity(Entity)}.
+ * According to that method's documentation it adds the entity when it does
+ * not exist yet and updates it otherwise, so no separate update logic is
+ * implemented here.
+ * @param entity the entity to update.
+ */
+ @Override
+ public void updateEntity(Entity entity) {
+ //Since Add entity also updates the indexed document in case it
already exists,
+ // we can simply call the addEntity from here.
+ addEntity(entity);
+ }
+
+
+ /**
+ * Add an Entity to the ElasticSearch Index.
+ * The entity will be added as a new document in ES, if it does not
already exists.
+ * If the Entity exists, then the entity will simply be updated.
+ * The entity will never be replaced.
+ * @param entity the entity to add
+ */
+ @Override
+ public void addEntity(Entity entity) {
+ LOGGER.debug("Adding/Updating the entity in ES");
+
+ //The field that has a not null value in the current Entity
instance.
+ //Required, mainly for building a script for the update command.
+ String fieldToAdd = null;
+ String fieldValueToAdd = "";
+ try {
+ XContentBuilder builder = jsonBuilder()
+ .startObject();
+
+ for(String field: docDef.fields()) {
+ if(entity.get(field) != null) {
+ if(entity.getLanguage() != null &&
!entity.getLanguage().isEmpty() && isMultilingual) {
+ fieldToAdd = field + "_" + entity.getLanguage();
+ } else {
+ fieldToAdd = field;
+ }
+
+ fieldValueToAdd = (String) entity.get(field);
+ builder = builder.field(fieldToAdd,
Arrays.asList(fieldValueToAdd));
+ break;
+ } else {
+ //We are making sure that the field is at-least added
to the index.
+ //This will help us tremendously when we are appending
the data later in an already indexed document.
+ builder = builder.field(field,
Collections.emptyList());
+ }
+
+ }
+
+ builder = builder.endObject();
+ IndexRequest indexRequest = new IndexRequest(indexName,
docDef.getEntityField(), entity.getId())
+ .source(builder);
+
+ String addUpdateScript = "if(ctx._source.<fieldName> == null
|| ctx._source.<fieldName>.empty) " +
+ "{ctx._source.<fieldName>=['<fieldValue>'] } else
{ctx._source.<fieldName>.add('<fieldValue>')}";
+ addUpdateScript = addUpdateScript.replaceAll("<fieldName>",
fieldToAdd).replaceAll("<fieldValue>", fieldValueToAdd);
--- End diff --
Reworked the scripts to pass the field value as a parameter. As a result, the
script is NOT recompiled every time.
The field name cannot be passed as a parameter, because ES apparently only
applies its param check to values (the right-hand side of the = sign).
In any case, this should not cause any extra performance degradation,
because the script is compiled once per field type.
Also, if the value contains a single quote, the single quote is now preserved.
Tested with JenaESTextExample and the latest scripting update.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---