nknize commented on code in PR #1017:
URL: https://github.com/apache/lucene/pull/1017#discussion_r930257797


##########
lucene/core/src/java/org/apache/lucene/document/ShapeDocValuesField.java:
##########
@@ -0,0 +1,896 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.ShapeField.DecodedTriangle.TYPE;
+import org.apache.lucene.document.ShapeField.QueryRelation;
+import org.apache.lucene.document.SpatialQuery.EncodedRectangle;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.IndexableFieldType;
+import org.apache.lucene.index.PointValues.Relation;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ByteBuffersDataOutput;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+
+/** A doc values field representation for {@link LatLonShape} and {@link 
XYShape} */
+public final class ShapeDocValuesField extends Field {
+  private final ShapeComparator shapeComparator;
+
+  private static final FieldType FIELD_TYPE = new FieldType();
+
+  static {
+    FIELD_TYPE.setDocValuesType(DocValuesType.BINARY);
+    FIELD_TYPE.setOmitNorms(true);
+    FIELD_TYPE.freeze();
+  }
+
+  /**
+   * Creates a {@ShapeDocValueField} instance from a shape tessellation
+   *
+   * @param name The Field Name (must not be null)
+   * @param tessellation The tessellation (must not be null)
+   */
+  ShapeDocValuesField(String name, List<ShapeField.DecodedTriangle> 
tessellation) {
+    super(name, FIELD_TYPE);
+    BytesRef b = computeBinaryValue(tessellation);
+    this.fieldsData = b;
+    try {
+      this.shapeComparator = new ShapeComparator(b);
+    } catch (IOException e) {
+      throw new IllegalArgumentException("unable to read binary shape doc 
value field. ", e);
+    }
+  }
+
+  /** Creates a {@code ShapeDocValue} field from a given serialized value */
+  ShapeDocValuesField(String name, BytesRef binaryValue) {
+    super(name, FIELD_TYPE);
+    this.fieldsData = binaryValue;
+    try {
+      this.shapeComparator = new ShapeComparator(binaryValue);
+    } catch (IOException e) {
+      throw new IllegalArgumentException("unable to read binary shape doc 
value field. ", e);
+    }
+  }
+
+  /** The name of the field */
+  @Override
+  public String name() {
+    return name;
+  }
+
+  /** Gets the {@code IndexableFieldType} for this ShapeDocValue field */
+  @Override
+  public IndexableFieldType fieldType() {
+    return FIELD_TYPE;
+  }
+
+  /** Currently there is no string representation for the ShapeDocValueField */
+  @Override
+  public String stringValue() {
+    return null;
+  }
+
+  /** TokenStreams are not yet supported */
+  @Override
+  public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
+    return null;
+  }
+
+  /** create a shape docvalue field from indexable fields */
+  public static ShapeDocValuesField createDocValueField(String fieldName, 
Field[] indexableFields) {
+    ArrayList<ShapeField.DecodedTriangle> tess = new 
ArrayList<>(indexableFields.length);
+    final byte[] scratch = new byte[7 * Integer.BYTES];
+    for (Field f : indexableFields) {
+      BytesRef br = f.binaryValue();
+      assert br.length == 7 * ShapeField.BYTES;
+      System.arraycopy(br.bytes, br.offset, scratch, 0, 7 * ShapeField.BYTES);
+      ShapeField.DecodedTriangle t = new ShapeField.DecodedTriangle();
+      ShapeField.decodeTriangle(scratch, t);
+      tess.add(t);
+    }
+    return new ShapeDocValuesField(fieldName, tess);
+  }
+
+  /** Returns the number of terms (tessellated triangles) for this shape */
+  public int numberOfTerms() {
+    return shapeComparator.numberOfTerms();
+  }
+
+  /** Creates a geometry query for shape docvalues */
+  public static Query newGeometryQuery(
+      final String field, final QueryRelation relation, Object... geometries) {
+    return null;
+    // TODO
+    //  return new ShapeDocValuesQuery(field, relation, geometries);
+  }
+
+  /** Compute the spatial relation of this shape and a bounding box (in 
encoded space) */
+  public Relation relate(final int minX, final int maxX, final int minY, final 
int maxY)
+      throws IOException {
+    return shapeComparator.relate(minX, maxX, minY, maxY);
+  }
+
+  /** returns the min x value for the shape's bounding box */
+  public int getMinX() {
+    return shapeComparator.getMinX();
+  }
+
+  /** returns the min y value for the shape's bounding box */
+  public int getMinY() {
+    return shapeComparator.getMinY();
+  }
+
+  /** returns the max x value for the shape's bounding box */
+  public int getMaxX() {
+    return shapeComparator.getMaxX();
+  }
+
+  /** returns the max y value for the shape's bounding box */
+  public int getMaxY() {
+    return shapeComparator.getMaxY();
+  }
+
+  /** Retrieves the x centroid location for the geometry(s) */
+  public int getCentroidX() {
+    return shapeComparator.getCentroidX();
+  }
+
+  /** Retrieves the y centroid location for the geometry(s) */
+  public int getCentroidY() {
+    return shapeComparator.getCentroidY();
+  }
+
+  /**
+   * Retrieves the highest dimensional type (POINT, LINE, TRIANGLE) for 
computing the geometry(s)
+   * centroid
+   */
+  public TYPE getHighestDimensionType() {
+    return shapeComparator.getHighestDimension();
+  }
+
+  private BytesRef computeBinaryValue(List<ShapeField.DecodedTriangle> 
tessellation) {
+    try {
+      // dfs order serialization
+      List<TreeNode> dfsSerialized = new ArrayList<>(tessellation.size());
+      buildTree(tessellation, dfsSerialized);
+      Writer w = new Writer(dfsSerialized);
+      return w.getBytesRef();
+    } catch (IOException e) {
+      throw new RuntimeException("Internal error building 
LatLonShapeDocValues. Got ", e);
+    }
+  }
+
+  /** main entry point to build the tessellation tree * */
+  public TreeNode buildTree(
+      List<ShapeField.DecodedTriangle> tessellation, List<TreeNode> 
dfsSerialized)
+      throws IOException {
+    if (tessellation.size() == 1) {
+      ShapeField.DecodedTriangle t = tessellation.get(0);
+      TreeNode node = new TreeNode(t);
+      if (t.type == TYPE.LINE) {
+        node.midX /= node.length;
+        node.midY /= node.length;
+      } else if (t.type == TYPE.TRIANGLE) {
+        node.midX /= node.signedArea;
+        node.midY /= node.signedArea;
+      }
+      node.highestType = t.type;
+      dfsSerialized.add(node);
+      return node;
+    }
+    TreeNode[] triangles = new TreeNode[tessellation.size()];
+    int i = 0;
+    int minY = Integer.MAX_VALUE;
+    int minX = Integer.MAX_VALUE;
+    int maxY = Integer.MIN_VALUE;
+    int maxX = Integer.MIN_VALUE;
+
+    // running stats for computing centroid
+    double totalSignedArea = 0;
+    double totalLength = 0;
+    double numXPnt = 0;
+    double numYPnt = 0;
+    double numXLin = 0;
+    double numYLin = 0;
+    double numXPly = 0;
+    double numYPly = 0;
+    TYPE highestType = TYPE.POINT;
+
+    for (ShapeField.DecodedTriangle t : tessellation) {
+      TreeNode node = new TreeNode(t);
+      triangles[i++] = node;
+      // compute the bbox values up front
+      minY = Math.min(minY, node.minY);
+      minX = Math.min(minX, node.minX);
+      maxY = Math.max(maxY, node.maxY);
+      maxX = Math.max(maxX, node.maxX);
+
+      // compute the running centroid stats
+      totalSignedArea += node.signedArea; // non-zero if any components are 
triangles
+      totalLength += node.length; // non-zero if any components are line 
segments
+      if (t.type == TYPE.POINT) {
+        numXPnt += node.midX;
+        numYPnt += node.midY;
+      } else if (t.type == TYPE.LINE) {
+        if (highestType == TYPE.POINT) {
+          highestType = TYPE.LINE;
+        }
+        numXLin += node.midX;
+        numYLin += node.midY;
+      } else {
+        if (highestType != TYPE.TRIANGLE) {
+          highestType = TYPE.TRIANGLE;
+        }
+        numXPly += node.midX;
+        numYPly += node.midY;
+      }
+    }
+    TreeNode root = createTree(triangles, 0, triangles.length - 1, false, 
null, dfsSerialized);
+
+    // pull up min values for the root node so the bbox is consistent
+    root.minY = minY;
+    root.minX = minX;
+
+    // set the highest dimensional type
+    root.highestType = highestType;
+
+    // compute centroid values for the root node so the centroid is consistent

Review Comment:
   Finally had a moment to come back to this, and the test doesn't appear 
genuine. It's misusing the `LatLonShape.createDocValueField` API in a way that 
tells me I need to strengthen the javadocs in this iteration. As you can see, 
the Polygon method is `public static ShapeDocValuesField 
createDocValueField(String fieldName, Polygon polygon)`, which does not take a 
multi-polygon. This is because `ShapeDocValues` computes the centroid on a 
per-polygon basis and does not yet support multi-geometries (this is why the 
Multi tests are not yet wired up; that's a TODO for a follow-on PR). The test 
you're proposing is "solving" the multi doc value problem by munging the 
tessellation of the two geometries into a single doc value field that was 
designed for singular geometries. I get why you're doing this: **Lucene does 
not support multi binary doc values**. But honestly, I disagree with munging 
the tessellations as a hack around this Lucene limitation. Instead, I think we 
should improve Lucene by adding multi binary doc value support like we have 
for Numeric and Sorted Numeric; it would be a nice symmetry. This may take 
some time, which is why we should strengthen the docs here and not hold up the 
PR for "perfection".



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to