iverase commented on code in PR #1017:
URL: https://github.com/apache/lucene/pull/1017#discussion_r927333254


##########
lucene/core/src/java/org/apache/lucene/document/ShapeDocValuesField.java:
##########
@@ -0,0 +1,896 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.ShapeField.DecodedTriangle.TYPE;
+import org.apache.lucene.document.ShapeField.QueryRelation;
+import org.apache.lucene.document.SpatialQuery.EncodedRectangle;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.IndexableFieldType;
+import org.apache.lucene.index.PointValues.Relation;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ByteBuffersDataOutput;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+
+/** A doc values field representation for {@link LatLonShape} and {@link 
XYShape} */
+public final class ShapeDocValuesField extends Field {
+  private final ShapeComparator shapeComparator;
+
+  private static final FieldType FIELD_TYPE = new FieldType();
+
+  static {
+    FIELD_TYPE.setDocValuesType(DocValuesType.BINARY);
+    FIELD_TYPE.setOmitNorms(true);
+    FIELD_TYPE.freeze();
+  }
+
+  /**
+   * Creates a {@ShapeDocValueField} instance from a shape tessellation
+   *
+   * @param name The Field Name (must not be null)
+   * @param tessellation The tessellation (must not be null)
+   */
+  ShapeDocValuesField(String name, List<ShapeField.DecodedTriangle> 
tessellation) {
+    super(name, FIELD_TYPE);
+    BytesRef b = computeBinaryValue(tessellation);
+    this.fieldsData = b;
+    try {
+      this.shapeComparator = new ShapeComparator(b);
+    } catch (IOException e) {
+      throw new IllegalArgumentException("unable to read binary shape doc 
value field. ", e);
+    }
+  }
+
+  /** Creates a {@code ShapeDocValue} field from a given serialized value */
+  ShapeDocValuesField(String name, BytesRef binaryValue) {
+    super(name, FIELD_TYPE);
+    this.fieldsData = binaryValue;
+    try {
+      this.shapeComparator = new ShapeComparator(binaryValue);
+    } catch (IOException e) {
+      throw new IllegalArgumentException("unable to read binary shape doc 
value field. ", e);
+    }
+  }
+
+  /** The name of the field */
+  @Override
+  public String name() {
+    return name;
+  }
+
+  /** Gets the {@code IndexableFieldType} for this ShapeDocValue field */
+  @Override
+  public IndexableFieldType fieldType() {
+    return FIELD_TYPE;
+  }
+
+  /** Currently there is no string representation for the ShapeDocValueField */
+  @Override
+  public String stringValue() {
+    return null;
+  }
+
+  /** TokenStreams are not yet supported */
+  @Override
+  public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
+    return null;
+  }
+
+  /** create a shape docvalue field from indexable fields */
+  public static ShapeDocValuesField createDocValueField(String fieldName, 
Field[] indexableFields) {
+    ArrayList<ShapeField.DecodedTriangle> tess = new 
ArrayList<>(indexableFields.length);
+    final byte[] scratch = new byte[7 * Integer.BYTES];
+    for (Field f : indexableFields) {
+      BytesRef br = f.binaryValue();
+      assert br.length == 7 * ShapeField.BYTES;
+      System.arraycopy(br.bytes, br.offset, scratch, 0, 7 * ShapeField.BYTES);
+      ShapeField.DecodedTriangle t = new ShapeField.DecodedTriangle();
+      ShapeField.decodeTriangle(scratch, t);
+      tess.add(t);
+    }
+    return new ShapeDocValuesField(fieldName, tess);
+  }
+
+  /** Returns the number of terms (tessellated triangles) for this shape */
+  public int numberOfTerms() {
+    return shapeComparator.numberOfTerms();
+  }
+
+  /** Creates a geometry query for shape docvalues */
+  public static Query newGeometryQuery(
+      final String field, final QueryRelation relation, Object... geometries) {
+    return null;
+    // TODO
+    //  return new ShapeDocValuesQuery(field, relation, geometries);
+  }
+
+  /** Compute the spatial relation of this shape and a bounding box (in 
encoded space) */
+  public Relation relate(final int minX, final int maxX, final int minY, final 
int maxY)
+      throws IOException {
+    return shapeComparator.relate(minX, maxX, minY, maxY);
+  }
+
+  /** returns the min x value for the shape's bounding box */
+  public int getMinX() {
+    return shapeComparator.getMinX();
+  }
+
+  /** returns the min y value for the shape's bounding box */
+  public int getMinY() {
+    return shapeComparator.getMinY();
+  }
+
+  /** returns the max x value for the shape's bounding box */
+  public int getMaxX() {
+    return shapeComparator.getMaxX();
+  }
+
+  /** returns the max y value for the shape's bounding box */
+  public int getMaxY() {
+    return shapeComparator.getMaxY();
+  }
+
+  /** Retrieves the x centroid location for the geometry(s) */
+  public int getCentroidX() {
+    return shapeComparator.getCentroidX();
+  }
+
+  /** Retrieves the y centroid location for the geometry(s) */
+  public int getCentroidY() {
+    return shapeComparator.getCentroidY();
+  }
+
+  /**
+   * Retrieves the highest dimensional type (POINT, LINE, TRIANGLE) for 
computing the geometry(s)
+   * centroid
+   */
+  public TYPE getHighestDimensionType() {
+    return shapeComparator.getHighestDimension();
+  }
+
+  private BytesRef computeBinaryValue(List<ShapeField.DecodedTriangle> 
tessellation) {
+    try {
+      // dfs order serialization
+      List<TreeNode> dfsSerialized = new ArrayList<>(tessellation.size());
+      buildTree(tessellation, dfsSerialized);
+      Writer w = new Writer(dfsSerialized);
+      return w.getBytesRef();
+    } catch (IOException e) {
+      throw new RuntimeException("Internal error building 
LatLonShapeDocValues. Got ", e);
+    }
+  }
+
+  /** main entry point to build the tessellation tree * */
+  public TreeNode buildTree(
+      List<ShapeField.DecodedTriangle> tessellation, List<TreeNode> 
dfsSerialized)
+      throws IOException {
+    if (tessellation.size() == 1) {
+      ShapeField.DecodedTriangle t = tessellation.get(0);
+      TreeNode node = new TreeNode(t);
+      if (t.type == TYPE.LINE) {
+        node.midX /= node.length;
+        node.midY /= node.length;
+      } else if (t.type == TYPE.TRIANGLE) {
+        node.midX /= node.signedArea;
+        node.midY /= node.signedArea;
+      }
+      node.highestType = t.type;
+      dfsSerialized.add(node);
+      return node;
+    }
+    TreeNode[] triangles = new TreeNode[tessellation.size()];
+    int i = 0;
+    int minY = Integer.MAX_VALUE;
+    int minX = Integer.MAX_VALUE;
+    int maxY = Integer.MIN_VALUE;
+    int maxX = Integer.MIN_VALUE;
+
+    // running stats for computing centroid
+    double totalSignedArea = 0;
+    double totalLength = 0;
+    double numXPnt = 0;
+    double numYPnt = 0;
+    double numXLin = 0;
+    double numYLin = 0;
+    double numXPly = 0;
+    double numYPly = 0;
+    TYPE highestType = TYPE.POINT;
+
+    for (ShapeField.DecodedTriangle t : tessellation) {
+      TreeNode node = new TreeNode(t);
+      triangles[i++] = node;
+      // compute the bbox values up front
+      minY = Math.min(minY, node.minY);
+      minX = Math.min(minX, node.minX);
+      maxY = Math.max(maxY, node.maxY);
+      maxX = Math.max(maxX, node.maxX);
+
+      // compute the running centroid stats
+      totalSignedArea += node.signedArea; // non-zero if any components are 
triangles
+      totalLength += node.length; // non-zero if any components are line 
segments
+      if (t.type == TYPE.POINT) {
+        numXPnt += node.midX;
+        numYPnt += node.midY;
+      } else if (t.type == TYPE.LINE) {
+        if (highestType == TYPE.POINT) {
+          highestType = TYPE.LINE;
+        }
+        numXLin += node.midX;
+        numYLin += node.midY;
+      } else {
+        if (highestType != TYPE.TRIANGLE) {
+          highestType = TYPE.TRIANGLE;
+        }
+        numXPly += node.midX;
+        numYPly += node.midY;
+      }
+    }
+    TreeNode root = createTree(triangles, 0, triangles.length - 1, false, 
null, dfsSerialized);
+
+    // pull up min values for the root node so the bbox is consistent
+    root.minY = minY;
+    root.minX = minX;
+
+    // set the highest dimensional type
+    root.highestType = highestType;
+
+    // compute centroid values for the root node so the centroid is consistent

Review Comment:
   Thanks @nknize for the references. I don't have time to look into the
algorithms in detail, so to keep myself sane I just computed the centroid of the
polygon I gave as an example using JTS, a ubiquitous library (which also
[refers](https://github.com/locationtech/jts/blob/master/modules/core/src/main/java/org/locationtech/jts/algorithm/Centroid.java)
 to the documentation you pointed to above):
   
   ```
           String mp = "MULTIPOLYGON(((-80 -10, -40 -10, -40 10, -80 10, -80 
-10)),((10 -1, 11 -1, 11 1, 10 1, 10 -1)))";
           WKTReader wktReader = new WKTReader();
           Coordinate c = Centroid.getCentroid(wktReader.read(mp));
           String wkt = "POINT(" + c.getX() + " " + c.getY() + ")";
   ```
   
   And the result is:
   
   ```
   POINT(-59.82418952618454 -0.0)
   ```
   
   Which is where I was expecting the centroid to be located:
   
   <img width="1014" alt="image" 
src="https://user-images.githubusercontent.com/29038686/180377042-6ff46b36-0fae-434d-8742-82088e943320.png">
   
   Then I wrote a test using your implementation:
   
   ```
     public void testLatLonMultiPolygonCentroid() throws Exception {
       String mp = "MULTIPOLYGON(((-80 -10, -40 -10, -40 10, -80 10, -80 
-10)),((10 -1, 11 -1, 11 1, 10 1, 10 -1)))";
       Polygon[] p = (Polygon[]) SimpleWKTShapeParser.parse(mp);
       List<ShapeField.DecodedTriangle> tess = getTessellation(p[0]);
       tess.addAll(getTessellation(p[1]));
       ShapeDocValuesField dvField = 
LatLonShape.createDocValueField(FIELD_NAME, tess);
       assertEquals(0d, 
GeoEncodingUtils.decodeLatitude(dvField.getCentroidY()), 1E-8);
       // POINT(-59.82418952618454 0.0)
       assertEquals(-59.82418952618454 , 
GeoEncodingUtils.decodeLongitude(dvField.getCentroidX()), 1E-8);
       assertEquals(ShapeField.DecodedTriangle.TYPE.TRIANGLE, 
dvField.getHighestDimensionType());
     }
   ```
   
   And the test fails:
   
   ```
   Expected :-59.82418952618454
   Actual   :4.51269194483757
   ```
   
   So the centroid is way off the expected position. Could you check whether I
am doing something wrong?
   
   <img width="966" alt="image" 
src="https://user-images.githubusercontent.com/29038686/180377577-3e589aed-dfa2-4172-8a97-aa9acc5eb4ee.png">
   
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to