xiangfu0 commented on code in PR #18446:
URL: https://github.com/apache/pinot/pull/18446#discussion_r3211713083


##########
pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/partition/PartitionFunctionFactory.java:
##########
@@ -18,86 +18,154 @@
  */
 package org.apache.pinot.segment.spi.partition;
 
+import com.google.common.base.Preconditions;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Modifier;
+import java.util.Collections;
 import java.util.HashMap;
+import java.util.Locale;
 import java.util.Map;
+import java.util.Set;
 import javax.annotation.Nullable;
 import org.apache.pinot.segment.spi.partition.metadata.ColumnPartitionMetadata;
+import org.apache.pinot.spi.annotations.PartitionFunctionType;
 import org.apache.pinot.spi.config.table.ColumnPartitionConfig;
-
-
-/**
- * Factory to build instances of {@link PartitionFunction}.
- */
+import org.apache.pinot.spi.utils.PinotReflectionUtils;
+import org.reflections.Reflections;
+import org.reflections.scanners.SubTypesScanner;
+import org.reflections.util.ClasspathHelper;
+import org.reflections.util.ConfigurationBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/// Dynamic registry for [PartitionFunction] implementations.
+///
+/// Discovery walks every public, concrete [PartitionFunction] subtype on the classpath under the
+/// `org.apache.pinot.*` package tree, then registers each under its canonical name(s):
+///
+/// - If the class is annotated with [PartitionFunctionType] (and `enabled()` is true), the
+///   annotation's `names()` are used. Multiple aliases register the same constructor under each
+///   name (e.g. `Murmur` and `Murmur2` for `MurmurPartitionFunction`).
+/// - Otherwise, the registry probes [PartitionFunction#getName()] by instantiating the class with
+///   `(numPartitions=1, functionConfig=null)` and registers under the value returned. This lets
+///   existing partition functions work without adding the annotation.
+///
+/// Each registrable class must be public, concrete, and expose a public constructor with signature
+/// `(int numPartitions, Map<String, String> functionConfig)` — the constructor the registry calls
+/// when [#getPartitionFunction(String, int, Map)] is invoked.
+///
+/// The static block scans the classpath once and builds an immutable (canonicalized name →
+/// constructor) map. To force eager initialization (e.g. so the scan happens before the first
+/// segment is read), call [#init()] from broker / server / controller startup.
 public class PartitionFunctionFactory {
-  // Enum for various partition functions to be added.
-  public enum PartitionFunctionType {
-    Modulo, Murmur, Murmur2, Murmur3, Fnv, ByteArray, HashCode, 
BoundedColumnValue;
-    // Add more functions here.
+  private PartitionFunctionFactory() {
+  }
+
+  private static final Logger LOGGER = 
LoggerFactory.getLogger(PartitionFunctionFactory.class);
+  private static final String SCAN_PACKAGE = "org.apache.pinot";
 
-    private static final Map<String, PartitionFunctionType> VALUE_MAP = new 
HashMap<>();
+  private static final Map<String, Constructor<? extends PartitionFunction>> 
REGISTRY;
 
-    static {
-      for (PartitionFunctionType functionType : 
PartitionFunctionType.values()) {
-        VALUE_MAP.put(functionType.name().toLowerCase(), functionType);
+  static {
+    long startTimeMs = System.currentTimeMillis();
+    Map<String, Constructor<? extends PartitionFunction>> registry = new 
HashMap<>();
+    Set<Class<? extends PartitionFunction>> subtypes = scanSubtypes();
+    for (Class<? extends PartitionFunction> clazz : subtypes) {
+      int mods = clazz.getModifiers();
+      if (!Modifier.isPublic(mods) || Modifier.isAbstract(mods) || 
clazz.isInterface()) {
+        continue;
+      }
+      PartitionFunctionType annotation = 
clazz.getAnnotation(PartitionFunctionType.class);
+      if (annotation != null && !annotation.enabled()) {
+        continue;
+      }
+      Constructor<? extends PartitionFunction> constructor;
+      try {
+        constructor = clazz.getConstructor(int.class, Map.class);
+      } catch (NoSuchMethodException e) {
+        LOGGER.warn("Skipping {}: missing public constructor (int, Map<String, 
String>)", clazz.getName());
+        continue;
+      }
+      String[] names = resolveNames(clazz, annotation, constructor);
+      if (names == null) {
+        continue;
+      }
+      for (String name : names) {
+        String canonical = canonicalize(name);
+        Constructor<? extends PartitionFunction> existing = 
registry.put(canonical, constructor);
+        Preconditions.checkState(existing == null || 
existing.getDeclaringClass().equals(clazz),
+            "Partition function name '%s' is registered to both %s and %s", 
name,
+            existing == null ? null : existing.getDeclaringClass().getName(), 
clazz.getName());
       }

Review Comment:
   Done in 182d8d1 — `resolveNames()` now trims each annotation entry, drops 
blanks, and falls back to `getName()` (with a warning log) when all declared 
aliases are blank. This prevents an empty canonical name from poisoning the 
registry.



##########
pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/partition/PartitionIdNormalizer.java:
##########
@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.spi.partition;
+
+import com.google.common.base.Preconditions;
+import java.util.Locale;
+
+
+/// Maps a raw signed integer hash output to a non-negative partition id in `[0, numPartitions)`.
+///
+/// [PartitionFunction] implementations apply the configured normalizer in their
+/// `getPartition(...)` body and report it via [PartitionFunction#getPartitionIdNormalizer()].
+/// The framework also uses the reported value for identity / staleness matching between
+/// config-side and segment-side partition metadata.
+public enum PartitionIdNormalizer {
+  /// Compute the remainder, then shift negative remainders into the valid range with `+ numPartitions`.
+  POSITIVE_MODULO {
+    @Override
+    int toPartitionId(int value, int numPartitions) {
+      int partition = value % numPartitions;
+      return partition < 0 ? partition + numPartitions : partition;
+    }
+
+    @Override
+    int toPartitionId(long value, int numPartitions) {
+      long partition = value % numPartitions;
+      return (int) (partition < 0 ? partition + numPartitions : partition);
+    }
+  },
+  /// Compute the remainder, then take its absolute value.
+  ABS {
+    @Override
+    int toPartitionId(int value, int numPartitions) {
+      int partition = value % numPartitions;
+      return partition < 0 ? -partition : partition;
+    }
+
+    @Override
+    int toPartitionId(long value, int numPartitions) {
+      long partition = value % numPartitions;
+      return (int) (partition < 0 ? -partition : partition);
+    }
+  },
+  /// Mask the sign bit before applying modulo.
+  MASK {
+    @Override
+    int toPartitionId(int value, int numPartitions) {
+      return (value & Integer.MAX_VALUE) % numPartitions;
+    }
+
+    @Override
+    int toPartitionId(long value, int numPartitions) {
+      return (int) ((value & Long.MAX_VALUE) % numPartitions);
+    }
+  },
+  /// Pre-modulo abs (Kafka-style) `abs(value) % numPartitions` that handles `Integer.MIN_VALUE -> 0`
+  /// (and `Long.MIN_VALUE -> 0`) to avoid the `Math.abs` overflow corner. Matches the
+  /// legacy semantics of `HashCodePartitionFunction` and `ByteArrayPartitionFunction`.
+  PRE_MODULO_ABS {
+    @Override
+    int toPartitionId(int value, int numPartitions) {
+      int abs = (value == Integer.MIN_VALUE) ? 0 : Math.abs(value);
+      return abs % numPartitions;
+    }
+
+    @Override
+    int toPartitionId(long value, int numPartitions) {
+      long abs = (value == Long.MIN_VALUE) ? 0L : Math.abs(value);
+      return (int) (abs % numPartitions);
+    }
+  };

Review Comment:
   Good catch — the PR description has been updated to use `PRE_MODULO_ABS` 
consistently throughout. The enum constant and config-string value are both 
`PRE_MODULO_ABS`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to