(pinot) branch master updated: Revert "Make ingestion offset delay metric configurable (#14074)" (#14127)

2024-10-01 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 56ccbc3d5f Revert "Make ingestion offset delay metric configurable 
(#14074)" (#14127)
56ccbc3d5f is described below

commit 56ccbc3d5f04f0d62041c32d314b814c1b7cab4f
Author: Kartik Khare 
AuthorDate: Tue Oct 1 16:39:47 2024 +0530

Revert "Make ingestion offset delay metric configurable (#14074)" (#14127)

This reverts commit bba61eef14a49e7ed7a5c4e73c640c12b916a5d6.

Co-authored-by: Kartik Khare 

---
 .../manager/realtime/IngestionDelayTracker.java| 153 -
 .../realtime/RealtimeSegmentDataManager.java   |   5 +-
 .../manager/realtime/RealtimeTableDataManager.java |   4 +-
 .../realtime/IngestionDelayTrackerTest.java|  13 +-
 4 files changed, 39 insertions(+), 136 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/IngestionDelayTracker.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/IngestionDelayTracker.java
index 658b54c1b3..fd31d8f72b 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/IngestionDelayTracker.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/IngestionDelayTracker.java
@@ -19,7 +19,6 @@
 package org.apache.pinot.core.data.manager.realtime;
 
 import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
 import com.google.common.cache.Cache;
 import com.google.common.cache.CacheBuilder;
 import java.time.Clock;
@@ -38,14 +37,15 @@ import javax.annotation.Nullable;
 import org.apache.pinot.common.metrics.ServerGauge;
 import org.apache.pinot.common.metrics.ServerMetrics;
 import org.apache.pinot.common.utils.LLCSegmentName;
-import org.apache.pinot.spi.env.PinotConfiguration;
 import org.apache.pinot.spi.stream.LongMsgOffset;
 import org.apache.pinot.spi.stream.RowMetadata;
+import org.apache.pinot.spi.stream.StreamMetadataProvider;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
 import org.apache.pinot.spi.utils.builder.TableNameBuilder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+
 /**
  * A Class to track realtime ingestion delay for table partitions on a given 
server.
  * Highlights:
@@ -83,36 +83,22 @@ import org.slf4j.LoggerFactory;
  *
  * TODO: handle bug situations like the one where a partition is not allocated 
to a given server due to a bug.
  */
+
 public class IngestionDelayTracker {
 
   private static class IngestionInfo {
-volatile Long _ingestionTimeMs;
-volatile Long _firstStreamIngestionTimeMs;
-volatile StreamPartitionMsgOffset _currentOffset;
-volatile StreamPartitionMsgOffset _latestOffset;
-final Supplier _latestOffsetFetcher;
-
-IngestionInfo(@Nullable Long ingestionTimeMs, @Nullable Long 
firstStreamIngestionTimeMs,
-@Nullable StreamPartitionMsgOffset currentOffset,
-@Nullable Supplier latestOffsetFetcher) {
+final long _ingestionTimeMs;
+final long _firstStreamIngestionTimeMs;
+final StreamPartitionMsgOffset _currentOffset;
+final StreamPartitionMsgOffset _latestOffset;
+
+IngestionInfo(long ingestionTimeMs, long firstStreamIngestionTimeMs,
+@Nullable StreamPartitionMsgOffset currentOffset, @Nullable 
StreamPartitionMsgOffset latestOffset) {
   _ingestionTimeMs = ingestionTimeMs;
   _firstStreamIngestionTimeMs = firstStreamIngestionTimeMs;
   _currentOffset = currentOffset;
-  _latestOffsetFetcher = latestOffsetFetcher;
-}
-
-void updateCurrentOffset(StreamPartitionMsgOffset currentOffset) {
-  _currentOffset = currentOffset;
-}
-
-void updateLatestOffset(StreamPartitionMsgOffset latestOffset) {
   _latestOffset = latestOffset;
 }
-
-void updateIngestionTimes(long ingestionTimeMs, long 
firstStreamIngestionTimeMs) {
-  _ingestionTimeMs = ingestionTimeMs;
-  _firstStreamIngestionTimeMs = firstStreamIngestionTimeMs;
-}
   }
 
   private static final Logger LOGGER = 
LoggerFactory.getLogger(IngestionDelayTracker.class);
@@ -126,13 +112,6 @@ public class IngestionDelayTracker {
 
   // Cache expire time for ignored segment if there is no update from the 
segment.
   private static final int IGNORED_SEGMENT_CACHE_TIME_MINUTES = 10;
-  public static final String OFFSET_LAG_TRACKING_ENABLE_CONFIG_KEY = 
"offset.lag.tracking.enable";
-  public static final String OFFSET_LAG_TRACKING_UPDATE_INTERVAL_CONFIG_KEY = 
"offset.lag.tracking.update.interval";
-
-  // Since offset lag metric does a call to Kafka, we want to make sure we 
don't do it too frequently.
-  public static final boolean DEFAULT_ENABLE_OFFSET_LAG_METRIC = true;
-  public static final long DEFAULT_OFFSET_LAG_UPDATE_INTER

(pinot) branch master updated (7668b212bb -> bba61eef14)

2024-09-26 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from 7668b212bb Handling null cases in sum,min,max series builders (#14084)
 add bba61eef14 Make ingestion offset delay metric configurable (#14074)

No new revisions were added by this update.

Summary of changes:
 .../manager/realtime/IngestionDelayTracker.java| 153 +
 .../realtime/RealtimeSegmentDataManager.java   |   5 +-
 .../manager/realtime/RealtimeTableDataManager.java |   4 +-
 .../realtime/IngestionDelayTrackerTest.java|  13 +-
 4 files changed, 136 insertions(+), 39 deletions(-)


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



(pinot) branch master updated: Remove `recreateDeletedConsumingSegment` flag from RealtimeSegmentValidationManager (#14024)

2024-09-19 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 2de61de8bc Remove `recreateDeletedConsumingSegment` flag from 
RealtimeSegmentValidationManager (#14024)
2de61de8bc is described below

commit 2de61de8bcf4c5befce3404543f25025cbbf7cbd
Author: Shounak kulkarni 
AuthorDate: Thu Sep 19 18:44:55 2024 +0530

Remove `recreateDeletedConsumingSegment` flag from 
RealtimeSegmentValidationManager (#14024)

* Remove recreateDeletedConsumingSegment flag

In favour of always recreating deleted consuming segments if table is not 
paused.

* handle resumption upon storage quota getting freed up
---
 .../realtime/PinotLLCRealtimeSegmentManager.java   | 14 ++---
 .../RealtimeSegmentValidationManager.java  | 63 ++
 .../PinotLLCRealtimeSegmentManagerTest.java|  5 +-
 3 files changed, 35 insertions(+), 47 deletions(-)

diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
index d799000ed3..7a459d7ddb 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
@@ -912,11 +912,9 @@ public class PinotLLCRealtimeSegmentManager {
* Check whether there are segments in the PROPERTYSTORE with status DONE, 
but no new segment in status
* IN_PROGRESS, and the state for the latest segment in the IDEALSTATE is 
ONLINE.
* If so, it should create a new CONSUMING segment for the partition.
-   * (this operation is done only if @param recreateDeletedConsumingSegment is 
set to true,
-   * which means it's manually triggered by admin not by automatic periodic 
task)
*/
   public void ensureAllPartitionsConsuming(TableConfig tableConfig, 
StreamConfig streamConfig,
-  boolean recreateDeletedConsumingSegment, OffsetCriteria offsetCriteria) {
+  OffsetCriteria offsetCriteria) {
 Preconditions.checkState(!_isStopping, "Segment manager is stopping");
 
 String realtimeTableName = tableConfig.getTableName();
@@ -938,7 +936,7 @@ public class PinotLLCRealtimeSegmentManager {
 getNewPartitionGroupMetadataList(streamConfig, 
currentPartitionGroupConsumptionStatusList);
 streamConfig.setOffsetCriteria(originalOffsetCriteria);
 return ensureAllPartitionsConsuming(tableConfig, streamConfig, 
idealState, newPartitionGroupMetadataList,
-recreateDeletedConsumingSegment, offsetCriteria);
+offsetCriteria);
   } else {
 LOGGER.info("Skipping LLC segments validation for table: {}, 
isTableEnabled: {}, isTablePaused: {}",
 realtimeTableName, isTableEnabled, isTablePaused);
@@ -1158,8 +1156,7 @@ public class PinotLLCRealtimeSegmentManager {
*/
   @VisibleForTesting
   IdealState ensureAllPartitionsConsuming(TableConfig tableConfig, 
StreamConfig streamConfig, IdealState idealState,
-  List newPartitionGroupMetadataList, boolean 
recreateDeletedConsumingSegment,
-  OffsetCriteria offsetCriteria) {
+  List newPartitionGroupMetadataList, 
OffsetCriteria offsetCriteria) {
 String realtimeTableName = tableConfig.getTableName();
 
 InstancePartitions instancePartitions = 
getConsumingInstancePartitions(tableConfig);
@@ -1275,7 +1272,7 @@ public class PinotLLCRealtimeSegmentManager {
 instancePartitionsMap, startOffset);
   } else {
 if (newPartitionGroupSet.contains(partitionGroupId)) {
-  if (recreateDeletedConsumingSegment && 
latestSegmentZKMetadata.getStatus().isCompleted()
+  if (latestSegmentZKMetadata.getStatus().isCompleted()
   && isAllInstancesInState(instanceStateMap, 
SegmentStateModel.ONLINE)) {
 // If we get here, that means in IdealState, the latest 
segment has all replicas ONLINE.
 // Create a new IN_PROGRESS segment in PROPERTYSTORE,
@@ -1737,7 +1734,6 @@ public class PinotLLCRealtimeSegmentManager {
 
 // trigger realtime segment validation job to resume consumption
 Map taskProperties = new HashMap<>();
-
taskProperties.put(RealtimeSegmentValidationManager.RECREATE_DELETED_CONSUMING_SEGMENT_KEY,
 "true");
 if (offsetCriteria != null) {
   taskProperties.put(RealtimeSegmentValidationManager.OFFSET_CRITERIA, 
offsetCriteria);
 }
@@ -1749,7 +1745,7 @@ public class PinotLLCRealtimeSegmentManager {
 + "endpoint in a few moments to double check.", new 
Timestamp(Sy

(pinot) branch master updated: Storage Quota imposition on Realtime tables (#13584)

2024-09-18 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new e9271f6dd7 Storage Quota imposition on Realtime tables (#13584)
e9271f6dd7 is described below

commit e9271f6dd73cb1162e9743b14e61fd420dbf3e27
Author: Shounak kulkarni 
AuthorDate: Wed Sep 18 17:52:46 2024 +0530

Storage Quota imposition on Realtime tables (#13584)

* Storage quota imposition on realtime tables

* fix test mock

* Handle IS update failure case

* refactor

* tests

* move quota checker to PinotLLCRealtimeSegmentManager

* test fix

* Add tableStorageQuotaExceeded gauge on controller

* refactor to use PauseState

* revert TABLE_STORAGE_QUOTA_EXCEEDED metric

* cosmetic

* refactors

* check consuming segments only when table is not paused

* refactor
---
 .../pinot/controller/BaseControllerStarter.java|  4 +-
 .../PinotSegmentUploadDownloadRestletResource.java |  2 +-
 .../api/upload/SegmentValidationUtils.java |  9 +---
 .../realtime/PinotLLCRealtimeSegmentManager.java   |  3 +-
 .../RealtimeSegmentValidationManager.java  | 47 ++--
 .../controller/validation/StorageQuotaChecker.java | 34 ++--
 .../PinotLLCRealtimeSegmentManagerTest.java|  2 +-
 .../helix/core/realtime/SegmentCompletionTest.java |  6 +++
 .../validation/StorageQuotaCheckerTest.java| 63 +-
 .../apache/pinot/spi/config/table/PauseState.java  |  2 +-
 10 files changed, 138 insertions(+), 34 deletions(-)

diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/BaseControllerStarter.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/BaseControllerStarter.java
index 5e4ff8751f..44c8e96f36 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/BaseControllerStarter.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/BaseControllerStarter.java
@@ -493,7 +493,7 @@ public abstract class BaseControllerStarter implements 
ServiceStartable {
 new TableSizeReader(_executorService, _connectionManager, 
_controllerMetrics, _helixResourceManager,
 _leadControllerManager);
 _storageQuotaChecker = new StorageQuotaChecker(_tableSizeReader, 
_controllerMetrics, _leadControllerManager,
-_helixResourceManager);
+_helixResourceManager, _config);
 
 // Setting up periodic tasks
 List controllerPeriodicTasks = 
setupControllerPeriodicTasks();
@@ -852,7 +852,7 @@ public abstract class BaseControllerStarter implements 
ServiceStartable {
 periodicTasks.add(_offlineSegmentIntervalChecker);
 _realtimeSegmentValidationManager =
 new RealtimeSegmentValidationManager(_config, _helixResourceManager, 
_leadControllerManager,
-_pinotLLCRealtimeSegmentManager, _validationMetrics, 
_controllerMetrics);
+_pinotLLCRealtimeSegmentManager, _validationMetrics, 
_controllerMetrics, _storageQuotaChecker);
 periodicTasks.add(_realtimeSegmentValidationManager);
 _brokerResourceValidationManager =
 new BrokerResourceValidationManager(_config, _helixResourceManager, 
_leadControllerManager, _controllerMetrics);
diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotSegmentUploadDownloadRestletResource.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotSegmentUploadDownloadRestletResource.java
index 156a3e9095..5b7cbed00e 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotSegmentUploadDownloadRestletResource.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotSegmentUploadDownloadRestletResource.java
@@ -355,7 +355,7 @@ public class PinotSegmentUploadDownloadRestletResource {
 untarredSegmentSizeInBytes = FileUtils.sizeOfDirectory(tempSegmentDir);
   }
   SegmentValidationUtils.checkStorageQuota(segmentName, 
untarredSegmentSizeInBytes, tableConfig,
-  _controllerConf, _storageQuotaChecker);
+  _storageQuotaChecker);
 
   // Encrypt segment
   String crypterNameInTableConfig = 
tableConfig.getValidationConfig().getCrypterClassName();
diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/upload/SegmentValidationUtils.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/upload/SegmentValidationUtils.java
index dff3dd3d11..ee6219876f 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/upload/SegmentValidationUtils.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/upload/SegmentValidationUtils.java
@@ -19,7 +19,6 @@
 package org.apache.pinot.controller.api.upload;
 
 import

(pinot) branch master updated: Flaky test fix. Query only 1 broker to test quota split (#13771)

2024-09-06 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 6e8333ab24 Flaky test fix. Query only 1 broker to test quota split 
(#13771)
6e8333ab24 is described below

commit 6e8333ab241abcae3ef8a555e3c221b72c0523ed
Author: Shounak kulkarni 
AuthorDate: Fri Sep 6 19:07:17 2024 +0530

Flaky test fix. Query only 1 broker to test quota split (#13771)
---
 .../tests/QueryQuotaClusterIntegrationTest.java| 80 +++---
 1 file changed, 70 insertions(+), 10 deletions(-)

diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/QueryQuotaClusterIntegrationTest.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/QueryQuotaClusterIntegrationTest.java
index d1fb956f2c..dfd9d39727 100644
--- 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/QueryQuotaClusterIntegrationTest.java
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/QueryQuotaClusterIntegrationTest.java
@@ -18,13 +18,18 @@
  */
 package org.apache.pinot.integration.tests;
 
+import com.fasterxml.jackson.databind.JsonNode;
+import java.io.IOException;
 import java.net.URI;
+import java.util.Iterator;
 import java.util.Properties;
 import org.apache.pinot.broker.broker.helix.BaseBrokerStarter;
 import 
org.apache.pinot.broker.queryquota.HelixExternalViewBasedQueryQuotaManagerTest;
+import org.apache.pinot.client.BrokerResponse;
 import org.apache.pinot.client.ConnectionFactory;
 import org.apache.pinot.client.JsonAsyncHttpPinotClientTransportFactory;
 import org.apache.pinot.client.PinotClientException;
+import org.apache.pinot.client.PinotClientTransport;
 import org.apache.pinot.client.ResultSetGroup;
 import org.apache.pinot.common.utils.http.HttpClient;
 import org.apache.pinot.spi.config.table.QuotaConfig;
@@ -46,6 +51,9 @@ import static org.testng.Assert.assertTrue;
  * tested as part of {@link HelixExternalViewBasedQueryQuotaManagerTest}
  */
 public class QueryQuotaClusterIntegrationTest extends 
BaseClusterIntegrationTest {
+  private PinotClientTransport _pinotClientTransport;
+  private String _brokerHostPort;
+
   @BeforeClass
   public void setUp()
   throws Exception {
@@ -56,6 +64,7 @@ public class QueryQuotaClusterIntegrationTest extends 
BaseClusterIntegrationTest
 startController();
 startBrokers(1);
 startServers(1);
+_brokerHostPort = LOCAL_HOST + ":" + _brokerPorts.get(0);
 
 // Create and upload the schema and table config
 Schema schema = createSchema();
@@ -65,9 +74,11 @@ public class QueryQuotaClusterIntegrationTest extends 
BaseClusterIntegrationTest
 
 Properties properties = new Properties();
 properties.put(FAIL_ON_EXCEPTIONS, "FALSE");
+_pinotClientTransport = new JsonAsyncHttpPinotClientTransportFactory()
+.withConnectionProperties(getPinotConnectionProperties())
+.buildTransport();
 _pinotConnection = ConnectionFactory.fromZookeeper(properties, getZkUrl() 
+ "/" + getHelixClusterName(),
-new 
JsonAsyncHttpPinotClientTransportFactory().withConnectionProperties(getPinotConnectionProperties())
-.buildTransport());
+_pinotClientTransport);
   }
 
   @AfterMethod
@@ -76,6 +87,8 @@ public class QueryQuotaClusterIntegrationTest extends 
BaseClusterIntegrationTest
 addQueryQuotaToClusterConfig(null);
 addQueryQuotaToDatabaseConfig(null);
 addQueryQuotaToTableConfig(null);
+_brokerHostPort = LOCAL_HOST + ":" + _brokerPorts.get(0);
+verifyQuotaUpdate(0);
   }
 
   @Test
@@ -125,12 +138,13 @@ public class QueryQuotaClusterIntegrationTest extends 
BaseClusterIntegrationTest
   addQueryQuotaToTableConfig(10);
   // Add one more broker such that quota gets distributed equally among 
them
   brokerStarter = startOneBroker(2);
-  // to allow change propagation to QueryQuotaManager
-  Thread.sleep(1000);
-  testQueryRate(10);
+  _brokerHostPort = LOCAL_HOST + ":" + brokerStarter.getPort();
+  // query only one broker across the divided quota
+  testQueryRateOnBroker(5);
   // drop table level quota so that database quota comes into effect
   addQueryQuotaToTableConfig(null);
-  testQueryRate(25);
+  // query only one broker across the divided quota
+  testQueryRateOnBroker(12.5f);
 } finally {
   if (brokerStarter != null) {
 brokerStarter.stop();
@@ -143,19 +157,29 @@ public class QueryQuotaClusterIntegrationTest extends 
BaseClusterIntegrationTest
* Then runs the query load with double the max rate and expects queries to 
fail due to quota breach.
* @param maxRate max rate allowed by the quota
*/
-  void testQueryRate(int maxRate)
+  void testQueryRate(float maxRate)
  

(pinot) branch master updated: Debug endpoints to fetch effective query quotas on broker (#13864)

2024-08-30 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new fc132c3f8a Debug endpoints to fetch effective query quotas on broker 
(#13864)
fc132c3f8a is described below

commit fc132c3f8ab4ee610bcdfe958d9675bd7bc4a164
Author: Shounak kulkarni 
AuthorDate: Fri Aug 30 19:26:04 2024 +0530

Debug endpoints to fetch effective query quotas on broker (#13864)
---
 .../broker/api/resources/PinotBrokerDebug.java | 28 ++
 .../broker/broker/BrokerAdminApiApplication.java   |  4 +++-
 .../broker/broker/helix/BaseBrokerStarter.java | 28 +++---
 .../HelixExternalViewBasedQueryQuotaManager.java   | 19 +++
 .../pinot/broker/queryquota/QueryQuotaManager.java | 14 +++
 ...elixExternalViewBasedQueryQuotaManagerTest.java | 21 ++--
 .../java/org/apache/pinot/core/auth/Actions.java   |  2 ++
 7 files changed, 82 insertions(+), 34 deletions(-)

diff --git 
a/pinot-broker/src/main/java/org/apache/pinot/broker/api/resources/PinotBrokerDebug.java
 
b/pinot-broker/src/main/java/org/apache/pinot/broker/api/resources/PinotBrokerDebug.java
index 5c8ce167f6..78a6dd324f 100644
--- 
a/pinot-broker/src/main/java/org/apache/pinot/broker/api/resources/PinotBrokerDebug.java
+++ 
b/pinot-broker/src/main/java/org/apache/pinot/broker/api/resources/PinotBrokerDebug.java
@@ -47,6 +47,7 @@ import javax.ws.rs.core.MediaType;
 import javax.ws.rs.core.Response;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.pinot.broker.broker.AccessControlFactory;
+import org.apache.pinot.broker.queryquota.QueryQuotaManager;
 import org.apache.pinot.broker.routing.BrokerRoutingManager;
 import org.apache.pinot.common.request.BrokerRequest;
 import org.apache.pinot.common.utils.DatabaseUtils;
@@ -92,6 +93,9 @@ public class PinotBrokerDebug {
   @Inject
   private ServerRoutingStatsManager _serverRoutingStatsManager;
 
+  @Inject
+  private QueryQuotaManager _queryQuotaManager;
+
   @Inject
   AccessControlFactory _accessControlFactory;
 
@@ -295,4 +299,28 @@ public class PinotBrokerDebug {
 ThreadResourceUsageAccountant threadAccountant = 
Tracing.getThreadAccountant();
 return threadAccountant.getQueryResources().values();
   }
+
+  @GET
+  @Path("debug/tables/queryQuota/{tableName}")
+  @Produces(MediaType.TEXT_PLAIN)
+  @Authorize(targetType = TargetType.TABLE, paramName = "tableName", action = 
Actions.Table.GET_TABLE_QUERY_QUOTA)
+  @ApiOperation(value = "Get the active query quota being imposed on the 
table", notes = "This is a debug endpoint, "
+  + "and won't maintain backward compatibility")
+  public String getTableQueryQuota(
+  @ApiParam(value = "Name of the table with type") @PathParam("tableName") 
String tableName,
+  @Context HttpHeaders headers) {
+tableName = DatabaseUtils.translateTableName(tableName, headers);
+return String.valueOf(_queryQuotaManager.getTableQueryQuota(tableName));
+  }
+
+  @GET
+  @Path("debug/databases/queryQuota/{databaseName}")
+  @Produces(MediaType.TEXT_PLAIN)
+  @Authorize(targetType = TargetType.CLUSTER, action = 
Actions.Cluster.GET_DATABASE_QUERY_QUOTA)
+  @ApiOperation(value = "Get the active query quota being imposed on the 
database", notes = "This is a debug endpoint, "
+  + "and won't maintain backward compatibility")
+  public String getDatabaseQueryQuota(
+  @ApiParam(value = "Name of the database") @PathParam("databaseName") 
String databaseName) {
+return 
String.valueOf(_queryQuotaManager.getDatabaseQueryQuota(databaseName));
+  }
 }
diff --git 
a/pinot-broker/src/main/java/org/apache/pinot/broker/broker/BrokerAdminApiApplication.java
 
b/pinot-broker/src/main/java/org/apache/pinot/broker/broker/BrokerAdminApiApplication.java
index d87dd1d518..fc443caab0 100644
--- 
a/pinot-broker/src/main/java/org/apache/pinot/broker/broker/BrokerAdminApiApplication.java
+++ 
b/pinot-broker/src/main/java/org/apache/pinot/broker/broker/BrokerAdminApiApplication.java
@@ -32,6 +32,7 @@ import 
org.apache.hc.client5.http.io.HttpClientConnectionManager;
 import org.apache.hc.core5.http.io.SocketConfig;
 import org.apache.hc.core5.util.Timeout;
 import org.apache.helix.HelixManager;
+import org.apache.pinot.broker.queryquota.QueryQuotaManager;
 import org.apache.pinot.broker.requesthandler.BrokerRequestHandler;
 import org.apache.pinot.broker.routing.BrokerRoutingManager;
 import org.apache.pinot.common.http.PoolingHttpClientConnectionManagerHelper;
@@ -74,7 +75,7 @@ public class BrokerAdminApiApplication extends ResourceConfig 
{
   public BrokerAdminApiApplication(BrokerRoutingManager routingManager, 
BrokerRequestHandler brokerRequestHandler,
   BrokerMetrics 

(pinot) branch master updated: Add TablePauseStatus to track the pause details (#13803)

2024-08-20 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 692d78c112 Add TablePauseStatus to track the pause details (#13803)
692d78c112 is described below

commit 692d78c1120c411feabfe378d276ee0adec1f68c
Author: Shounak kulkarni 
AuthorDate: Wed Aug 21 10:36:43 2024 +0500

Add TablePauseStatus to track the pause details (#13803)

* Add table pause status container to track the pause details

* deprecate IS_TABLE_PAUSED

* fix

* Allow passing comment for resuming ingestion

* Avoid the confusion on description field in APIs

* refactor PauseStatus

* refactors

* revert to consumingSegments

* fix naming
---
 .../{PauseStatus.java => PauseStatusDetails.java}  | 29 ++--
 .../api/resources/PinotRealtimeTableResource.java  | 11 ++-
 .../controller/helix/ControllerRequestClient.java  | 14 ++--
 .../controller/helix/SegmentStatusChecker.java |  2 +-
 .../realtime/PinotLLCRealtimeSegmentManager.java   | 82 --
 ...PartialUpsertTableRebalanceIntegrationTest.java |  6 +-
 .../apache/pinot/spi/config/table/PauseState.java  | 75 
 7 files changed, 178 insertions(+), 41 deletions(-)

diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PauseStatus.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PauseStatusDetails.java
similarity index 65%
rename from 
pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PauseStatus.java
rename to 
pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PauseStatusDetails.java
index 9542e70eba..d531ed65e8 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PauseStatus.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PauseStatusDetails.java
@@ -22,21 +22,28 @@ import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonInclude;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import java.util.Set;
+import org.apache.pinot.spi.config.table.PauseState;
 
 
 @JsonInclude(JsonInclude.Include.NON_NULL)
-public class PauseStatus {
+public class PauseStatusDetails {
   private boolean _pauseFlag;
   private Set _consumingSegments;
-  private String _description;
+  private PauseState.ReasonCode _reasonCode;
+  private String _comment;
+  private String _timestamp;
 
   @JsonCreator
-  public PauseStatus(@JsonProperty("pauseFlag") boolean pauseFlag,
+  public PauseStatusDetails(@JsonProperty("pauseFlag") boolean pauseFlag,
   @JsonProperty("consumingSegments") Set consumingSegments,
-  @JsonProperty("description") String description) {
+  @JsonProperty("reasonCode") PauseState.ReasonCode reasonCode,
+  @JsonProperty("comment") String comment,
+  @JsonProperty("timestamp") String timestamp) {
 _pauseFlag = pauseFlag;
 _consumingSegments = consumingSegments;
-_description = description;
+_reasonCode = reasonCode;
+_comment = comment != null ? comment : pauseFlag ? "Table is paused." : 
"Table is unpaused.";
+_timestamp = timestamp;
   }
 
   public boolean getPauseFlag() {
@@ -47,7 +54,15 @@ public class PauseStatus {
 return _consumingSegments;
   }
 
-  public String getDescription() {
-return _description;
+  public PauseState.ReasonCode getReasonCode() {
+return _reasonCode;
+  }
+
+  public String getComment() {
+return _comment;
+  }
+
+  public String getTimestamp() {
+return _timestamp;
   }
 }
diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotRealtimeTableResource.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotRealtimeTableResource.java
index 44fc0433e5..2ab15427f7 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotRealtimeTableResource.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotRealtimeTableResource.java
@@ -57,6 +57,7 @@ import 
org.apache.pinot.controller.util.ConsumingSegmentInfoReader;
 import org.apache.pinot.core.auth.Actions;
 import org.apache.pinot.core.auth.Authorize;
 import org.apache.pinot.core.auth.TargetType;
+import org.apache.pinot.spi.config.table.PauseState;
 import org.apache.pinot.spi.config.table.TableType;
 import org.apache.pinot.spi.utils.CommonConstants;
 import org.apache.pinot.spi.utils.JsonUtils;
@@ -103,12 +104,14 @@ public class PinotRealtimeTableResource {
   @ApiOperation(value = "Pause consumption of a realtime table", notes = 
"Pause the consumption of a realtime table"

(pinot) branch master updated: Add logs indicating the star-tree config diff to understand the rebuild purpose (#13627)

2024-08-13 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 61511fbd15 Add logs indicating the star-tree config diff to understand 
the rebuild purpose (#13627)
61511fbd15 is described below

commit 61511fbd15d767f5ad6b2c3f60ea098926de27f1
Author: Shounak kulkarni 
AuthorDate: Tue Aug 13 12:22:13 2024 +0500

Add logs indicating the star-tree config diff to understand the rebuild 
purpose (#13627)
---
 .../local/startree/v2/builder/MultipleTreesBuilder.java | 10 ++
 .../startree/v2/builder/StarTreeIndexSeparator.java | 17 -
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java
index 35343dc3f1..361e6fa67d 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java
@@ -90,6 +90,15 @@ public class MultipleTreesBuilder implements Closeable {
 _metadataProperties =
 CommonsConfigurationUtils.fromFile(new File(_segmentDirectory, 
V1Constants.MetadataKeys.METADATA_FILE_NAME));
 _separator = getSeparator();
+// log the updated star-tree configs
+if (LOGGER.isDebugEnabled()) {
+  StringBuilder logUpdatedStarTrees = new StringBuilder();
+  logUpdatedStarTrees.append("Updated star-tree configs :");
+  for (StarTreeV2BuilderConfig startree : _builderConfigs) {
+logUpdatedStarTrees.append("\n").append(startree);
+  }
+  LOGGER.debug(logUpdatedStarTrees.toString());
+}
 _segment = ImmutableSegmentLoader.load(indexDir, ReadMode.mmap);
   }
 
@@ -127,6 +136,7 @@ public class MultipleTreesBuilder implements Closeable {
   throws Exception {
 List starTreeMetadataList = new 
SegmentMetadataImpl(_indexDir).getStarTreeV2MetadataList();
 if (starTreeMetadataList == null) {
+  LOGGER.info("No existing star-tree. Building all new start-trees.");
   return null;
 }
 try {
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparator.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparator.java
index 29e677f5d5..51f01a0b48 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparator.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparator.java
@@ -36,12 +36,16 @@ import org.apache.pinot.segment.spi.V1Constants;
 import 
org.apache.pinot.segment.spi.index.startree.AggregationFunctionColumnPair;
 import org.apache.pinot.segment.spi.index.startree.StarTreeV2Constants;
 import org.apache.pinot.segment.spi.index.startree.StarTreeV2Metadata;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 
 /**
  * The {@code StarTreeIndexSeparator} pulls out the individual star-trees from 
the common star-tree index file
  */
 public class StarTreeIndexSeparator implements Closeable {
+  private static final Logger LOGGER = 
LoggerFactory.getLogger(StarTreeIndexSeparator.class);
+
   private final List> _indexMapList;
   private final List _builderConfigList;
   private final List _numDocsList;
@@ -58,9 +62,18 @@ public class StarTreeIndexSeparator implements Closeable {
 _builderConfigList = new ArrayList<>(numStarTrees);
 _numDocsList = new ArrayList<>(numStarTrees);
 for (StarTreeV2Metadata starTreeMetadata : starTreeMetadataList) {
-  
_builderConfigList.add(StarTreeV2BuilderConfig.fromMetadata(starTreeMetadata));
+  StarTreeV2BuilderConfig config = 
StarTreeV2BuilderConfig.fromMetadata(starTreeMetadata);
+  _builderConfigList.add(config);
   _numDocsList.add(starTreeMetadata.getNumDocs());
 }
+if (LOGGER.isDebugEnabled()) {
+  StringBuilder logExistingStarTrees = new StringBuilder();
+  logExistingStarTrees.append("Existing star-tree configs :");
+  for (StarTreeV2BuilderConfig config : _builderConfigList) {
+logExistingStarTrees.append("\n").append(config);
+  }
+  LOGGER.debug(logExistingStarTrees.toString());
+}
 _indexFileChannel = new RandomAccessFile(indexFile, "r").getChannel();
   }
 
@@ -77,8 +90,10 @@ public class StarTreeIndexSeparator implements Closeable {
   throws IOException {
 int treeIndex = _builderConfigList.indexOf(builderConfig);
 if (treeIndex == -1) {
+  LOGGER.info("

(pinot) branch master updated: Database query quota (#13544)

2024-08-04 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 9a7e11682a Database query quota (#13544)
9a7e11682a is described below

commit 9a7e11682a56761684edb1aae4a7ec5b9984c6b8
Author: Shounak kulkarni 
AuthorDate: Mon Aug 5 08:42:26 2024 +0500

Database query quota (#13544)
---
 .../broker/broker/helix/BaseBrokerStarter.java |  17 ++
 ...okerResourceOnlineOfflineStateModelFactory.java |   3 +
 .../BrokerUserDefinedMessageHandlerFactory.java|  33 +++
 .../broker/broker/helix/ClusterChangeMediator.java |  12 +-
 .../HelixExternalViewBasedQueryQuotaManager.java   | 219 ++--
 .../pinot/broker/queryquota/QueryQuotaManager.java |   7 +
 .../BaseSingleStageBrokerRequestHandler.java   |  10 +-
 .../MultiStageBrokerRequestHandler.java|  13 +-
 ...elixExternalViewBasedQueryQuotaManagerTest.java | 222 +
 .../BaseSingleStageBrokerRequestHandlerTest.java   |   1 +
 .../messages/DatabaseConfigRefreshMessage.java |  60 ++
 .../pinot/common/metadata/ZKMetadataProvider.java  |  85 
 .../apache/pinot/common/utils/DatabaseUtils.java   |  14 ++
 .../resources/PinotDatabaseRestletResource.java|  85 +++-
 .../helix/core/PinotHelixResourceManager.java  |  56 ++
 .../java/org/apache/pinot/core/auth/Actions.java   |   2 +
 .../tests/QueryQuotaClusterIntegrationTest.java| 207 +++
 .../apache/pinot/spi/config/DatabaseConfig.java|  56 ++
 .../apache/pinot/spi/utils/CommonConstants.java|   1 +
 19 files changed, 1046 insertions(+), 57 deletions(-)

diff --git 
a/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/BaseBrokerStarter.java
 
b/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/BaseBrokerStarter.java
index 04bf6ce921..553228d89c 100644
--- 
a/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/BaseBrokerStarter.java
+++ 
b/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/BaseBrokerStarter.java
@@ -107,6 +107,8 @@ public abstract class BaseBrokerStarter implements 
ServiceStartable {
   protected String _instanceId;
   private volatile boolean _isStarting = false;
   private volatile boolean _isShuttingDown = false;
+
+  protected final List _clusterConfigChangeHandlers = 
new ArrayList<>();
   protected final List _idealStateChangeHandlers = new 
ArrayList<>();
   protected final List _externalViewChangeHandlers = new 
ArrayList<>();
   protected final List _instanceConfigChangeHandlers = 
new ArrayList<>();
@@ -214,6 +216,15 @@ public abstract class BaseBrokerStarter implements 
ServiceStartable {
 _instanceConfigChangeHandlers.add(instanceConfigChangeHandler);
   }
 
+  /**
+   * Adds a cluster config change handler to handle Helix cluster config 
change callbacks.
+   * NOTE: all change handlers will be run in a single thread, so any slow 
change handler can block other change
+   * handlers from running. For slow change handler, make it asynchronous.
+   */
+  public void addClusterConfigChangeHandler(ClusterChangeHandler 
clusterConfigChangeHandler) {
+_clusterConfigChangeHandlers.add(clusterConfigChangeHandler);
+  }
+
   /**
* Adds a live instance change handler to handle Helix live instance change 
callbacks.
* NOTE: all change handlers will be run in a single thread, so any slow 
change handler can block other change
@@ -350,6 +361,10 @@ public abstract class BaseBrokerStarter implements 
ServiceStartable {
 _brokerAdminApplication.start(_listenerConfigs);
 
 LOGGER.info("Initializing cluster change mediator");
+for (ClusterChangeHandler clusterConfigChangeHandler : 
_clusterConfigChangeHandlers) {
+  clusterConfigChangeHandler.init(_spectatorHelixManager);
+}
+_clusterConfigChangeHandlers.add(queryQuotaManager);
 for (ClusterChangeHandler idealStateChangeHandler : 
_idealStateChangeHandlers) {
   idealStateChangeHandler.init(_spectatorHelixManager);
 }
@@ -368,6 +383,7 @@ public abstract class BaseBrokerStarter implements 
ServiceStartable {
   liveInstanceChangeHandler.init(_spectatorHelixManager);
 }
 Map> clusterChangeHandlersMap = new 
HashMap<>();
+clusterChangeHandlersMap.put(ChangeType.CLUSTER_CONFIG, 
_clusterConfigChangeHandlers);
 clusterChangeHandlersMap.put(ChangeType.IDEAL_STATE, 
_idealStateChangeHandlers);
 clusterChangeHandlersMap.put(ChangeType.EXTERNAL_VIEW, 
_externalViewChangeHandlers);
 clusterChangeHandlersMap.put(ChangeType.INSTANCE_CONFIG, 
_instanceConfigChangeHandlers);
@@ -379,6 +395,7 @@ public abstract class BaseBrokerStarter implements 
ServiceStartable {
 _spectatorHelixManager.addIdealStateChangeListener(_clusterChangeMediator);
 
_spectatorHelixManager.addExtern

(pinot) branch master updated: Reduce Kinesis default rate limit to 1 to account for replication (#13649)

2024-07-31 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new dd8a6477c2 Reduce Kinesis default rate limit to 1 to account for 
replication (#13649)
dd8a6477c2 is described below

commit dd8a6477c22e761a2a6dc3264b9c2a86c94427fd
Author: Kartik Khare 
AuthorDate: Wed Jul 31 12:47:24 2024 +0530

Reduce Kinesis default rate limit to 1 to account for replication (#13649)

* Use debug logs in case we run into rate limit exceeded exception

* lower kinesis rate limit

* Fix exception

-

Co-authored-by: Kartik Khare 
---
 .../apache/pinot/plugin/stream/kinesis/KinesisConfig.java  |  8 +++-
 .../pinot/plugin/stream/kinesis/KinesisConsumer.java   | 14 --
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git 
a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
 
b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
index 529e218e90..6f84407006 100644
--- 
a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
+++ 
b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
@@ -70,7 +70,13 @@ public class KinesisConfig {
   public static final String DEFAULT_IAM_ROLE_BASED_ACCESS_ENABLED = "false";
   public static final String DEFAULT_SESSION_DURATION_SECONDS = "900";
   public static final String DEFAULT_ASYNC_SESSION_UPDATED_ENABLED = "true";
-  public static final String DEFAULT_RPS_LIMIT = "5";
+
+  // Kinesis has a default limit of 5 getRecord requests per second per shard.
+  // This limit is enforced by Kinesis and is not configurable.
+  // We are setting it to 1 to avoid hitting the limit  in a replicated setup,
+  // where multiple replicas are fetching from the same shard.
+  // see - 
https://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetRecords.html
+  public static final String DEFAULT_RPS_LIMIT = "1";
 
   private final String _streamTopicName;
   private final String _awsRegion;
diff --git 
a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
 
b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index e7bb76797a..d90b1b61bb 100644
--- 
a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ 
b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -63,7 +63,17 @@ public class KinesisConsumer extends 
KinesisConnectionHandler implements Partiti
 
   @Override
   public synchronized KinesisMessageBatch 
fetchMessages(StreamPartitionMsgOffset startMsgOffset, int timeoutMs) {
-KinesisPartitionGroupOffset startOffset = (KinesisPartitionGroupOffset) 
startMsgOffset;
+try {
+  return getKinesisMessageBatch((KinesisPartitionGroupOffset) 
startMsgOffset);
+} catch (ProvisionedThroughputExceededException pte) {
+  LOGGER.error("Rate limit exceeded while fetching messages from Kinesis 
stream: {} with threshold: {}",
+  pte.getMessage(), _config.getRpsLimit());
+  return new KinesisMessageBatch(List.of(), (KinesisPartitionGroupOffset) 
startMsgOffset, false);
+}
+  }
+
+  private KinesisMessageBatch 
getKinesisMessageBatch(KinesisPartitionGroupOffset startMsgOffset) {
+KinesisPartitionGroupOffset startOffset = startMsgOffset;
 String shardId = startOffset.getShardId();
 String startSequenceNumber = startOffset.getSequenceNumber();
 // Get the shard iterator
@@ -122,7 +132,7 @@ public class KinesisConsumer extends 
KinesisConnectionHandler implements Partiti
 } catch (InterruptedException e) {
   throw new RuntimeException(e);
 }
-_currentSecond++;
+_currentSecond = (int) 
TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
 _numRequestsInCurrentSecond = 1;
   } else {
 _numRequestsInCurrentSecond++;


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



(pinot) branch SegmentProcessorFrameworkImprovement deleted (was 0022190559)

2024-07-26 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch SegmentProcessorFrameworkImprovement
in repository https://gitbox.apache.org/repos/asf/pinot.git


 was 0022190559 Added support to upload segments in batch mode with 
METADATA upload type (#13690)

This change permanently discards the following revisions:

 discard 0022190559 Added support to upload segments in batch mode with 
METADATA upload type (#13690)


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



(pinot) branch master updated (8dd5b50b40 -> 9fd75c950d)

2024-07-26 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from 8dd5b50b40 Update Notice and License for 1.2.0 Release (#13691)
 add 9fd75c950d Build Profiles for aarch64 (#13648)

No new revisions were added by this update.

Summary of changes:
 .../sql/parsers/rewriter/ClpRewriterTest.java  |  9 
 .../test/java/org/apache/pinot/util/TestUtils.java |  8 
 .../function/ClpTransformFunctionsTest.java|  4 ++
 .../tests/CLPEncodingRealtimeIntegrationTest.java  |  3 ++
 .../clplog/CLPLogRecordExtractorTest.java  | 12 +
 .../local/segment/creator/DictionariesTest.java|  8 
 .../index/creator/CLPForwardIndexCreatorTest.java  |  3 ++
 .../mutable/CLPMutableForwardIndexTest.java|  4 ++
 .../spi/memory/PinotLArrayByteBufferTest.java  |  5 +++
 pom.xml| 52 ++
 10 files changed, 108 insertions(+)


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



(pinot) branch master updated: Do not log exceptions when fetching offsets for lag metric (#13528)

2024-07-03 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 55806ba484 Do not log exceptions when fetching offsets for lag metric 
(#13528)
55806ba484 is described below

commit 55806ba484f75d3629b11c043eebe25aed222630
Author: Kartik Khare 
AuthorDate: Wed Jul 3 12:42:28 2024 +0530

Do not log exceptions when fetching offsets for lag metric (#13528)

Co-authored-by: Kartik Khare 

---
 .../realtime/RealtimeSegmentDataManager.java   | 29 --
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java
index 862ec52615..c26b2c14f3 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java
@@ -1682,25 +1682,38 @@ public class RealtimeSegmentDataManager extends 
SegmentDataManager {
 return _idleTimer.getTimeSinceEventLastConsumedMs();
   }
 
+  public StreamPartitionMsgOffset fetchLatestStreamOffset(long maxWaitTimeMs, 
boolean useDebugLog) {
+return fetchStreamOffset(OffsetCriteria.LARGEST_OFFSET_CRITERIA, 
maxWaitTimeMs, useDebugLog);
+  }
+
   public StreamPartitionMsgOffset fetchLatestStreamOffset(long maxWaitTimeMs) {
-return fetchStreamOffset(OffsetCriteria.LARGEST_OFFSET_CRITERIA, 
maxWaitTimeMs);
+return fetchLatestStreamOffset(maxWaitTimeMs, false);
+  }
+
+  public StreamPartitionMsgOffset fetchEarliestStreamOffset(long 
maxWaitTimeMs, boolean useDebugLog) {
+return fetchStreamOffset(OffsetCriteria.SMALLEST_OFFSET_CRITERIA, 
maxWaitTimeMs, useDebugLog);
   }
 
   public StreamPartitionMsgOffset fetchEarliestStreamOffset(long 
maxWaitTimeMs) {
-return fetchStreamOffset(OffsetCriteria.SMALLEST_OFFSET_CRITERIA, 
maxWaitTimeMs);
+return fetchEarliestStreamOffset(maxWaitTimeMs, false);
   }
 
-  private StreamPartitionMsgOffset fetchStreamOffset(OffsetCriteria 
offsetCriteria, long maxWaitTimeMs) {
+  private StreamPartitionMsgOffset fetchStreamOffset(OffsetCriteria 
offsetCriteria, long maxWaitTimeMs,
+  boolean useDebugLog) {
 if (_partitionMetadataProvider == null) {
   createPartitionMetadataProvider("Fetch latest stream offset");
 }
 try {
   return 
_partitionMetadataProvider.fetchStreamPartitionOffset(offsetCriteria, 
maxWaitTimeMs);
 } catch (Exception e) {
-  _segmentLogger.warn(
-  String.format(
-  "Cannot fetch stream offset with criteria %s for clientId %s and 
partitionGroupId %d with maxWaitTime %d",
-  offsetCriteria, _clientId, _partitionGroupId, maxWaitTimeMs), e);
+  String logMessage = String.format(
+  "Cannot fetch stream offset with criteria %s for clientId %s and 
partitionGroupId %d with maxWaitTime %d",
+  offsetCriteria, _clientId, _partitionGroupId, maxWaitTimeMs);
+  if (!useDebugLog) {
+_segmentLogger.warn(logMessage, e);
+  } else {
+_segmentLogger.debug(logMessage, e);
+  }
 }
 return null;
   }
@@ -1810,7 +1823,7 @@ public class RealtimeSegmentDataManager extends 
SegmentDataManager {
   private void updateIngestionMetrics(RowMetadata metadata) {
 if (metadata != null) {
   try {
-StreamPartitionMsgOffset latestOffset = fetchLatestStreamOffset(5000);
+StreamPartitionMsgOffset latestOffset = fetchLatestStreamOffset(5000, 
true);
 
_realtimeTableDataManager.updateIngestionMetrics(metadata.getRecordIngestionTimeMs(),
 metadata.getFirstStreamRecordIngestionTimeMs(), 
metadata.getOffset(), latestOffset, _partitionGroupId);
   } catch (Exception e) {


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



(pinot) branch master updated: Handle unsupported exception gracefully (#13524)

2024-07-02 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new f54559987e Handle unsupported exception gracefully (#13524)
f54559987e is described below

commit f54559987e9b66eaed4cc68852653e14a133b9e0
Author: Kartik Khare 
AuthorDate: Tue Jul 2 18:34:36 2024 +0530

Handle unsupported exception gracefully (#13524)

Co-authored-by: Kartik Khare 

---
 .../pinot/core/data/manager/realtime/IngestionDelayTracker.java   | 4 
 .../pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java  | 3 +--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/IngestionDelayTracker.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/IngestionDelayTracker.java
index eed1302708..6953ddaf33 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/IngestionDelayTracker.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/IngestionDelayTracker.java
@@ -195,6 +195,10 @@ public class IngestionDelayTracker {
 StreamPartitionMsgOffset currentOffset = offset._offset;
 StreamPartitionMsgOffset latestOffset = offset._latestOffset;
 
+if (currentOffset == null || latestOffset == null) {
+  return 0;
+}
+
 // Compute aged delay for current partition
 // TODO: Support other types of offsets
 if (!(currentOffset instanceof LongMsgOffset && latestOffset instanceof 
LongMsgOffset)) {
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java
index dbfe885cc0..862ec52615 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java
@@ -1810,8 +1810,7 @@ public class RealtimeSegmentDataManager extends 
SegmentDataManager {
   private void updateIngestionMetrics(RowMetadata metadata) {
 if (metadata != null) {
   try {
-StreamPartitionMsgOffset latestOffset =
-
_partitionMetadataProvider.fetchStreamPartitionOffset(OffsetCriteria.LARGEST_OFFSET_CRITERIA,
 5000);
+StreamPartitionMsgOffset latestOffset = fetchLatestStreamOffset(5000);
 
_realtimeTableDataManager.updateIngestionMetrics(metadata.getRecordIngestionTimeMs(),
 metadata.getFirstStreamRecordIngestionTimeMs(), 
metadata.getOffset(), latestOffset, _partitionGroupId);
   } catch (Exception e) {


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



(pinot) branch master updated: Fix NPE in Minion (#13518)

2024-07-02 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new c156042c75 Fix NPE in Minion (#13518)
c156042c75 is described below

commit c156042c755615838264e8455c3b84cece722e8b
Author: aishikbh 
AuthorDate: Tue Jul 2 17:57:02 2024 +0530

Fix NPE in Minion (#13518)

* Fix NPE in Minion

* Fix a corner case where tasks/schedule API is called without any table
  name or task name.
* Put the null check before adding elements to the map.

* address comments.

* Added annotations.
---
 .../pinot/controller/api/resources/PinotTaskRestletResource.java | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java
index 2a83915354..1b5f8c2f95 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java
@@ -616,8 +616,10 @@ public class PinotTaskRestletResource {
   @Produces(MediaType.APPLICATION_JSON)
   @Authenticate(AccessType.UPDATE)
   @ApiOperation("Schedule tasks and return a map from task type to task name 
scheduled")
-  public Map scheduleTasks(@ApiParam(value = "Task type") 
@QueryParam("taskType") String taskType,
-  @ApiParam(value = "Table name (with type suffix)") 
@QueryParam("tableName") String tableName,
+  @Nullable
+  public Map scheduleTasks(
+  @ApiParam(value = "Task type") @QueryParam("taskType") @Nullable String 
taskType,
+  @ApiParam(value = "Table name (with type suffix)") 
@QueryParam("tableName") @Nullable String tableName,
   @ApiParam(value = "Minion Instance tag to schedule the task explicitly 
on") @QueryParam("minionInstanceTag")
   @Nullable String minionInstanceTag, @Context HttpHeaders headers) {
 String database = headers != null ? headers.getHeaderString(DATABASE) : 
DEFAULT_DATABASE;
@@ -632,8 +634,9 @@ public class PinotTaskRestletResource {
   Map> allTaskNames = tableName != null ? 
_pinotTaskManager.scheduleAllTasksForTable(
   DatabaseUtils.translateTableName(tableName, headers), 
minionInstanceTag)
   : _pinotTaskManager.scheduleAllTasksForDatabase(database, 
minionInstanceTag);
-  return allTaskNames.entrySet().stream()
+  Map result = 
allTaskNames.entrySet().stream().filter(entry -> entry.getValue() != null)
   .collect(Collectors.toMap(Map.Entry::getKey, entry -> 
String.join(",", entry.getValue(;
+  return result.isEmpty() ? null : result;
 }
   }
 


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



(pinot) branch master updated: Add scalar functions for geohash (#13471)

2024-06-26 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new f52e651c31 Add scalar functions for geohash (#13471)
f52e651c31 is described below

commit f52e651c3184bbe3229061224194674a8ceaa1c7
Author: Kartik Khare 
AuthorDate: Wed Jun 26 12:34:41 2024 +0530

Add scalar functions for geohash (#13471)
---
 .../common/function/scalar/GeohashFunctions.java   | 160 +
 .../function/scalar/GeohashFunctionsTest.java  |  66 +
 2 files changed, 226 insertions(+)

diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/GeohashFunctions.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/GeohashFunctions.java
new file mode 100644
index 00..1c784254ce
--- /dev/null
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/GeohashFunctions.java
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.common.function.scalar;
+
+import org.apache.pinot.spi.annotations.ScalarFunction;
+
+/**
+ * Geohash scalar functions that can be used in transformation.
+ * This class is used to encode and decode geohash values.
+ */
+public class GeohashFunctions {
+  private static final String BASE32 = "0123456789bcdefghjkmnpqrstuvwxyz";
+  private static final int[] BITS = {16, 8, 4, 2, 1};
+
+  private GeohashFunctions() {
+  }
+
+  public static long encode(double latitude, double longitude, int length) {
+if (length < 1 || length > 12) {
+  throw new IllegalArgumentException("length must be between 1 and 12");
+}
+
+boolean isEven = true;
+double minLat = -90.0;
+double maxLat = 90.0;
+double minLon = -180.0;
+double maxLon = 180.0;
+long bit = 0x8000L;
+long geohash = 0L;
+
+for (long i = 0; i < 5 * length; i++) {
+  if (isEven) {
+double mid = (minLon + maxLon) / 2;
+if (longitude >= mid) {
+  geohash |= bit;
+  minLon = mid;
+} else {
+  maxLon = mid;
+}
+  } else {
+double mid = (minLat + maxLat) / 2;
+if (latitude >= mid) {
+  geohash |= bit;
+  minLat = mid;
+} else {
+  maxLat = mid;
+}
+  }
+
+  isEven = !isEven;
+  bit >>>= 1;
+}
+
+return geohash | length;
+  }
+
+  private static String longHashToStringGeohash(long hash) {
+int length = (int) (hash & 15L);
+if (length < 1 || length > 12) {
+  throw new IllegalArgumentException("Invalid geohash length: " + length);
+}
+
+char[] geohash = new char[length];
+for (int i = 0; i < length; i++) {
+  geohash[i] = BASE32.charAt((int) ((hash >>> 59) & 31L));
+  hash <<= 5;
+}
+
+return new String(geohash);
+  }
+
+  public static double[] decode(String geohash) {
+double[] lat = {-90.0, 90.0};
+double[] lon = {-180.0, 180.0};
+boolean isEven = true;
+
+for (int i = 0; i < geohash.length(); i++) {
+  int cd = BASE32.indexOf(geohash.charAt(i));
+  for (int j = 0; j < 5; j++) {
+int mask = BITS[j];
+if (isEven) {
+  refineInterval(lon, cd, mask);
+} else {
+  refineInterval(lat, cd, mask);
+}
+isEven = !isEven;
+  }
+}
+
+return new double[]{(lat[0] + lat[1]) / 2, (lon[0] + lon[1]) / 2};
+  }
+
+  private static void refineInterval(double[] interval, int cd, int mask) {
+if ((cd & mask) != 0) {
+  interval[0] = (interval[0] + interval[1]) / 2;
+} else {
+  interval[1] = (interval[0] + interval[1]) / 2;
+}
+  }
+
+  /**
+   * Encodes a latitude and longitude to a geohash.
+   * @param latitude
+   * @param longitude
+   * @param precision
+   * @return the geohash value as a string
+   */
+  @ScalarFunction(names = {"encodeGeoHash", "encode_geohash"})
+  public static String encodeGeoHash(double latitude, double

(pinot) branch master updated: Add offset based lag metrics (#13298)

2024-06-07 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new fc358a87fc Add offset based lag metrics (#13298)
fc358a87fc is described below

commit fc358a87fc63def0b5c5a31a24bde1d9df0d9ecb
Author: Kartik Khare 
AuthorDate: Fri Jun 7 19:21:32 2024 +0530

Add offset based lag metrics (#13298)

* Add offset based lag metrics

* Add tests

* Refactoring

-

Co-authored-by: Kartik Khare 
---
 .../apache/pinot/common/metrics/ServerGauge.java   |  3 +-
 .../manager/realtime/IngestionDelayTracker.java| 92 +++---
 .../realtime/RealtimeSegmentDataManager.java   | 16 ++--
 .../manager/realtime/RealtimeTableDataManager.java | 10 ++-
 .../realtime/IngestionDelayTrackerTest.java| 65 +++
 5 files changed, 167 insertions(+), 19 deletions(-)

diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ServerGauge.java 
b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ServerGauge.java
index f0a1fdd136..072c048c55 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ServerGauge.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ServerGauge.java
@@ -72,7 +72,8 @@ public enum ServerGauge implements AbstractMetrics.Gauge {
   END_TO_END_REALTIME_INGESTION_DELAY_MS("milliseconds", false),
   // Needed to track if valid doc id snapshots are present for faster restarts
   UPSERT_VALID_DOC_ID_SNAPSHOT_COUNT("upsertValidDocIdSnapshotCount", false),
-  UPSERT_PRIMARY_KEYS_IN_SNAPSHOT_COUNT("upsertPrimaryKeysInSnapshotCount", 
false);
+  UPSERT_PRIMARY_KEYS_IN_SNAPSHOT_COUNT("upsertPrimaryKeysInSnapshotCount", 
false),
+  REALTIME_INGESTION_OFFSET_LAG("offsetLag", false);
 
   private final String _gaugeName;
   private final String _unit;
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/IngestionDelayTracker.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/IngestionDelayTracker.java
index 84a5ea7ba0..eed1302708 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/IngestionDelayTracker.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/IngestionDelayTracker.java
@@ -34,6 +34,8 @@ import java.util.concurrent.TimeUnit;
 import java.util.function.Supplier;
 import org.apache.pinot.common.metrics.ServerGauge;
 import org.apache.pinot.common.metrics.ServerMetrics;
+import org.apache.pinot.spi.stream.LongMsgOffset;
+import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
 import org.apache.pinot.spi.utils.builder.TableNameBuilder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -81,13 +83,23 @@ public class IngestionDelayTracker {
 
   // Class to wrap supported timestamps collected for an ingested event
   private static class IngestionTimestamps {
+private final long _firstStreamIngestionTimeMs;
+private final long _ingestionTimeMs;
 IngestionTimestamps(long ingestionTimesMs, long 
firstStreamIngestionTimeMs) {
   _ingestionTimeMs = ingestionTimesMs;
   _firstStreamIngestionTimeMs = firstStreamIngestionTimeMs;
 }
-private final long _ingestionTimeMs;
-private final long _firstStreamIngestionTimeMs;
   }
+
+  private static class IngestionOffsets {
+private final StreamPartitionMsgOffset _latestOffset;
+private final StreamPartitionMsgOffset _offset;
+IngestionOffsets(StreamPartitionMsgOffset offset, StreamPartitionMsgOffset 
latestOffset) {
+  _offset = offset;
+  _latestOffset = latestOffset;
+}
+  }
+
   // Sleep interval for scheduled executor service thread that triggers read 
of ideal state
   private static final int SCHEDULED_EXECUTOR_THREAD_TICK_INTERVAL_MS = 
30; // 5 minutes +/- precision in timeouts
   // Once a partition is marked for verification, we wait 10 minutes to pull 
its ideal state.
@@ -98,6 +110,8 @@ public class IngestionDelayTracker {
 
   // HashMap used to store ingestion time measures for all partitions active 
for the current table.
   private final Map 
_partitionToIngestionTimestampsMap = new ConcurrentHashMap<>();
+
+  private final Map _partitionToOffsetMap = new 
ConcurrentHashMap<>();
   // We mark partitions that go from CONSUMING to ONLINE in 
_partitionsMarkedForVerification: if they do not
   // go back to CONSUMING in some period of time, we verify whether they are 
still hosted in this server by reading
   // ideal state. This is done with the goal of minimizing reading ideal state 
for efficiency reasons.
@@ -174,6 +188,22 @@ public class IngestionDelayTracker {
 return agedIngestionDelayMs;
   }
 
+  private long getPartitionOffsetLag(IngestionOffsets offset) {
+if (offset ==

(pinot) branch master updated: Fix few metric rules which were affected by the database prefix handling (#13290)

2024-06-06 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new fb64ec29e9 Fix few metric rules which were affected by the database 
prefix handling (#13290)
fb64ec29e9 is described below

commit fb64ec29e9949aee70e2d92c9821edf0cf56fd09
Author: Shounak kulkarni 
AuthorDate: Fri Jun 7 11:34:13 2024 +0500

Fix few metric rules which were affected by the database prefix handling 
(#13290)
---
 .../jmx_prometheus_javaagent/configs/broker.yml| 12 +++
 .../configs/controller.yml |  7 ++
 .../jmx_prometheus_javaagent/configs/server.yml| 25 +++---
 3 files changed, 36 insertions(+), 8 deletions(-)

diff --git 
a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/broker.yml 
b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/broker.yml
index 5c7a6cc8a9..3e74ae244e 100644
--- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/broker.yml
+++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/broker.yml
@@ -59,6 +59,12 @@ rules:
   labels:
 database: "$2"
 table: "$1$3"
+- pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
+  name: "pinot_broker_requestSize_$4"
+  cache: true
+  labels:
+database: "$2"
+table: "$1$3"
 - pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
   name: "pinot_broker_scatterGather_$4"
   cache: true
@@ -112,6 +118,12 @@ rules:
 - pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
   name: "pinot_broker_routingTableUpdateTime_$1"
   cache: true
+- pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
+  name: "pinot_broker_adaptiveServerSelectorType_$1"
+  cache: true
+- pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
+  name: "pinot_broker_adaptiveServerSelectorType_$1_$2"
+  cache: true
 - pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
   name: "pinot_broker_brokerResponsesWithPartialServersResponded_$4"
   cache: true
diff --git 
a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml 
b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
index a036a13053..81bb7b0e5b 100644
--- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
+++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
@@ -64,6 +64,9 @@ rules:
 database: "$2"
 table: "$1$3"
 tableType: "$4"
+- pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
+  name: "pinot_controller_dataDir_$1_$2"
+  cache: true
 - pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
   name: "pinot_controller_numberSegmentUploadTimeoutExceeded_$1"
   cache: true
@@ -228,6 +231,10 @@ rules:
 database: "$2"
 table: "$1$3"
 tableType: "$4"
+# Controller periodic task metrics
+- pattern: 
"\"?org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
+  name: "pinot_controller_periodicTaskRun_$1_$2"
+  cache: true
 - pattern: "\"?org\\.apache\\.pinot\\.common\\.metrics\"?<>(\\w+)"
   name: "pinot_$1_version"
   cache: true
diff --git 
a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/server.yml 
b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/server.yml
index 1aae226de3..c711d3cfd5 100644
--- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/server.yml
+++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/server.yml
@@ -13,13 +13,6 @@ rules:
 database: "$2"
 table: "$1$3"
 tableType: "$4"
-- pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
-  name: "pinot_server_$5_$6"
-  cache: true
-  labels:
-database: "$2"
-table: "$1$3"
-tableType: "$4"
 - pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
   name: "pinot_server_helix_connected_$1"
   cache: true
@@ -73,6 +66,14 @@ rules:
 table: "$1$3"
 tableType: "$4"
 partition: "$5"
+- pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
+  name: "pinot_server_endToEndRealtimeIngestionDelayMs_$6"
+  cache: true
+  labels:
+database: "$2"
+table: "$1$3"
+tableType: "$4"
+partition: "$5"
 - pattern: 
&q

(pinot) branch master updated (3b5711686b -> bdb91919de)

2024-05-30 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from 3b5711686b Serialize V2 Plan using Protobufs instead of reflection. 
(#13221)
 add bdb91919de Bugfix. Allow tenant rebalance with downtime as true 
(#13246)

No new revisions were added by this update.

Summary of changes:
 .../controller/helix/core/rebalance/tenant/DefaultTenantRebalancer.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



(pinot) branch master updated (256d0aa5a5 -> 2f4a412c52)

2024-05-29 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from 256d0aa5a5 Do not pause ingestion when upsert snapshot flow errors out 
(#13257)
 add 2f4a412c52 Properly handle complex type transformer in segment 
processor framework (#13258)

No new revisions were added by this update.

Summary of changes:
 .../segment/processing/mapper/SegmentMapper.java   | 37 --
 .../framework/SegmentProcessorFrameworkTest.java   | 20 +++-
 .../recordtransformer/CompositeTransformer.java|  4 ---
 3 files changed, 46 insertions(+), 15 deletions(-)


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



(pinot) branch master updated: Move offset validation logic to consumer classes (#13015)

2024-05-23 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 29c560f523 Move offset validation logic to consumer classes (#13015)
29c560f523 is described below

commit 29c560f523bab4529e6685c7df061105d8dc3df1
Author: Kartik Khare 
AuthorDate: Thu May 23 19:02:09 2024 +0530

Move offset validation logic to consumer classes (#13015)

* Enhance Kinesis consumer

* Simplify the handling

* Address comments

* Move offset validation logic to consumer classes

* Add missing message interface to message batch

* fix linting

* remove unused interface

* Cleanup and refactoring

* lint fixes

-

Co-authored-by: Xiaotian (Jackie) Jiang 
Co-authored-by: Kartik Khare 

Co-authored-by: Kartik Khare 
---
 .../realtime/RealtimeSegmentDataManager.java| 21 -
 .../plugin/stream/kafka20/KafkaMessageBatch.java|  9 -
 .../stream/kafka20/KafkaPartitionLevelConsumer.java |  5 -
 .../plugin/stream/kinesis/KinesisConsumer.java  |  1 -
 .../org/apache/pinot/spi/stream/MessageBatch.java   |  8 
 5 files changed, 28 insertions(+), 16 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java
index 01fffced36..b441f086de 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java
@@ -468,10 +468,7 @@ public class RealtimeSegmentDataManager extends 
SegmentDataManager {
 throw t;
   }
 
-  StreamPartitionMsgOffset batchFirstOffset = 
messageBatch.getFirstMessageOffset();
-  if (batchFirstOffset != null) {
-validateStartOffset(_currentOffset, batchFirstOffset);
-  }
+  reportDataLoss(messageBatch);
 
   boolean endCriteriaReached = processStreamEvents(messageBatch, 
idlePipeSleepTimeMillis);
 
@@ -922,18 +919,16 @@ public class RealtimeSegmentDataManager extends 
SegmentDataManager {
   }
 
   /**
-   * Checks if the begin offset of the stream partition has been 
fast-forwarded.
-   * batchFirstOffset should be less than or equal to startOffset.
-   * If batchFirstOffset is greater, then some messages were not received.
+   * Checks and reports if the consumer is going through data loss.
*
-   * @param startOffset The offset of the first message desired, inclusive.
-   * @param batchFirstOffset The offset of the first message in the batch.
+   * @param messageBatch Message batch to validate
*/
-  private void validateStartOffset(StreamPartitionMsgOffset startOffset, 
StreamPartitionMsgOffset batchFirstOffset) {
-if (batchFirstOffset.compareTo(startOffset) > 0) {
+  private void reportDataLoss(MessageBatch messageBatch) {
+if (messageBatch.hasDataLoss()) {
   _serverMetrics.addMeteredTableValue(_tableStreamName, 
ServerMeter.STREAM_DATA_LOSS, 1L);
-  String message =
-  "startOffset(" + startOffset + ") is older than topic's beginning 
offset(" + batchFirstOffset + ")";
+  String message = String.format("Message loss detected in stream 
partition: %s for table: %s startOffset: %s "
+  + "batchFirstOffset: %s", _partitionGroupId, _tableNameWithType, 
_startOffset,
+  messageBatch.getFirstMessageOffset());
   _segmentLogger.error(message);
   _realtimeTableDataManager.addSegmentError(_segmentNameStr, new 
SegmentErrorInfo(now(), message, null));
 }
diff --git 
a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaMessageBatch.java
 
b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaMessageBatch.java
index 3f137b54af..1e3361ba00 100644
--- 
a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaMessageBatch.java
+++ 
b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaMessageBatch.java
@@ -33,6 +33,7 @@ public class KafkaMessageBatch implements 
MessageBatch {
   private final long _offsetOfNextBatch;
   private final long _firstOffset;
   private final StreamMessageMetadata _lastMessageMetadata;
+  private final boolean _hasDataLoss;
 
   /**
* @param messages the messages, which may be smaller than {@see 
unfilteredMessageCount}
@@ -43,12 +44,13 @@ public class KafkaMessageBatch implements 
MessageBatch {
*dela

(pinot) branch master updated (a385e28c3d -> 9e1246de98)

2024-05-20 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from a385e28c3d [Minor bugfix] Decrypt file in minion task executor when 
downloading from servers (#13178)
 add 9e1246de98 Enhance Kinesis consumer (#12806)

No new revisions were added by this update.

Summary of changes:
 .../core/realtime/stream/StreamConfigTest.java |  27 ---
 pinot-integration-tests/pom.xml|   7 -
 .../pinot-stream-ingestion/pinot-kinesis/pom.xml   |  60 +--
 .../pinot/plugin/stream/kinesis/KinesisConfig.java |   3 +-
 .../plugin/stream/kinesis/KinesisConsumer.java | 200 +++--
 .../kinesis/KinesisStreamMetadataProvider.java |   6 +-
 .../plugin/stream/kinesis/KinesisConsumerTest.java |  85 +
 .../pinot/spi/stream/PartitionGroupConsumer.java   |   6 +-
 .../org/apache/pinot/spi/stream/StreamConfig.java  |   9 +-
 9 files changed, 116 insertions(+), 287 deletions(-)


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



(pinot) branch master updated: Made the error message around dimension table size clearer (#13163)

2024-05-17 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new ceb9c1acf1 Made the error message around dimension table size clearer 
(#13163)
ceb9c1acf1 is described below

commit ceb9c1acf17972f851e54bb9d4b0060f72516a44
Author: Ragesh Rajagopalan 
AuthorDate: Fri May 17 08:31:48 2024 -0700

Made the error message around dimension table size clearer (#13163)
---
 .../java/org/apache/pinot/segment/local/utils/TableConfigUtils.java | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
index 6729f1b027..fea2dfb8f1 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
@@ -1412,9 +1412,9 @@ public final class TableConfigUtils {
   tableConfig.getTableName());
 } else {
   if (quotaConfig.getStorageInBytes() > maxAllowedSizeInBytes) {
-throw new IllegalStateException(
-String.format("Invalid storage quota: %d, max allowed size: 
%d", quotaConfig.getStorageInBytes(),
-maxAllowedSizeInBytes));
+throw new IllegalStateException(String.format(
+"Exceeded storage size for dimension table. Requested size: 
%d, Max allowed size: %d",
+quotaConfig.getStorageInBytes(), maxAllowedSizeInBytes));
   }
 }
   }


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



(pinot) branch master updated: Bigfix. Added missing paramName (#13060)

2024-05-02 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new b9140715b0 Bigfix. Added missing paramName (#13060)
b9140715b0 is described below

commit b9140715b09cf0cc88114fcc2d0af3c48f02a16e
Author: Shounak kulkarni 
AuthorDate: Fri May 3 10:23:04 2024 +0500

Bigfix. Added missing paramName (#13060)
---
 .../apache/pinot/controller/api/resources/PinotTaskRestletResource.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java
index f240234b03..ffda1ce99c 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java
@@ -553,7 +553,7 @@ public class PinotTaskRestletResource {
 
   @GET
   @Path("/tasks/scheduler/jobDetails")
-  @Authorize(targetType = TargetType.TABLE, action = 
Actions.Table.GET_SCHEDULER_JOB_DETAILS)
+  @Authorize(targetType = TargetType.TABLE, paramName = "tableName", action = 
Actions.Table.GET_SCHEDULER_JOB_DETAILS)
   @Produces(MediaType.APPLICATION_JSON)
   @ApiOperation("Fetch job details for table tasks")
   public Map getCronSchedulerJobDetails(


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



(pinot) branch master updated: Upgrade Pulsar to 3.2.2 (#12967)

2024-04-23 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new bc9e8ee541 Upgrade Pulsar to 3.2.2 (#12967)
bc9e8ee541 is described below

commit bc9e8ee5413c8611fe2be3ed6c3d7073e750d608
Author: Xiaotian (Jackie) Jiang <1751+jackie-ji...@users.noreply.github.com>
AuthorDate: Tue Apr 23 21:43:12 2024 -0700

Upgrade Pulsar to 3.2.2 (#12967)
---
 .../pinot-stream-ingestion/pinot-pulsar/pom.xml| 134 +++--
 .../pinot/plugin/stream/pulsar/PulsarUtils.java|  27 ++---
 .../plugin/stream/pulsar/PulsarConsumerTest.java   |   2 +-
 pinot-tools/pom.xml|  38 --
 pom.xml|  31 ++---
 5 files changed, 37 insertions(+), 195 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml 
b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml
index f742f52f45..cb13fb9bba 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml
@@ -37,137 +37,33 @@
   
 package
 ${basedir}/../../..
-0.16.0
-1.63.0
-1.63.0
-2.6.2
-1.17
+3.2.2
+1.19.7
   
 
   
-
-  org.testcontainers
-  pulsar
-  1.19.7
-  test
-
-
-  org.mockito
-  mockito-core
-  test
-
 
   org.apache.pulsar
-  pulsar-client-original
-  
-
-  commons-configuration
-  commons-configuration
-
-
-  org.bouncycastle
-  bcpkix-jdk15on
-
-
-  org.bouncycastle
-  bcprov-ext-jdk15on
-
-
-  org.eclipse.jetty
-  jetty-util
-
-  
+  pulsar-client
+  ${pulsar.version}
 
+
 
   org.apache.pulsar
-  pulsar-client-admin-original
-
-
-  org.glassfish.jersey.core
-  jersey-server
-
-
-  org.glassfish.jersey.containers
-  jersey-container-grizzly2-http
-
-
-  org.glassfish.jersey.containers
-  jersey-container-servlet-core
-
-
-  io.netty
-  netty-resolver
-
-
-  io.prometheus
-  simpleclient_common
-  ${simpleclient_common.version}
-
-
-  com.google.api.grpc
-  proto-google-common-protos
-
-
-  io.grpc
-  grpc-context
-  ${grpc-context.version}
-
-
-  io.grpc
-  grpc-protobuf-lite
-  ${grpc-protobuf-lite.version}
-
-
-  io.prometheus
-  simpleclient
-  ${simpleclient_common.version}
-
-
-  org.eclipse.jetty
-  jetty-server
-
-
-  org.eclipse.jetty
-  jetty-servlet
-
-
-  com.squareup.okio
-  okio
-
-
-  io.prometheus
-  simpleclient_hotspot
-  ${simpleclient_common.version}
-
-
-  org.codehaus.mojo
-  animal-sniffer-annotations
-  ${codehaus-annotations.version}
-
-
-  com.github.ben-manes.caffeine
-  caffeine
-  ${caffeine.version}
-
-
-  io.netty
-  netty-codec-socks
-
-
-  org.bouncycastle
-  bcpkix-jdk15to18
-
-
-  org.bouncycastle
-  bcprov-ext-jdk15to18
+  pulsar-client-admin
+  ${pulsar.version}
+  test
 
 
-  org.bouncycastle
-  bcprov-jdk15to18
+  org.testcontainers
+  pulsar
+  ${testcontainers.pulsar.version}
+  test
 
 
-  org.apache.pinot
-  pinot-spi
+  org.mockito
+  mockito-core
+  test
 
   
 
diff --git 
a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarUtils.java
 
b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarUtils.java
index 0ccacc3047..e1b7b50c21 100644
--- 
a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarUtils.java
+++ 
b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarUtils.java
@@ -22,6 +22,7 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Maps;
 import java.nio.ByteBuffer;
 import java.util.Base64;
+import java.util.BitSet;
 import java.util.Map;
 import java.util.Set;
 import org.apache.commons.lang3.StringUtils;
@@ -31,12 +32,11 @@ import org.apache.pinot.spi.stream.OffsetCriteria;
 import org.apache.pinot.spi.stream.StreamMessageMetadata;
 import org.apache.pulsar.client.api.Message;
 import org.apache.pulsar.client.api.MessageId;
+import org.apache.pulsar.client.api.MessageIdAdv;
 import org.apache.pulsar.client.api.Reader;
 import org.apache.pulsar.client.api.SubscriptionInitialPosition;
-import org.apache.pulsar.client.impl.BatchMessag

(pinot) branch master updated: Update metric rules to emit database as a label (#12739)

2024-04-04 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 77a5672f6c Update metric rules to emit database as a label (#12739)
77a5672f6c is described below

commit 77a5672f6c42fe1e536230875a92ef4f27310e02
Author: Shounak kulkarni 
AuthorDate: Fri Apr 5 11:37:22 2024 +0500

Update metric rules to emit database as a label (#12739)
---
 .../jmx_prometheus_javaagent/configs/broker.yml| 227 ---
 .../configs/controller.yml | 289 +
 .../jmx_prometheus_javaagent/configs/minion.yml|  36 +-
 .../etc/jmx_prometheus_javaagent/configs/pinot.yml | 665 -
 .../jmx_prometheus_javaagent/configs/server.yml| 223 +++
 5 files changed, 829 insertions(+), 611 deletions(-)

diff --git 
a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/broker.yml 
b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/broker.yml
index ef2ab9f153..ce5e5df7b9 100644
--- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/broker.yml
+++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/broker.yml
@@ -1,76 +1,90 @@
 rules:
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
-  name: "pinot_broker_authorization_$2"
+- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+  name: "pinot_broker_authorization_$4"
   cache: true
   labels:
-table: "$1"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
-  name: "pinot_broker_documentsScanned_$2"
+database: "$2"
+table: "$1$3"
+- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+  name: "pinot_broker_documentsScanned_$4"
   cache: true
   labels:
-table: "$1"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
-  name: "pinot_broker_entriesScannedInFilter_$2"
+database: "$2"
+table: "$1$3"
+- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+  name: "pinot_broker_entriesScannedInFilter_$4"
   cache: true
   labels:
-table: "$1"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
-  name: "pinot_broker_entriesScannedPostFilter_$2"
+database: "$2"
+table: "$1$3"
+- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+  name: "pinot_broker_entriesScannedPostFilter_$4"
   cache: true
   labels:
-table: "$1"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
-  name: "pinot_broker_freshnessLagMs_$2"
+database: "$2"
+table: "$1$3"
+- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+  name: "pinot_broker_freshnessLagMs_$4"
   cache: true
   labels:
-table: "$1"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
-  name: "pinot_broker_queries_$2"
+database: "$2"
+table: "$1$3"
+- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+  name: "pinot_broker_queries_$4"
   cache: true
   labels:
-table: "$1"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
-  name: "pinot_broker_queryExecution_$2"
+database: "$2"
+table: "$1$3"
+- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+  name: "pinot_broker_queryExecution_$4"
   cache: true
   labels:
-table: "$1"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
-  name: "pinot_broker_queryRouting_$2"
+database: "$2"
+table: "$1$3"
+- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+  name: "pinot_broker_queryRouting_$4"
   cache: true
   labels:
-table: "$1"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
-  name: "pinot_broker_reduce_$2"
+database: "$2"
+table: "$1$3"
+- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+  name: "pinot_broker_reduce_$4"
   cache: true
   labels:
-table: "$1"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
-  name: "pinot_broker_requestCompilation_$2"
+database: "$2"
+table: "$1$3"
+- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)&qu

(pinot) branch master updated: Make task manager APIs database aware (#12766)

2024-04-03 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 11d64d08f5 Make task manager APIs database aware (#12766)
11d64d08f5 is described below

commit 11d64d08f5f536d13cd449bf6ffe6f36f463e7bb
Author: Shounak kulkarni 
AuthorDate: Wed Apr 3 18:58:54 2024 +0500

Make task manager APIs database aware (#12766)

* Deprecate getAllTables() in favour of getAllTables(databaseName)

* Expect database context for scheduling tasks

* test fixes
---
 .../api/resources/PinotTaskRestletResource.java|  6 ++--
 .../helix/core/minion/PinotTaskManager.java| 33 ++
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java
index f6b38fdc88..fbee62fc71 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java
@@ -104,6 +104,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import static org.apache.pinot.spi.utils.CommonConstants.DATABASE;
+import static org.apache.pinot.spi.utils.CommonConstants.DEFAULT_DATABASE;
 import static 
org.apache.pinot.spi.utils.CommonConstants.SWAGGER_AUTHORIZATION_KEY;
 
 
@@ -618,17 +619,18 @@ public class PinotTaskRestletResource {
   public Map scheduleTasks(@ApiParam(value = "Task type") 
@QueryParam("taskType") String taskType,
   @ApiParam(value = "Table name (with type suffix)") 
@QueryParam("tableName") String tableName,
   @Context HttpHeaders headers) {
+String database = headers != null ? headers.getHeaderString(DATABASE) : 
DEFAULT_DATABASE;
 if (taskType != null) {
   // Schedule task for the given task type
   String taskName = tableName != null
   ? _pinotTaskManager.scheduleTask(taskType, 
DatabaseUtils.translateTableName(tableName, headers))
-  : _pinotTaskManager.scheduleTask(taskType);
+  : _pinotTaskManager.scheduleTask(taskType, database);
   return Collections.singletonMap(taskType, taskName);
 } else {
   // Schedule tasks for all task types
   return tableName != null
   ? 
_pinotTaskManager.scheduleTasks(DatabaseUtils.translateTableName(tableName, 
headers))
-  : _pinotTaskManager.scheduleTasks();
+  : _pinotTaskManager.scheduleTasksForDatabase(database);
 }
   }
 
diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java
index d2086dfb1c..f656108cb7 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java
@@ -479,11 +479,22 @@ public class PinotTaskManager extends 
ControllerPeriodicTask {
   }
 
   /**
-   * Public API to schedule tasks (all task types) for all tables. It might be 
called from the non-leader controller.
+   * Public API to schedule tasks (all task types) for all tables in default 
database.
+   * It might be called from the non-leader controller.
* Returns a map from the task type to the task scheduled.
*/
+  @Deprecated
   public synchronized Map scheduleTasks() {
-return scheduleTasks(_pinotHelixResourceManager.getAllTables(), false);
+return 
scheduleTasks(_pinotHelixResourceManager.getAllTables(CommonConstants.DEFAULT_DATABASE),
 false);
+  }
+
+  /**
+   * Public API to schedule tasks (all task types) for all tables in given 
database.
+   * It might be called from the non-leader controller.
+   * Returns a map from the task type to the task scheduled.
+   */
+  public synchronized Map scheduleTasksForDatabase(String 
database) {
+return scheduleTasks(_pinotHelixResourceManager.getAllTables(database), 
false);
   }
 
   /**
@@ -597,17 +608,29 @@ public class PinotTaskManager extends 
ControllerPeriodicTask {
   }
 
   /**
-   * Public API to schedule task for the given task type. It might be called 
from the non-leader controller. Returns the
-   * task name, or {@code null} if no task is scheduled.
+   * Public API to schedule task for the given task type in default database.
+   * It might be called from the non-leader controller.
+   * Returns the task name, or {@code null} if no task is scheduled.
*/
+  @Deprecated
   @Nullable
   public synchronized String scheduleTask(String taskType) {
+return scheduleTaskForDatabase(ta

(pinot) branch master updated: Deprecate PinotHelixResourceManager#getAllTables() in favour of getAllTables(String databaseName) (#12782)

2024-04-03 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new eb8fbba11c Deprecate PinotHelixResourceManager#getAllTables() in 
favour of getAllTables(String databaseName) (#12782)
eb8fbba11c is described below

commit eb8fbba11c6c1d466d0d084b9cbca62556fcdcf3
Author: Shounak kulkarni 
AuthorDate: Wed Apr 3 16:57:33 2024 +0500

Deprecate PinotHelixResourceManager#getAllTables() in favour of 
getAllTables(String databaseName) (#12782)

* Deprecate getAllTables() in favour of getAllTables(databaseName)

* Adapt getAllTables() consumers to iterate over all tables across databases

* mock fixes
---
 .../pinot/controller/BaseControllerStarter.java| 118 +++--
 .../helix/core/PinotHelixResourceManager.java  |   1 +
 .../core/cleanup/StaleInstancesCleanupTask.java|   9 +-
 .../core/periodictask/ControllerPeriodicTask.java  |   5 +-
 .../helix/RealtimeConsumerMonitorTest.java |   5 +-
 .../controller/helix/SegmentStatusCheckerTest.java |  45 ++--
 .../periodictask/ControllerPeriodicTaskTest.java   |   6 +-
 .../helix/core/retention/RetentionManagerTest.java |   5 +-
 8 files changed, 118 insertions(+), 76 deletions(-)

diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/BaseControllerStarter.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/BaseControllerStarter.java
index 0f071d0895..f78a49f2f9 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/BaseControllerStarter.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/BaseControllerStarter.java
@@ -587,64 +587,66 @@ public abstract class BaseControllerStarter implements 
ServiceStartable {
 AtomicInteger failedToUpdateTableConfigCount = new AtomicInteger();
 ZkHelixPropertyStore propertyStore = 
_helixResourceManager.getPropertyStore();
 
-List allTables = _helixResourceManager.getAllTables();
-allTables.forEach(tableNameWithType -> {
-  Pair tableConfigWithVersion =
-  ZKMetadataProvider.getTableConfigWithVersion(propertyStore, 
tableNameWithType);
-  if (tableConfigWithVersion == null) {
-// This might due to table deletion, just log it here.
-LOGGER.warn("Failed to find table config for table: {}, the table 
likely already got deleted",
-tableNameWithType);
-return;
-  }
-  TableConfig tableConfig = tableConfigWithVersion.getLeft();
-  String rawTableName = 
TableNameBuilder.extractRawTableName(tableNameWithType);
-  String schemaPath = 
ZKMetadataProvider.constructPropertyStorePathForSchema(rawTableName);
-  boolean schemaExists = propertyStore.exists(schemaPath, 
AccessOption.PERSISTENT);
-  String existSchemaName = 
tableConfig.getValidationConfig().getSchemaName();
-  if (existSchemaName == null || existSchemaName.equals(rawTableName)) {
-// Although the table config is valid, we still need to ensure the 
schema exists
-if (!schemaExists) {
-  LOGGER.warn("Failed to find schema for table: {}", 
tableNameWithType);
-  tableWithoutSchemaCount.getAndIncrement();
-  return;
-}
-// Table config is already in good status
-return;
-  }
-  misconfiguredTableCount.getAndIncrement();
-  if (schemaExists) {
-// If a schema named `rawTableName` already exists, then likely this 
is a misconfiguration.
-// Reset schema name in table config to null to let the table point to 
the existing schema.
-LOGGER.warn("Schema: {} already exists, fix the schema name in table 
config from {} to null", rawTableName,
-existSchemaName);
-  } else {
-// Copy the schema current table referring to to `rawTableName` if it 
does not exist
-Schema schema = _helixResourceManager.getSchema(existSchemaName);
-if (schema == null) {
-  LOGGER.warn("Failed to find schema: {} for table: {}", 
existSchemaName, tableNameWithType);
-  tableWithoutSchemaCount.getAndIncrement();
-  return;
-}
-schema.setSchemaName(rawTableName);
-if (propertyStore.create(schemaPath, SchemaUtils.toZNRecord(schema), 
AccessOption.PERSISTENT)) {
-  LOGGER.info("Copied schema: {} to {}", existSchemaName, 
rawTableName);
-} else {
-  LOGGER.warn("Failed to copy schema: {} to {}", existSchemaName, 
rawTableName);
-  failedToCopySchemaCount.getAndIncrement();
-  return;
-}
-  }
-  // Update table config to remove schema name
-  tableConfig.getValidationConfig().setSchemaName(null);
-  if (ZKMetadataProvider.setTableConfig(propertyStore, tableConfig, 
tableConfigWithVersion.getRight())) {

(pinot) branch master updated: Bug fix: Do not ignore scheme property (#12332)

2024-02-13 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new b38fcee62c Bug fix: Do not ignore scheme property (#12332)
b38fcee62c is described below

commit b38fcee62cda17797dc1528f6b973908a18dab0b
Author: Kartik Khare 
AuthorDate: Wed Feb 14 11:44:42 2024 +0530

Bug fix: Do not ignore scheme property (#12332)

Co-authored-by: Kartik Khare 

---
 .../pinot/client/JsonAsyncHttpPinotClientTransportFactory.java   | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git 
a/pinot-clients/pinot-java-client/src/main/java/org/apache/pinot/client/JsonAsyncHttpPinotClientTransportFactory.java
 
b/pinot-clients/pinot-java-client/src/main/java/org/apache/pinot/client/JsonAsyncHttpPinotClientTransportFactory.java
index 6f4e390599..3471796d28 100644
--- 
a/pinot-clients/pinot-java-client/src/main/java/org/apache/pinot/client/JsonAsyncHttpPinotClientTransportFactory.java
+++ 
b/pinot-clients/pinot-java-client/src/main/java/org/apache/pinot/client/JsonAsyncHttpPinotClientTransportFactory.java
@@ -85,8 +85,9 @@ public class JsonAsyncHttpPinotClientTransportFactory 
implements PinotClientTran
   _headers = ConnectionUtils.getHeadersFromProperties(properties);
 }
 
-if (_scheme == null) {
-  _scheme = properties.getProperty("scheme", 
CommonConstants.HTTP_PROTOCOL);
+String scheme = properties.getProperty("scheme", 
CommonConstants.HTTP_PROTOCOL);
+if (_scheme == null || !_scheme.contentEquals(scheme)) {
+  _scheme = scheme;
 }
 
 if (_sslContext == null && 
_scheme.contentEquals(CommonConstants.HTTPS_PROTOCOL)) {


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



(pinot) branch master updated: Adding bytes string data type integration tests (#12387)

2024-02-13 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 04dd57924b Adding bytes string data type integration tests (#12387)
04dd57924b is described below

commit 04dd57924b45cf536470c827f4b56584d76df058
Author: Xiang Fu 
AuthorDate: Tue Feb 13 22:14:22 2024 -0800

Adding bytes string data type integration tests (#12387)
---
 .../common/function/scalar/StringFunctions.java|  10 +
 .../integration/tests/custom/BytesTypeTest.java| 314 +
 2 files changed, 324 insertions(+)

diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
index 5a49314943..22f312a8b8 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
@@ -48,6 +48,7 @@ public class StringFunctions {
   private final static Pattern LTRIM = Pattern.compile("^\\s+");
   private final static Pattern RTRIM = Pattern.compile("\\s+$");
 
+
   /**
* @see StringUtils#reverse(String)
* @param input
@@ -495,6 +496,15 @@ public class StringFunctions {
 return input.getBytes(StandardCharsets.US_ASCII);
   }
 
+  /**
+   * @param input bytes
+   * @return ASCII encoded string
+   */
+  @ScalarFunction
+  public static String fromAscii(byte[] input) {
+return new String(input, StandardCharsets.US_ASCII);
+  }
+
   /**
* @param input UUID as string
* @return bytearray
diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/BytesTypeTest.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/BytesTypeTest.java
new file mode 100644
index 00..8e3f18c30d
--- /dev/null
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/BytesTypeTest.java
@@ -0,0 +1,314 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.integration.tests.custom;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.collect.ImmutableList;
+import java.io.File;
+import java.nio.ByteBuffer;
+import java.util.Base64;
+import java.util.UUID;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.pinot.common.function.scalar.DataTypeConversionFunctions;
+import org.apache.pinot.common.function.scalar.StringFunctions;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+
+@Test(suiteName = "CustomClusterIntegrationTest")
+public class BytesTypeTest extends CustomDataQueryClusterIntegrationTest {
+
+  protected static final String DEFAULT_TABLE_NAME = "BytesTypeTest";
+  private static final String FIXED_HEX_STRIING_VALUE = 
"968a3c6a5eeb42168bae0e895034a26f";
+
+  private static final int NUM_TOTAL_DOCS = 1000;
+  private static final String HEX_STR = "hexStr";
+  private static final String HEX_BYTES = "hexBytes";
+  private static final String UUID_STR = "uuidStr";
+  private static final String UUID_BYTES = "uuidBytes";
+  private static final String UTF8_STR = "utf8Str";
+  private static final String UTF8_BYTES = "utf8Bytes";
+  private static final String ASCII_STR = "asciiStr";
+  private static final String ASCII_BYTES = "asciiBytes";
+  private static final String BASE64_STR = "base64Str";
+  private static final String BASE64_BYTES = "base64Bytes";
+  private static final String RANDOM_STR = "randomStr";
+  private static final String RANDOM_BYTES = "randomBytes";
+  private static final String FIXED_STRING = &

(pinot) branch master updated: Enhancing metadata API to return upsert partition to primary key count map for both controller and server APIs (#12334)

2024-02-08 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 947b47e3f4 Enhancing metadata API to return upsert partition to 
primary key count map for both controller and server APIs (#12334)
947b47e3f4 is described below

commit 947b47e3f49bb7434dbb6f47b6c538fa91c61084
Author: 9aman <35227405+9a...@users.noreply.github.com>
AuthorDate: Fri Feb 9 11:42:03 2024 +0530

Enhancing metadata API to return upsert partition to primary key count map 
for both controller and server APIs (#12334)
---
 .../common/restlet/resources/TableMetadataInfo.java | 10 +-
 .../controller/util/ServerSegmentMetadataReader.java| 11 ++-
 .../data/manager/realtime/RealtimeTableDataManager.java | 12 
 .../tests/models/DummyTableUpsertMetadataManager.java   |  6 ++
 .../upsert/ConcurrentMapTableUpsertMetadataManager.java | 10 ++
 .../local/upsert/TableUpsertMetadataManager.java|  8 
 .../pinot/server/api/resources/TablesResource.java  | 17 -
 7 files changed, 71 insertions(+), 3 deletions(-)

diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/TableMetadataInfo.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/TableMetadataInfo.java
index 27e28ab376..4a6953ac2c 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/TableMetadataInfo.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/TableMetadataInfo.java
@@ -43,6 +43,7 @@ public class TableMetadataInfo {
   private final Map _columnCardinalityMap;
   private final Map _maxNumMultiValuesMap;
   private final Map> _columnIndexSizeMap;
+  private final Map> 
_upsertPartitionToServerPrimaryKeyCountMap;
 
   @JsonCreator
   public TableMetadataInfo(@JsonProperty("tableName") String tableName,
@@ -50,7 +51,9 @@ public class TableMetadataInfo {
   @JsonProperty("numRows") long numRows, @JsonProperty("columnLengthMap") 
Map columnLengthMap,
   @JsonProperty("columnCardinalityMap") Map 
columnCardinalityMap,
   @JsonProperty("maxNumMultiValuesMap") Map 
maxNumMultiValuesMap,
-  @JsonProperty("columnIndexSizeMap") Map> 
columnIndexSizeMap) {
+  @JsonProperty("columnIndexSizeMap") Map> 
columnIndexSizeMap,
+  @JsonProperty("upsertPartitionToServerPrimaryKeyCountMap")
+  Map> 
upsertPartitionToServerPrimaryKeyCountMap) {
 _tableName = tableName;
 _diskSizeInBytes = sizeInBytes;
 _numSegments = numSegments;
@@ -59,6 +62,7 @@ public class TableMetadataInfo {
 _columnCardinalityMap = columnCardinalityMap;
 _maxNumMultiValuesMap = maxNumMultiValuesMap;
 _columnIndexSizeMap = columnIndexSizeMap;
+_upsertPartitionToServerPrimaryKeyCountMap = 
upsertPartitionToServerPrimaryKeyCountMap;
   }
 
   public String getTableName() {
@@ -92,4 +96,8 @@ public class TableMetadataInfo {
   public Map> getColumnIndexSizeMap() {
 return _columnIndexSizeMap;
   }
+
+  public Map> 
getUpsertPartitionToServerPrimaryKeyCountMap() {
+return _upsertPartitionToServerPrimaryKeyCountMap;
+  }
 }
diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/util/ServerSegmentMetadataReader.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/util/ServerSegmentMetadataReader.java
index f728d51635..cdcc7dc78c 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/util/ServerSegmentMetadataReader.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/util/ServerSegmentMetadataReader.java
@@ -112,6 +112,7 @@ public class ServerSegmentMetadataReader {
 final Map columnCardinalityMap = new HashMap<>();
 final Map maxNumMultiValuesMap = new HashMap<>();
 final Map> columnIndexSizeMap = new 
HashMap<>();
+final Map> 
upsertPartitionToServerPrimaryKeyCountMap = new HashMap<>();
 for (Map.Entry streamResponse : 
serviceResponse._httpResponses.entrySet()) {
   try {
 TableMetadataInfo tableMetadataInfo =
@@ -128,6 +129,14 @@ public class ServerSegmentMetadataReader {
   }
   return l;
 }));
+
tableMetadataInfo.getUpsertPartitionToServerPrimaryKeyCountMap().forEach(
+(partition, serverToPrimaryKeyCount) -> 
upsertPartitionToServerPrimaryKeyCountMap.merge(partition,
+new HashMap<>(serverToPrimaryKeyCount), (l, r) -> {
+  for (Map.Entry serverToPKCount : r.entrySet()) 
{
+l.merge(serverToPKCount.getKey(), 
serverToPKCount.getValue(), Long::sum);
+  }
+  return l;
+}));
   } catch (IOException e) {

(pinot) branch master updated: Make server resource classes configurable (#12324)

2024-01-27 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 23dbb08e8d Make server resource classes configurable (#12324)
23dbb08e8d is described below

commit 23dbb08e8d6dcfc0d034e0145986d6b9df491ea2
Author: Kartik Khare 
AuthorDate: Sun Jan 28 01:41:54 2024 +0530

Make server resource classes configurable (#12324)

* Make server resource classes configurable

* Update 
pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java

Co-authored-by: Xiaotian (Jackie) Jiang 
<1751+jackie-ji...@users.noreply.github.com>

* fix variable name

* Fix imports

-

Co-authored-by: Kartik Khare 
Co-authored-by: Xiaotian (Jackie) Jiang 
<1751+jackie-ji...@users.noreply.github.com>
---
 .../java/org/apache/pinot/server/api/AdminApiApplication.java  | 10 +++---
 .../main/java/org/apache/pinot/spi/utils/CommonConstants.java  |  2 ++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git 
a/pinot-server/src/main/java/org/apache/pinot/server/api/AdminApiApplication.java
 
b/pinot-server/src/main/java/org/apache/pinot/server/api/AdminApiApplication.java
index a590449f55..ae0fb5e2d0 100644
--- 
a/pinot-server/src/main/java/org/apache/pinot/server/api/AdminApiApplication.java
+++ 
b/pinot-server/src/main/java/org/apache/pinot/server/api/AdminApiApplication.java
@@ -53,17 +53,21 @@ import org.slf4j.LoggerFactory;
 public class AdminApiApplication extends ResourceConfig {
   private static final Logger LOGGER = 
LoggerFactory.getLogger(AdminApiApplication.class);
   public static final String PINOT_CONFIGURATION = "pinotConfiguration";
-  public static final String RESOURCE_PACKAGE = 
"org.apache.pinot.server.api.resources";
   public static final String SERVER_INSTANCE_ID = "serverInstanceId";
 
   private final AtomicBoolean _shutDownInProgress = new AtomicBoolean();
   private final ServerInstance _serverInstance;
   private HttpServer _httpServer;
+  private final String _adminApiResourcePackages;
+
 
   public AdminApiApplication(ServerInstance instance, AccessControlFactory 
accessControlFactory,
   PinotConfiguration serverConf) {
 _serverInstance = instance;
-packages(RESOURCE_PACKAGE);
+
+_adminApiResourcePackages = 
serverConf.getProperty(CommonConstants.Server.CONFIG_OF_SERVER_RESOURCE_PACKAGES,
+CommonConstants.Server.DEFAULT_SERVER_RESOURCE_PACKAGES);
+packages(_adminApiResourcePackages);
 property(PINOT_CONFIGURATION, serverConf);
 
 register(new AbstractBinder() {
@@ -132,7 +136,7 @@ public class AdminApiApplication extends ResourceConfig {
   beanConfig.setSchemes(new String[]{CommonConstants.HTTP_PROTOCOL, 
CommonConstants.HTTPS_PROTOCOL});
 }
 beanConfig.setBasePath("/");
-beanConfig.setResourcePackage(RESOURCE_PACKAGE);
+beanConfig.setResourcePackage(_adminApiResourcePackages);
 beanConfig.setScan(true);
 try {
   beanConfig.setHost(InetAddress.getLocalHost().getHostName());
diff --git 
a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java 
b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
index 190de27151..368e194a39 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
@@ -568,6 +568,8 @@ public class CommonConstants {
 public static final String CONFIG_OF_SWAGGER_USE_HTTPS = 
"pinot.server.swagger.use.https";
 public static final String CONFIG_OF_ADMIN_API_PORT = 
"pinot.server.adminapi.port";
 public static final int DEFAULT_ADMIN_API_PORT = 8097;
+public static final String CONFIG_OF_SERVER_RESOURCE_PACKAGES = 
"server.restlet.api.resource.packages";
+public static final String DEFAULT_SERVER_RESOURCE_PACKAGES = 
"org.apache.pinot.server.api.resources";
 
 public static final String CONFIG_OF_SEGMENT_FORMAT_VERSION = 
"pinot.server.instance.segment.format.version";
 public static final String CONFIG_OF_REALTIME_OFFHEAP_ALLOCATION = 
"pinot.server.instance.realtime.alloc.offheap";


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



(pinot) branch master updated: Use higher fetch timeout for Kinesis (#12214)

2024-01-22 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new ced6bc282e Use higher fetch timeout for Kinesis (#12214)
ced6bc282e is described below

commit ced6bc282ea9049f45f59f99738e5f1132a03a18
Author: Kartik Khare 
AuthorDate: Mon Jan 22 22:16:02 2024 +0530

Use higher fetch timeout for Kinesis (#12214)

* Use higher fetch timeout for Kinesis

* Add todo

* Add test

-

Co-authored-by: Kartik Khare 
Co-authored-by: Kartik Khare 

---
 .../core/realtime/stream/StreamConfigTest.java | 27 ++
 .../org/apache/pinot/spi/stream/StreamConfig.java  |  7 +-
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/realtime/stream/StreamConfigTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/realtime/stream/StreamConfigTest.java
index 333eecab04..11c7ee2010 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/core/realtime/stream/StreamConfigTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/realtime/stream/StreamConfigTest.java
@@ -403,4 +403,31 @@ public class StreamConfigTest {
   // expected
 }
   }
+
+  @Test
+  public void testKinesisFetchTimeout() {
+String streamType = "fakeStream";
+String topic = "fakeTopic";
+String tableName = "fakeTable_REALTIME";
+String consumerFactoryClass = "KinesisConsumerFactory";
+String decoderClass = FakeStreamMessageDecoder.class.getName();
+
+Map streamConfigMap = new HashMap<>();
+streamConfigMap.put(StreamConfigProperties.STREAM_TYPE, streamType);
+streamConfigMap.put(
+StreamConfigProperties.constructStreamProperty(streamType, 
StreamConfigProperties.STREAM_TOPIC_NAME), topic);
+
streamConfigMap.put(StreamConfigProperties.constructStreamProperty(streamType,
+StreamConfigProperties.STREAM_CONSUMER_FACTORY_CLASS), 
consumerFactoryClass);
+streamConfigMap.put(
+StreamConfigProperties.constructStreamProperty(streamType, 
StreamConfigProperties.STREAM_DECODER_CLASS),
+decoderClass);
+
+String consumerType = "simple";
+streamConfigMap.put(
+StreamConfigProperties.constructStreamProperty(streamType, 
StreamConfigProperties.STREAM_CONSUMER_TYPES),
+consumerType);
+StreamConfig streamConfig = new StreamConfig(tableName, streamConfigMap);
+
+assertEquals(streamConfig.getFetchTimeoutMillis(), 
StreamConfig.DEFAULT_STREAM_FETCH_TIMEOUT_MILLIS_KINESIS);
+  }
 }
diff --git 
a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConfig.java 
b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConfig.java
index 94f8adf566..ea24f5d01b 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConfig.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConfig.java
@@ -48,6 +48,7 @@ public class StreamConfig {
 
   public static final long DEFAULT_STREAM_CONNECTION_TIMEOUT_MILLIS = 30_000;
   public static final int DEFAULT_STREAM_FETCH_TIMEOUT_MILLIS = 5_000;
+  public static final int DEFAULT_STREAM_FETCH_TIMEOUT_MILLIS_KINESIS = 
600_000;
   public static final int DEFAULT_IDLE_TIMEOUT_MILLIS = 3 * 60 * 1000;
 
   private static final double CONSUMPTION_RATE_LIMIT_NOT_SPECIFIED = -1;
@@ -142,7 +143,11 @@ public class StreamConfig {
 }
 _connectionTimeoutMillis = connectionTimeoutMillis;
 
-int fetchTimeoutMillis = DEFAULT_STREAM_FETCH_TIMEOUT_MILLIS;
+// For Kinesis, we need to set a higher fetch timeout to avoid getting 
stuck in empty records loop
+// TODO: Remove this once we have a better way to handle empty records in 
Kinesis
+int fetchTimeoutMillis =
+_consumerFactoryClassName.contains("KinesisConsumerFactory") ? 
DEFAULT_STREAM_FETCH_TIMEOUT_MILLIS_KINESIS
+: DEFAULT_STREAM_FETCH_TIMEOUT_MILLIS;
 String fetchTimeoutKey =
 StreamConfigProperties.constructStreamProperty(_type, 
StreamConfigProperties.STREAM_FETCH_TIMEOUT_MILLIS);
 String fetchTimeoutValue = streamConfigMap.get(fetchTimeoutKey);


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



(pinot) branch master updated: Expose metric for table rebalance (#12270)

2024-01-19 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 894e56e0a2 Expose metric for table rebalance (#12270)
894e56e0a2 is described below

commit 894e56e0a2f3623ed21661caea749d065348256f
Author: Prashant Pandey <84911643+suddend...@users.noreply.github.com>
AuthorDate: Fri Jan 19 17:42:30 2024 +0530

Expose metric for table rebalance (#12270)
---
 .../etc/jmx_prometheus_javaagent/configs/controller.yml  | 12 
 .../org/apache/pinot/common/metrics/ControllerGauge.java |  4 +++-
 .../apache/pinot/controller/helix/SegmentStatusChecker.java  |  1 +
 .../helix/core/rebalance/ZkBasedTableRebalanceObserver.java  |  8 
 .../core/rebalance/TestZkBasedTableRebalanceObserver.java|  3 +++
 5 files changed, 15 insertions(+), 13 deletions(-)

diff --git 
a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml 
b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
index c5c9809d2e..45ff802de3 100644
--- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
+++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
@@ -204,18 +204,6 @@ rules:
   cache: true
   labels:
 version: "$2"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
-  name: "pinot_controller_tableConsumptionPaused_$3"
-  cache: true
-  labels:
-tableName: "$1"
-tableType: "$2"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
-  name: "pinot_controller_tableDisabled_$3"
-  cache: true
-  labels:
-tableName: "$1"
-tableType: "$2"
 
   ## Metrics that fit the catch-all patterns above should not be added to this 
file.
   ## In case a metric does not fit the catch-all patterns, add them before 
this comment
diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
index 3444ffae5f..ca8c141447 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
@@ -153,7 +153,9 @@ public enum ControllerGauge implements 
AbstractMetrics.Gauge {
 
   TABLE_CONSUMPTION_PAUSED("tableConsumptionPaused", false),
 
-  TABLE_DISABLED("tableDisabled", false);
+  TABLE_DISABLED("tableDisabled", false),
+
+  TABLE_REBALANCE_IN_PROGRESS("tableRebalanceInProgress", false);
 
   private final String _gaugeName;
   private final String _unit;
diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java
index 617564757e..d0af31044f 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java
@@ -363,6 +363,7 @@ public class SegmentStatusChecker extends 
ControllerPeriodicTask> 
currentState,
   Map> targetState) {
 boolean updatedStatsInZk = false;
+_controllerMetrics.setValueOfTableGauge(_tableNameWithType, 
ControllerGauge.TABLE_REBALANCE_IN_PROGRESS, 1);
 switch (trigger) {
   case START_TRIGGER:
 updateOnStart(currentState, targetState);
@@ -119,6 +125,7 @@ public class ZkBasedTableRebalanceObserver implements 
TableRebalanceObserver {
   public void onSuccess(String msg) {
 Preconditions.checkState(RebalanceResult.Status.DONE != 
_tableRebalanceProgressStats.getStatus(),
 "Table Rebalance already completed");
+_controllerMetrics.setValueOfTableGauge(_tableNameWithType, 
ControllerGauge.TABLE_REBALANCE_IN_PROGRESS, 0);
 long timeToFinishInSeconds = (System.currentTimeMillis() - 
_tableRebalanceProgressStats.getStartTimeMs()) / 1000L;
 _tableRebalanceProgressStats.setCompletionStatusMsg(msg);
 
_tableRebalanceProgressStats.setTimeToFinishInSeconds(timeToFinishInSeconds);
@@ -132,6 +139,7 @@ public class ZkBasedTableRebalanceObserver implements 
TableRebalanceObserver {
 
   @Override
   public void onError(String errorMsg) {
+_controllerMetrics.setValueOfTableGauge(_tableNameWithType, 
ControllerGauge.TABLE_REBALANCE_IN_PROGRESS, 0);
 long timeToFinishInSeconds = (System.currentTimeMillis() - 
_tableRebalanceProgressStats.getStartTimeMs()) / 1000;
 
_tableRebalanceProgressStats.setTimeToFinishInSeconds(timeToFinishInSeconds);
 _tableRebalanceProgressStats.setStatus(RebalanceResult.Status.FAILED);
diff --git 
a/pinot-controller/src/test/java/org/apache/pinot/controller/heli

(pinot) branch master updated: Allow server level configuration for Upsert metadata class (#11851)

2024-01-17 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 21f3d283d4 Allow server level configuration for Upsert metadata class 
(#11851)
21f3d283d4 is described below

commit 21f3d283d42e1b4f5b16f3ef84549eb34b8b7031
Author: Kartik Khare 
AuthorDate: Wed Jan 17 16:40:47 2024 +0530

Allow server level configuration for Upsert metadata class (#11851)

* Allow server level configuration for Upsert metadata class

* Fix null pointer exceptions

* Add tests

* Fix tests

* fix tests

* Fix tests

* Move logic to parse upsert configs from instance data manager to table 
upsert factory

* Address review comments

-

Co-authored-by: Kartik Khare 

---
 .../manager/realtime/RealtimeTableDataManager.java |   8 +-
 .../tests/UpsertTableIntegrationTest.java  | 121 -
 .../UpsertTableSegmentPreloadIntegrationTest.java  |  11 +-
 .../models/DummyTableUpsertMetadataManager.java| 115 
 .../upsert/BaseTableUpsertMetadataManager.java |   1 +
 .../upsert/TableUpsertMetadataManagerFactory.java  |  28 -
 .../MutableSegmentImplUpsertComparisonColTest.java |   3 +-
 .../mutable/MutableSegmentImplUpsertTest.java  |   3 +-
 .../helix/HelixInstanceDataManagerConfig.java  |   7 ++
 .../config/instance/InstanceDataManagerConfig.java |   2 +
 10 files changed, 265 insertions(+), 34 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
index 60cd58199f..333be09b0c 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
@@ -207,7 +207,8 @@ public class RealtimeTableDataManager extends 
BaseTableDataManager {
   Preconditions.checkState(schema != null, "Failed to find schema for 
table: %s", _tableNameWithType);
   // NOTE: Set _tableUpsertMetadataManager before initializing it because 
when preloading is enabled, we need to
   //   load segments into it
-  _tableUpsertMetadataManager = 
TableUpsertMetadataManagerFactory.create(tableConfig);
+  _tableUpsertMetadataManager = 
TableUpsertMetadataManagerFactory.create(tableConfig,
+  
_tableDataManagerConfig.getInstanceDataManagerConfig().getUpsertConfigs());
   _tableUpsertMetadataManager.init(tableConfig, schema, this, 
_helixManager, _segmentPreloadExecutor);
 }
 
@@ -697,6 +698,11 @@ public class RealtimeTableDataManager extends 
BaseTableDataManager {
 return _instanceId;
   }
 
+  @VisibleForTesting
+  public TableUpsertMetadataManager getTableUpsertMetadataManager() {
+return _tableUpsertMetadataManager;
+  }
+
   /**
* Validate a schema against the table config for real-time record 
consumption.
* Ideally, we should validate these things when schema is added or table is 
created, but either of these
diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableIntegrationTest.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableIntegrationTest.java
index e140020a39..634390effc 100644
--- 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableIntegrationTest.java
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableIntegrationTest.java
@@ -18,20 +18,36 @@
  */
 package org.apache.pinot.integration.tests;
 
+import com.google.common.base.Joiner;
 import com.google.common.collect.ImmutableList;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import javax.annotation.Nullable;
 import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.helix.HelixManager;
+import org.apache.helix.model.InstanceConfig;
 import org.apache.pinot.client.ResultSet;
+import org.apache.pinot.common.utils.config.TagNameUtils;
+import org.apache.pinot.common.utils.helix.HelixHelper;
+import org.apache.pinot.core.data.manager.realtime.RealtimeTableDataManager;
+import 
org.apache.pinot.integration.tests.models.DummyTableUpsertMetadataManager;
+import org.apache.pinot.segment.local.upsert.TableUpsertMetadataManagerFactory;
+import org.apache.pinot.server.starter.helix.BaseServerStarter;
+import org.apache.pinot.server.starter.helix.HelixInstanceDataManagerConfig;
 import org.apache.pinot.spi.config.table.TableConfi

(pinot) branch master updated: Bug fix: reset primary key count to 0 when table is deleted (#12169)

2023-12-19 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new f0b78b44e3 Bug fix: reset primary key count to 0 when table is deleted 
(#12169)
f0b78b44e3 is described below

commit f0b78b44e38a424a652735f66557994b7f6e6c43
Author: Kartik Khare 
AuthorDate: Tue Dec 19 17:47:14 2023 +0530

Bug fix: reset primary key count to 0 when table is deleted (#12169)
---
 .../segment/local/upsert/BasePartitionUpsertMetadataManager.java | 5 +
 1 file changed, 5 insertions(+)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java
index b7f9696b11..f13875c759 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java
@@ -772,6 +772,11 @@ public abstract class BasePartitionUpsertMetadataManager 
implements PartitionUps
   }
 }
 doClose();
+// We don't remove the segment from the metadata manager when
+// it's closed. This was done to make table deletion faster. Since we 
don't remove the segment, we never decrease
+// the primary key count. So, we set the primary key count to 0 here.
+_serverMetrics.setValueOfPartitionGauge(_tableNameWithType, _partitionId, 
ServerGauge.UPSERT_PRIMARY_KEYS_COUNT,
+0L);
 _logger.info("Closed the metadata manager");
   }
 


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



(pinot) branch master updated: [feature] add support for StreamNative OAuth2 authentication for pulsar. (#12068)

2023-12-18 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new fd45c1bbd2 [feature] add support for StreamNative OAuth2 
authentication for pulsar. (#12068)
fd45c1bbd2 is described below

commit fd45c1bbd273dab29aebe4576cf0475d219e2fca
Author: Jeffrey Bolle 
AuthorDate: Mon Dec 18 04:39:52 2023 -0500

[feature] add support for StreamNative OAuth2 authentication for pulsar. 
(#12068)

* [feature] add support for StreamNative OAuth2 authentication for pulsar.

Addresses #12067

* add comments documenting new params.

* Add validation for provided OAuth2 config.

* checkstyle

* Added todo per PR comments.
---
 .../pinot/plugin/stream/pulsar/PulsarConfig.java   | 87 +-
 .../PulsarPartitionLevelConnectionHandler.java | 26 +++
 .../plugin/stream/pulsar/PulsarConfigTest.java | 54 ++
 3 files changed, 148 insertions(+), 19 deletions(-)

diff --git 
a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarConfig.java
 
b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarConfig.java
index 8fefc0e7c5..8cdc8f8647 100644
--- 
a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarConfig.java
+++ 
b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarConfig.java
@@ -19,6 +19,9 @@
 package org.apache.pinot.plugin.stream.pulsar;
 
 import com.google.common.base.Preconditions;
+import java.io.File;
+import java.net.MalformedURLException;
+import java.net.URL;
 import java.util.Collections;
 import java.util.Map;
 import java.util.Objects;
@@ -42,6 +45,10 @@ public class PulsarConfig {
   public static final String BOOTSTRAP_SERVERS = "bootstrap.servers";
   public static final String AUTHENTICATION_TOKEN = "authenticationToken";
   public static final String TLS_TRUST_CERTS_FILE_PATH = 
"tlsTrustCertsFilePath";
+
+  public static final String OAUTH_ISSUER_URL = "issuerUrl";
+  public static final String OAUTH_CREDS_FILE_PATH = "credsFilePath";
+  public static final String OAUTH_AUDIENCE = "audience";
   public static final String ENABLE_KEY_VALUE_STITCH = "enableKeyValueStitch";
   public static final String METADATA_FIELDS = "metadata.fields"; //list of 
the metadata fields comma separated
 
@@ -52,6 +59,15 @@ public class PulsarConfig {
   private final SubscriptionInitialPosition _subscriptionInitialPosition;
   private final String _authenticationToken;
   private final String _tlsTrustCertsFilePath;
+
+  private final String _issuerUrl; // OAUTH2 issuer URL example: 
"https://auth.streamnative.cloud";
+
+  // Absolute path of your downloaded key file on the local file system.
+  // example: file:///path/to/private_creds_file
+  //TODO: find a good way to support pushing this secret to all servers.
+  private final String _credentialsFilePath;
+  private final String _audience; // Audience for your OAUTH2 client: 
urn:sn:pulsar:test:test-cluster
+
   // Deprecated since pulsar supports record key extraction
   @Deprecated
   private final boolean _enableKeyValueStitch;
@@ -59,38 +75,59 @@ public class PulsarConfig {
   private final Set 
_metadataFields;
   public PulsarConfig(StreamConfig streamConfig, String subscriberId) {
 Map streamConfigMap = streamConfig.getStreamConfigsMap();
-_pulsarTopicName = streamConfig.getTopicName();
-_bootstrapServers =
-
streamConfigMap.get(StreamConfigProperties.constructStreamProperty(STREAM_TYPE, 
BOOTSTRAP_SERVERS));
 _subscriberId = subscriberId;
 
-String authenticationTokenKey = 
StreamConfigProperties.constructStreamProperty(STREAM_TYPE, 
AUTHENTICATION_TOKEN);
-_authenticationToken = streamConfigMap.get(authenticationTokenKey);
-
-String tlsTrustCertsFilePathKey = StreamConfigProperties.
-constructStreamProperty(STREAM_TYPE, TLS_TRUST_CERTS_FILE_PATH);
-_tlsTrustCertsFilePath = streamConfigMap.get(tlsTrustCertsFilePathKey);
-
-String enableKeyValueStitchKey = StreamConfigProperties.
-constructStreamProperty(STREAM_TYPE, ENABLE_KEY_VALUE_STITCH);
-_enableKeyValueStitch = 
Boolean.parseBoolean(streamConfigMap.get(enableKeyValueStitchKey));
-
+_pulsarTopicName = streamConfig.getTopicName();
+_bootstrapServers = getConfigValue(streamConfigMap, BOOTSTRAP_SERVERS);
 Preconditions.checkNotNull(_bootstrapServers, "No brokers provided in the 
config");
 
+_authenticationToken = getConfigValue(streamConfigMap, 
AUTHENTICATION_TOKEN);
+_tlsTrustCertsFilePath = getConfigValue(streamConfigMap, 
TLS_TRUST

[pinot] branch master updated: Multiple JDBC Driver fixes to support Jetbrains Intellij/Datagrip database tooling (#11814)

2023-10-23 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 60d34daf17 Multiple JDBC Driver fixes to support Jetbrains 
Intellij/Datagrip database tooling (#11814)
60d34daf17 is described below

commit 60d34daf17f72b32a193a221756d5b72168c6e00
Author: Tim Veil <3260845+timv...@users.noreply.github.com>
AuthorDate: Tue Oct 24 00:45:02 2023 -0400

Multiple JDBC Driver fixes to support Jetbrains Intellij/Datagrip database 
tooling (#11814)

* fix for #11813;

* removed empty line based on review
---
 .../pinot/client/PinotConnectionMetaData.java  | 12 +++
 .../pinot/client/PinotPreparedStatement.java   |  2 +-
 .../pinot/client/base/AbstractBaseConnection.java  | 24 --
 .../pinot/client/base/AbstractBaseStatement.java   |  8 
 .../controller/PinotControllerTransport.java   |  2 +-
 5 files changed, 27 insertions(+), 21 deletions(-)

diff --git 
a/pinot-clients/pinot-jdbc-client/src/main/java/org/apache/pinot/client/PinotConnectionMetaData.java
 
b/pinot-clients/pinot-jdbc-client/src/main/java/org/apache/pinot/client/PinotConnectionMetaData.java
index 30f13d2b0c..94e680f9ca 100644
--- 
a/pinot-clients/pinot-jdbc-client/src/main/java/org/apache/pinot/client/PinotConnectionMetaData.java
+++ 
b/pinot-clients/pinot-jdbc-client/src/main/java/org/apache/pinot/client/PinotConnectionMetaData.java
@@ -150,30 +150,34 @@ public class PinotConnectionMetaData extends 
AbstractBaseConnectionMetaData {
   public ResultSet getColumns(String catalog, String schemaPattern, String 
tableNamePattern, String columnNamePattern)
   throws SQLException {
 
+if (tableNamePattern != null && tableNamePattern.equals("%")) {
+  LOGGER.warn("driver does not support pattern [{}] for table name", 
tableNamePattern);
+  return PinotResultSet.empty();
+}
+
 SchemaResponse schemaResponse = 
_controllerTransport.getTableSchema(tableNamePattern, _controllerURL);
 PinotMeta pinotMeta = new PinotMeta();
 pinotMeta.setColumnNames(TABLE_SCHEMA_COLUMNS);
 pinotMeta.setColumnDataTypes(TABLE_SCHEMA_COLUMNS_DTYPES);
 
-String tableName = schemaResponse.getSchemaName();
 int ordinalPosition = 1;
 if (schemaResponse.getDimensions() != null) {
   for (JsonNode columns : schemaResponse.getDimensions()) {
-appendColumnMeta(pinotMeta, tableName, ordinalPosition, columns);
+appendColumnMeta(pinotMeta, tableNamePattern, ordinalPosition, 
columns);
 ordinalPosition++;
   }
 }
 
 if (schemaResponse.getMetrics() != null) {
   for (JsonNode columns : schemaResponse.getMetrics()) {
-appendColumnMeta(pinotMeta, tableName, ordinalPosition, columns);
+appendColumnMeta(pinotMeta, tableNamePattern, ordinalPosition, 
columns);
 ordinalPosition++;
   }
 }
 
 if (schemaResponse.getDateTimeFieldSpecs() != null) {
   for (JsonNode columns : schemaResponse.getDateTimeFieldSpecs()) {
-appendColumnMeta(pinotMeta, tableName, ordinalPosition, columns);
+appendColumnMeta(pinotMeta, tableNamePattern, ordinalPosition, 
columns);
 ordinalPosition++;
   }
 }
diff --git 
a/pinot-clients/pinot-jdbc-client/src/main/java/org/apache/pinot/client/PinotPreparedStatement.java
 
b/pinot-clients/pinot-jdbc-client/src/main/java/org/apache/pinot/client/PinotPreparedStatement.java
index 8edac605d8..06ada14ac8 100644
--- 
a/pinot-clients/pinot-jdbc-client/src/main/java/org/apache/pinot/client/PinotPreparedStatement.java
+++ 
b/pinot-clients/pinot-jdbc-client/src/main/java/org/apache/pinot/client/PinotPreparedStatement.java
@@ -203,7 +203,7 @@ public class PinotPreparedStatement extends 
AbstractBasePreparedStatement {
   }
   return _resultSet;
 } catch (PinotClientException e) {
-  throw new SQLException("Failed to execute query : {}", _query, e);
+  throw new SQLException(String.format("Failed to execute query : %s", 
_query), e);
 }
   }
 
diff --git 
a/pinot-clients/pinot-jdbc-client/src/main/java/org/apache/pinot/client/base/AbstractBaseConnection.java
 
b/pinot-clients/pinot-jdbc-client/src/main/java/org/apache/pinot/client/base/AbstractBaseConnection.java
index 73a1d19433..cbcad08d36 100644
--- 
a/pinot-clients/pinot-jdbc-client/src/main/java/org/apache/pinot/client/base/AbstractBaseConnection.java
+++ 
b/pinot-clients/pinot-jdbc-client/src/main/java/org/apache/pinot/client/base/AbstractBaseConnection.java
@@ -51,7 +51,7 @@ public abstract class AbstractBaseConnection implements 
Connection {
   @Override
   public void clearWarnings()
   throws SQLException {
-throw new SQLFeatureNotSupportedException();
+// no-op
   }
 
   @Override
@@ -111,42 +111,43 @@ public abstract class Abstra

[pinot] branch master updated: Add integration test for rebalance in upsert tables (#11568)

2023-10-08 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 52d16f7668 Add integration test for rebalance in upsert tables (#11568)
52d16f7668 is described below

commit 52d16f76685929eb46d001f0a435beca204da6cb
Author: Kartik Khare 
AuthorDate: Mon Oct 9 11:54:13 2023 +0530

Add integration test for rebalance in upsert tables (#11568)

* Add integration test for rebalance in upsert tables

* Add reload test as well

* Reload tests

* Remove upsert integration test since partial upsert tests cover both

* Add explicit status checks for rebalance/reload jobs rather than sleep

* Fix flakiness due to segment commit during reload

* Use new RebalanceConfig class

* Add missing partial upsert cases

* Refactor jobstatus to segment reload status

-

Co-authored-by: Kartik Khare 
Co-authored-by: Kartik Khare 

---
 .../controller/helix/ControllerRequestClient.java  |   6 +-
 .../pinot/controller/helix/ControllerTest.java |  12 +-
 .../tests/BaseClusterIntegrationTestSet.java   |   4 +-
 ...PartialUpsertTableRebalanceIntegrationTest.java | 451 +
 .../utils/builder/ControllerRequestURLBuilder.java |   6 +-
 5 files changed, 468 insertions(+), 11 deletions(-)

diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/ControllerRequestClient.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/ControllerRequestClient.java
index 3ec7fc3642..ff27954f70 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/ControllerRequestClient.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/ControllerRequestClient.java
@@ -175,11 +175,13 @@ public class ControllerRequestClient {
 }
   }
 
-  public void reloadTable(String tableName, TableType tableType, boolean 
forceDownload)
+  public String reloadTable(String tableName, TableType tableType, boolean 
forceDownload)
   throws IOException {
 try {
-  HttpClient.wrapAndThrowHttpException(_httpClient.sendJsonPostRequest(new 
URL(
+  SimpleHttpResponse simpleHttpResponse =
+  
HttpClient.wrapAndThrowHttpException(_httpClient.sendJsonPostRequest(new URL(
   _controllerRequestURLBuilder.forTableReload(tableName, tableType, 
forceDownload)).toURI(), null));
+  return simpleHttpResponse.getResponse();
 } catch (HttpErrorStatusException | URISyntaxException e) {
   throw new IOException(e);
 }
diff --git 
a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/ControllerTest.java
 
b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/ControllerTest.java
index 33932ee38b..e4f62da327 100644
--- 
a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/ControllerTest.java
+++ 
b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/ControllerTest.java
@@ -716,14 +716,14 @@ public class ControllerTest {
 return getControllerRequestClient().getTableSize(tableName);
   }
 
-  public void reloadOfflineTable(String tableName)
+  public String reloadOfflineTable(String tableName)
   throws IOException {
-reloadOfflineTable(tableName, false);
+return reloadOfflineTable(tableName, false);
   }
 
-  public void reloadOfflineTable(String tableName, boolean forceDownload)
+  public String reloadOfflineTable(String tableName, boolean forceDownload)
   throws IOException {
-getControllerRequestClient().reloadTable(tableName, TableType.OFFLINE, 
forceDownload);
+return getControllerRequestClient().reloadTable(tableName, 
TableType.OFFLINE, forceDownload);
   }
 
   public void reloadOfflineSegment(String tableName, String segmentName, 
boolean forceDownload)
@@ -731,9 +731,9 @@ public class ControllerTest {
 getControllerRequestClient().reloadSegment(tableName, segmentName, 
forceDownload);
   }
 
-  public void reloadRealtimeTable(String tableName)
+  public String reloadRealtimeTable(String tableName)
   throws IOException {
-getControllerRequestClient().reloadTable(tableName, TableType.REALTIME, 
false);
+return getControllerRequestClient().reloadTable(tableName, 
TableType.REALTIME, false);
   }
 
   public void createBrokerTenant(String tenantName, int numBrokers)
diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTestSet.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTestSet.java
index bcda7532ed..1d2a75215f 100644
--- 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTestSet.java
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests

[pinot] branch master updated: Add DECIMAL data type to orc record reader (#11377)

2023-08-19 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new dd22f8bca1 Add DECIMAL data type to orc record reader (#11377)
dd22f8bca1 is described below

commit dd22f8bca1aa25260aeb2b320debc3a716b13915
Author: Kartik Khare 
AuthorDate: Sat Aug 19 19:39:20 2023 +0530

Add DECIMAL data type to orc record reader (#11377)

* Add DECIMAL data type to orc record reader

* Add test

-

Co-authored-by: Kartik Khare 
---
 .../plugin/inputformat/orc/ORCRecordReader.java| 12 
 .../inputformat/orc/ORCRecordExtractorTest.java| 22 +-
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git 
a/pinot-plugins/pinot-input-format/pinot-orc/src/main/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordReader.java
 
b/pinot-plugins/pinot-input-format/pinot-orc/src/main/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordReader.java
index 8a4d3fd709..fb1c04a2c2 100644
--- 
a/pinot-plugins/pinot-input-format/pinot-orc/src/main/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordReader.java
+++ 
b/pinot-plugins/pinot-input-format/pinot-orc/src/main/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordReader.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -164,6 +165,7 @@ public class ORCRecordReader implements RecordReader {
   case BINARY:
   case VARCHAR:
   case CHAR:
+  case DECIMAL:
 return true;
   default:
 return false;
@@ -368,6 +370,16 @@ public class ORCRecordReader implements RecordReader {
 } else {
   return null;
 }
+  case DECIMAL:
+// Extract to string
+DecimalColumnVector decimalColumnVector = (DecimalColumnVector) 
columnVector;
+if (decimalColumnVector.noNulls || !decimalColumnVector.isNull[rowId]) 
{
+  StringBuilder stringBuilder = new StringBuilder();
+  decimalColumnVector.stringifyValue(stringBuilder, rowId);
+  return stringBuilder.toString();
+} else {
+  return null;
+}
   default:
 // Unsupported types
 throw new IllegalStateException("Unsupported field type: " + category 
+ " for field: " + field);
diff --git 
a/pinot-plugins/pinot-input-format/pinot-orc/src/test/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordExtractorTest.java
 
b/pinot-plugins/pinot-input-format/pinot-orc/src/test/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordExtractorTest.java
index b4e6f14ea3..347986d961 100644
--- 
a/pinot-plugins/pinot-input-format/pinot-orc/src/test/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordExtractorTest.java
+++ 
b/pinot-plugins/pinot-input-format/pinot-orc/src/test/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordExtractorTest.java
@@ -30,13 +30,16 @@ import java.util.Map;
 import java.util.Set;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.orc.OrcFile;
 import org.apache.orc.TypeDescription;
 import org.apache.orc.Writer;
@@ -76,7 +79,8 @@ public class ORCRecordExtractorTest extends 
AbstractRecordExtractorTest {
 + 
"simpleStruct:struct,"
 + 
"complexStruct:struct>,"
-+ 
"complexList:array>," + 
"simpleMap:map,"
++ 
"complexList:array>,"
++ "simpleMap:map,"
 + 
"complexMap:map>" + ">");
 // @format:on
 // CHECKSTYLE:ON
@@ -118,7 +122,7 @@ public class ORCRecordExtractorTest extends 
AbstractRecordExtractorTest {
 StructColumnVector complexListElementVector = (StructColumnVector) 
complexListVector.child;
 LongColumnVector co

[pinot] branch master updated: Allow custom operations before upsert preload starts (#11265)

2023-08-04 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 75a4bc3f39 Allow custom operations before upsert preload starts 
(#11265)
75a4bc3f39 is described below

commit 75a4bc3f39ef825665b368929520a543b74bfd54
Author: Xiaotian (Jackie) Jiang <1751+jackie-ji...@users.noreply.github.com>
AuthorDate: Fri Aug 4 06:03:48 2023 -0700

Allow custom operations before upsert preload starts (#11265)
---
 .../upsert/BaseTableUpsertMetadataManager.java | 51 --
 1 file changed, 37 insertions(+), 14 deletions(-)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BaseTableUpsertMetadataManager.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BaseTableUpsertMetadataManager.java
index 148248b0be..2fecc4fbe5 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BaseTableUpsertMetadataManager.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BaseTableUpsertMetadataManager.java
@@ -57,9 +57,10 @@ import org.slf4j.LoggerFactory;
 @ThreadSafe
 public abstract class BaseTableUpsertMetadataManager implements 
TableUpsertMetadataManager {
   private static final Logger LOGGER = 
LoggerFactory.getLogger(BaseTableUpsertMetadataManager.class);
-  private TableConfig _tableConfig;
-  private Schema _schema;
-  private TableDataManager _tableDataManager;
+
+  protected TableConfig _tableConfig;
+  protected Schema _schema;
+  protected TableDataManager _tableDataManager;
   protected String _tableNameWithType;
   protected List _primaryKeyColumns;
   protected List _comparisonColumns;
@@ -70,6 +71,9 @@ public abstract class BaseTableUpsertMetadataManager 
implements TableUpsertMetad
   protected double _metadataTTL;
   protected File _tableIndexDir;
   protected ServerMetrics _serverMetrics;
+  protected HelixManager _helixManager;
+  protected ExecutorService _segmentPreloadExecutor;
+
   private volatile boolean _isPreloading = false;
 
   @Override
@@ -109,6 +113,11 @@ public abstract class BaseTableUpsertMetadataManager 
implements TableUpsertMetad
 _metadataTTL = upsertConfig.getMetadataTTL();
 _tableIndexDir = tableDataManager.getTableDataDir();
 _serverMetrics = serverMetrics;
+_helixManager = helixManager;
+_segmentPreloadExecutor = segmentPreloadExecutor;
+
+initCustomVariables();
+
 if (_enableSnapshot && segmentPreloadExecutor != null && 
upsertConfig.isEnablePreload()) {
   // Preloading the segments with snapshots for fast upsert metadata 
recovery.
   // Note that there is an implicit waiting logic between the thread doing 
the segment preloading here and the
@@ -119,7 +128,7 @@ public abstract class BaseTableUpsertMetadataManager 
implements TableUpsertMetad
   // happens as the lambda of ConcurrentHashMap.computeIfAbsent() method, 
which ensures the waiting logic.
   try {
 _isPreloading = true;
-preloadSegments(helixManager, segmentPreloadExecutor);
+preloadSegments();
   } catch (Exception e) {
 // Even if preloading fails, we should continue to complete the 
initialization, so that TableDataManager can be
 // created. Once TableDataManager is created, no more segment 
preloading would happen, and the normal segment
@@ -136,16 +145,24 @@ public abstract class BaseTableUpsertMetadataManager 
implements TableUpsertMetad
 }
   }
 
+  /**
+   * Can be overridden to initialize custom variables after other variables 
are set but before preload starts. This is
+   * needed because preload will load segments which might require these 
custom variables.
+   */
+  protected void initCustomVariables() {
+  }
+
   /**
* Get the ideal state and find segments assigned to current instance, then 
preload those with validDocIds snapshot.
* Skip those without the snapshots and those whose crc has changed, as they 
will be handled by normal Helix state
* transitions, which will proceed after the preloading phase fully 
completes.
*/
-  private void preloadSegments(HelixManager helixManager, ExecutorService 
segmentPreloadExecutor)
+  private void preloadSegments()
   throws Exception {
 LOGGER.info("Preload segments from table: {} for fast upsert metadata 
recovery", _tableNameWithType);
-IdealState idealState = HelixHelper.getTableIdealState(helixManager, 
_tableNameWithType);
-ZkHelixPropertyStore propertyStore = 
helixManager.getHelixPropertyStore();
+onPreloadStart();
+IdealState idealState = HelixHelper.getTableIdealState(_helixManager, 
_tableNameWithType);
+ZkHelixPropertyStore propertyStore = 
_helixManager.getHelixPropertyStore();
 String instanceId = getInstanceId();
 Index

[pinot] branch master updated: Make Preload Integration test more extensible (#11195)

2023-07-28 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new b69f438a79 Make Preload Integration test more extensible (#11195)
b69f438a79 is described below

commit b69f438a79716179c3a7c8161a67d675bdd9909e
Author: Kartik Khare 
AuthorDate: Fri Jul 28 22:59:01 2023 +0530

Make Preload Integration test more extensible (#11195)

* Make Preload Integration test more extensible

* Make snapshot method protected as well

-

Co-authored-by: Kartik Khare 
---
 .../UpsertTableSegmentPreloadIntegrationTest.java  | 38 ++
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableSegmentPreloadIntegrationTest.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableSegmentPreloadIntegrationTest.java
index e662403347..0bd5a84af6 100644
--- 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableSegmentPreloadIntegrationTest.java
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableSegmentPreloadIntegrationTest.java
@@ -73,12 +73,17 @@ public class UpsertTableSegmentPreloadIntegrationTest 
extends BaseClusterIntegra
 startBroker();
 startServers(NUM_SERVERS);
 
-// Unpack the Avro files
-List avroFiles = unpackAvroData(_tempDir);
-
 // Start Kafka and push data into Kafka
 startKafka();
 
+populateTables();
+  }
+
+  protected void populateTables()
+  throws Exception {
+// Unpack the Avro files
+List avroFiles = unpackAvroData(_tempDir);
+
 // Create and upload schema and table config
 Schema schema = createSchema();
 addSchema(schema);
@@ -192,6 +197,16 @@ public class UpsertTableSegmentPreloadIntegrationTest 
extends BaseClusterIntegra
 assertEquals(getCurrentCountStarResult(), getCountStarResult());
 assertEquals(getCurrentCountStarResultWithoutUpsert(), 
getCountStarResultWithoutUpsert());
 
+waitForSnapshotCreation();
+
+// Restart the servers and check again
+restartServers();
+verifyIdealState(7);
+waitForAllDocsLoaded(600_000L);
+  }
+
+  protected void waitForSnapshotCreation()
+  throws Exception {
 Set consumingSegments = 
getConsumingSegmentsFromIdealState(getTableName() + "_REALTIME");
 // trigger force commit for snapshots
 String jobId = forceCommit(getTableName());
@@ -211,7 +226,7 @@ public class UpsertTableSegmentPreloadIntegrationTest 
extends BaseClusterIntegra
 
serverStarter.getConfig().getProperty(CommonConstants.Server.CONFIG_OF_INSTANCE_DATA_DIR);
 File[] files = new File(segmentDir, getTableName() + 
"_REALTIME").listFiles();
 for (File file : files) {
-  if (file.getName().contains("tmp") || 
file.getName().contains("consumer")) {
+  if (!file.getName().startsWith(getTableName())) {
 continue;
   }
   if (file.isDirectory()) {
@@ -231,15 +246,10 @@ public class UpsertTableSegmentPreloadIntegrationTest 
extends BaseClusterIntegra
   } catch (Exception e) {
 return false;
   }
-}, 6L, "Error verifying force commit operation on table!");
-
-// Restart the servers and check again
-restartServers();
-verifyIdealState(7);
-waitForAllDocsLoaded(600_000L);
+}, 12L, "Error verifying force commit operation on table!");
   }
 
-  private void verifyIdealState(int numSegmentsExpected) {
+  protected void verifyIdealState(int numSegmentsExpected) {
 IdealState idealState = HelixHelper.getTableIdealState(_helixManager, 
REALTIME_TABLE_NAME);
 Map> segmentAssignment = 
idealState.getRecord().getMapFields();
 assertEquals(segmentAssignment.size(), numSegmentsExpected);
@@ -295,7 +305,7 @@ public class UpsertTableSegmentPreloadIntegrationTest 
extends BaseClusterIntegra
 }
   }
 
-  public Set getConsumingSegmentsFromIdealState(String 
tableNameWithType) {
+  protected Set getConsumingSegmentsFromIdealState(String 
tableNameWithType) {
 IdealState tableIdealState = 
_controllerStarter.getHelixResourceManager().getTableIdealState(tableNameWithType);
 Map> segmentAssignment = 
tableIdealState.getRecord().getMapFields();
 Set matchingSegments = new 
HashSet<>(HashUtil.getHashMapCapacity(segmentAssignment.size()));
@@ -308,7 +318,7 @@ public class UpsertTableSegmentPreloadIntegrationTest 
extends BaseClusterIntegra
 return matchingSegments;
   }
 
-  public boolean isForceCommitJobCompleted(String forceCommitJobId)
+  protected boolean isForceCommitJobCompleted(String forceCommitJobId)
 

[pinot] branch master updated: rename metadataFields config field name to match other metadata config fields. (#11202)

2023-07-27 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 459e88aae2 rename metadataFields config field name to match other 
metadata config fields. (#11202)
459e88aae2 is described below

commit 459e88aae2ec1c0c10922e005963c96731359ce9
Author: Jeff Bolle 
AuthorDate: Fri Jul 28 01:36:02 2023 -0400

rename metadataFields config field name to match other metadata config 
fields. (#11202)
---
 .../main/java/org/apache/pinot/plugin/stream/pulsar/PulsarConfig.java   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarConfig.java
 
b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarConfig.java
index 8abd7ce0d3..8fefc0e7c5 100644
--- 
a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarConfig.java
+++ 
b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarConfig.java
@@ -43,7 +43,7 @@ public class PulsarConfig {
   public static final String AUTHENTICATION_TOKEN = "authenticationToken";
   public static final String TLS_TRUST_CERTS_FILE_PATH = 
"tlsTrustCertsFilePath";
   public static final String ENABLE_KEY_VALUE_STITCH = "enableKeyValueStitch";
-  public static final String METADATA_FIELDS = "metadataFields"; //list of the 
metadata fields comma separated
+  public static final String METADATA_FIELDS = "metadata.fields"; //list of 
the metadata fields comma separated
 
   private final String _pulsarTopicName;
   private final String _subscriberId;


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated: Add Integration test for Upsert Preload (#11160)

2023-07-26 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new d9c64f347a Add Integration test for Upsert Preload (#11160)
d9c64f347a is described below

commit d9c64f347af0d1c6e27c8b0c304033be9f463f69
Author: Kartik Khare 
AuthorDate: Wed Jul 26 16:22:09 2023 +0530

Add Integration test for Upsert Preload (#11160)

* Add Integration test for Upsert Preload

* Fix tests

* Reverting Base method changes as it breaks compatibility tests
---
 ... UpsertTableSegmentPreloadIntegrationTest.java} | 123 +++--
 .../UpsertTableSegmentUploadIntegrationTest.java   |   3 +-
 2 files changed, 115 insertions(+), 11 deletions(-)

diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableSegmentUploadIntegrationTest.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableSegmentPreloadIntegrationTest.java
similarity index 60%
copy from 
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableSegmentUploadIntegrationTest.java
copy to 
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableSegmentPreloadIntegrationTest.java
index 7fddd5e43c..e662403347 100644
--- 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableSegmentUploadIntegrationTest.java
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableSegmentPreloadIntegrationTest.java
@@ -18,18 +18,26 @@
  */
 package org.apache.pinot.integration.tests;
 
+import com.fasterxml.jackson.databind.JsonNode;
 import java.io.File;
 import java.io.IOException;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import org.apache.commons.io.FileUtils;
 import org.apache.helix.model.IdealState;
+import org.apache.pinot.common.utils.HashUtil;
 import org.apache.pinot.common.utils.LLCSegmentName;
 import org.apache.pinot.common.utils.helix.HelixHelper;
+import org.apache.pinot.server.starter.helix.BaseServerStarter;
 import org.apache.pinot.spi.config.table.TableConfig;
 import org.apache.pinot.spi.config.table.TableType;
 import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.env.PinotConfiguration;
+import org.apache.pinot.spi.utils.CommonConstants;
 import 
org.apache.pinot.spi.utils.CommonConstants.Helix.StateModel.SegmentStateModel;
+import org.apache.pinot.spi.utils.JsonUtils;
 import org.apache.pinot.spi.utils.builder.TableNameBuilder;
 import org.apache.pinot.util.TestUtils;
 import org.testng.annotations.AfterClass;
@@ -38,10 +46,11 @@ import org.testng.annotations.Test;
 
 import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
 
 
-public class UpsertTableSegmentUploadIntegrationTest extends 
BaseClusterIntegrationTestSet {
-  private static final int NUM_SERVERS = 2;
+public class UpsertTableSegmentPreloadIntegrationTest extends 
BaseClusterIntegrationTestSet {
+  private static final int NUM_SERVERS = 1;
   private static final String PRIMARY_KEY_COL = "clientId";
   private static final String REALTIME_TABLE_NAME = 
TableNameBuilder.REALTIME.tableNameWithType(DEFAULT_TABLE_NAME);
 
@@ -69,22 +78,31 @@ public class UpsertTableSegmentUploadIntegrationTest 
extends BaseClusterIntegrat
 
 // Start Kafka and push data into Kafka
 startKafka();
-pushAvroIntoKafka(avroFiles);
 
 // Create and upload schema and table config
 Schema schema = createSchema();
 addSchema(schema);
-TableConfig tableConfig = createUpsertTableConfig(avroFiles.get(0), 
PRIMARY_KEY_COL, null, getNumKafkaPartitions());
+TableConfig tableConfig =
+createUpsertTableConfig(avroFiles.get(0), PRIMARY_KEY_COL, null, 
getNumKafkaPartitions());
+tableConfig.getUpsertConfig().setEnablePreload(true);
+tableConfig.getUpsertConfig().setEnableSnapshot(true);
 addTableConfig(tableConfig);
 
 // Create and upload segments
 ClusterIntegrationTestUtils.buildSegmentsFromAvro(avroFiles, tableConfig, 
schema, 0, _segmentDir, _tarDir);
 uploadSegments(getTableName(), TableType.REALTIME, _tarDir);
 
+pushAvroIntoKafka(avroFiles);
 // Wait for all documents loaded
 waitForAllDocsLoaded(600_000L);
   }
 
+  @Override
+  protected void overrideServerConf(PinotConfiguration serverConf) {
+
serverConf.setProperty(CommonConstants.Server.INSTANCE_DATA_MANAGER_CONFIG_PREFIX
 + ".max.segment.preload.threads",
+"1");
+  }
+
   @AfterClass
   public void tearDown()
   throws IOException {
@@ -166,27 +184,78 @@ public class UpsertTableSegmentUploadIntegrationTest 
extends BaseClusterIntegrat
   @Test
   public void testSegmentAssign

[pinot] branch master updated (8293facfa0 -> e302318d33)

2023-05-19 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from 8293facfa0 Keep last completed segment for retention (#10754)
 add e302318d33 bug fix: add @JsonProperty to SegmentAssignmentConfig 
(#10759)

No new revisions were added by this update.

Summary of changes:
 .../pinot/spi/config/table/assignment/SegmentAssignmentConfig.java   | 1 +
 1 file changed, 1 insertion(+)


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated: Add request id to the V2 broker response (#10706)

2023-05-09 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 4f5030530f Add request id to the V2 broker response (#10706)
4f5030530f is described below

commit 4f5030530f55e9a08ad9e3cdbf8a5d96319bbb57
Author: Kartik Khare 
AuthorDate: Wed May 10 11:34:36 2023 +0530

Add request id to the V2 broker response (#10706)

* Add request id to the V2 broker response

* Add unit test

-

Co-authored-by: Kartik Khare 
---
 .../MultiStageBrokerRequestHandler.java|  1 +
 .../MultiStageBrokerRequestHandlerTest.java| 92 ++
 .../response/broker/BrokerResponseNativeV2.java| 25 --
 3 files changed, 111 insertions(+), 7 deletions(-)

diff --git 
a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java
 
b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java
index 860592392f..ffc72c0732 100644
--- 
a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java
+++ 
b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java
@@ -223,6 +223,7 @@ public class MultiStageBrokerRequestHandler extends 
BaseBrokerRequestHandler {
 sqlNodeAndOptions.getParseTimeNs() + (executionEndTimeNs - 
compilationStartTimeNs));
 brokerResponse.setTimeUsedMs(totalTimeMs);
 brokerResponse.setResultTable(queryResults);
+brokerResponse.setRequestId(String.valueOf(requestId));
 
 for (Map.Entry entry : 
stageIdStatsMap.entrySet()) {
   if (entry.getKey() == 0) {
diff --git 
a/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandlerTest.java
 
b/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandlerTest.java
new file mode 100644
index 00..c8ebfa0266
--- /dev/null
+++ 
b/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandlerTest.java
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.broker.requesthandler;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.pinot.broker.broker.AccessControlFactory;
+import org.apache.pinot.broker.broker.AllowAllAccessControlFactory;
+import org.apache.pinot.broker.queryquota.QueryQuotaManager;
+import org.apache.pinot.broker.routing.BrokerRoutingManager;
+import org.apache.pinot.common.config.provider.TableCache;
+import org.apache.pinot.common.metrics.BrokerMetrics;
+import org.apache.pinot.query.service.QueryConfig;
+import org.apache.pinot.spi.env.PinotConfiguration;
+import org.apache.pinot.spi.trace.DefaultRequestContext;
+import org.apache.pinot.spi.trace.RequestContext;
+import org.apache.pinot.spi.utils.CommonConstants;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+public class MultiStageBrokerRequestHandlerTest {
+
+  private PinotConfiguration _config;
+  @Mock
+  private BrokerRoutingManager _routingManager;
+
+  private AccessControlFactory _accessControlFactory;
+  @Mock
+  private QueryQuotaManager _queryQuotaManager;
+  @Mock
+  private TableCache _tableCache;
+
+  @Mock
+  private BrokerMetrics _brokerMetrics;
+
+  private MultiStageBrokerRequestHandler _requestHandler;
+
+  @BeforeClass
+  public void setUp() {
+MockitoAnnotations.openMocks(this);
+_config = new PinotConfiguration();
+_config.setProperty(CommonConstants.Broker.CONFIG_OF_BROKER_TIMEOUT_MS, 
"1");
+_config.setProperty(QueryConfig.KEY_OF_QUERY_RUNNER_PORT, "12345");
+_accessControlFactory = new AllowAllAccessControlFactory();
+_requestHandler =
+new MultiStageBrokerRequestHandler(_config, "testBroke

[pinot] branch master updated: Bug fix: Partial upsert default strategy is null (#10610)

2023-04-13 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 13a792f03f Bug fix: Partial upsert default strategy is null (#10610)
13a792f03f is described below

commit 13a792f03f267bf53e36fc06340d43d8fc1d7af4
Author: Kartik Khare 
AuthorDate: Fri Apr 14 11:09:54 2023 +0530

Bug fix: Partial upsert default strategy is null (#10610)
---
 .../main/java/org/apache/pinot/spi/config/table/UpsertConfig.java  | 2 +-
 .../java/org/apache/pinot/spi/config/table/UpsertConfigTest.java   | 7 +++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git 
a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/UpsertConfig.java 
b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/UpsertConfig.java
index 68b532dc83..f4ce363d00 100644
--- 
a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/UpsertConfig.java
+++ 
b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/UpsertConfig.java
@@ -49,7 +49,7 @@ public class UpsertConfig extends BaseJsonConfig {
   private Map _partialUpsertStrategies;
 
   @JsonPropertyDescription("default upsert strategy for partial mode")
-  private Strategy _defaultPartialUpsertStrategy;
+  private Strategy _defaultPartialUpsertStrategy = Strategy.OVERWRITE;
 
   @JsonPropertyDescription("Columns for upsert comparison, default to time 
column")
   private List _comparisonColumns;
diff --git 
a/pinot-spi/src/test/java/org/apache/pinot/spi/config/table/UpsertConfigTest.java
 
b/pinot-spi/src/test/java/org/apache/pinot/spi/config/table/UpsertConfigTest.java
index 1311de9d41..6b5df41e25 100644
--- 
a/pinot-spi/src/test/java/org/apache/pinot/spi/config/table/UpsertConfigTest.java
+++ 
b/pinot-spi/src/test/java/org/apache/pinot/spi/config/table/UpsertConfigTest.java
@@ -46,4 +46,11 @@ public class UpsertConfigTest {
 
upsertConfig2.setDefaultPartialUpsertStrategy(UpsertConfig.Strategy.OVERWRITE);
 assertEquals(upsertConfig2.getPartialUpsertStrategies(), 
partialUpsertStratgies);
   }
+
+  @Test
+  public void testUpsertConfigForDefaults() {
+UpsertConfig upsertConfig = new UpsertConfig(UpsertConfig.Mode.PARTIAL);
+assertEquals(upsertConfig.getHashFunction(), HashFunction.NONE);
+assertEquals(upsertConfig.getDefaultPartialUpsertStrategy(), 
UpsertConfig.Strategy.OVERWRITE);
+  }
 }


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated: Do not record operator stats when tracing is enabled (#10447)

2023-04-10 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new fe7a72b9d2 Do not record operator stats when tracing is enabled 
(#10447)
fe7a72b9d2 is described below

commit fe7a72b9d27d8764670778474595ebe18d101e16
Author: Kartik Khare 
AuthorDate: Tue Apr 11 10:08:45 2023 +0530

Do not record operator stats when tracing is enabled (#10447)

* Add tracing parameter to opChainContext

* Do not record stats when tracing is disabled

* Do not collect stats at end of leafstage

* Do not fill stage stats when tracing is disabled

* Fix tests

* Record stageWallTime even when tracing is disabled

* Add javadocs for operators

* Add more operators in the chain

* Bug fix: End time pointing to serialization time and not operator end time

* Change test to chain actual operators instead of mock

* Bug fix: Record stage block and row stats even when tracing is disabled

* Fix tests after rebasing with SortedReceivedOperator PR

* Table names should always be populated in the stats

* Fix tests

-

Co-authored-by: Kartik Khare 
Co-authored-by: Kartik Khare 

---
 .../MultiStageBrokerRequestHandler.java|   4 +-
 .../query/reduce/ExecutionStatsAggregator.java |  15 +-
 .../apache/pinot/query/runtime/QueryRunner.java|   9 +-
 .../LeafStageTransferableBlockOperator.java|   9 +
 .../runtime/operator/MailboxSendOperator.java  |   9 +
 .../query/runtime/operator/MultiStageOperator.java |  19 +-
 .../pinot/query/runtime/operator/OpChainStats.java |  12 +-
 .../query/runtime/operator/OperatorStats.java  |  15 +-
 .../runtime/plan/OpChainExecutionContext.java  |  11 +-
 .../query/runtime/plan/PlanRequestContext.java |   9 +-
 .../runtime/plan/ServerRequestPlanVisitor.java |   3 +-
 .../plan/server/ServerPlanRequestContext.java  |   4 +-
 .../query/service/dispatch/QueryDispatcher.java|  22 +-
 .../pinot/query/runtime/QueryRunnerTest.java   |   3 +-
 .../pinot/query/runtime/QueryRunnerTestBase.java   |  16 +-
 .../executor/OpChainSchedulerServiceTest.java  |   2 +-
 .../runtime/executor/RoundRobinSchedulerTest.java  |   2 +-
 .../operator/MailboxReceiveOperatorTest.java   |  32 +--
 .../runtime/operator/MailboxSendOperatorTest.java  |   7 +-
 .../pinot/query/runtime/operator/OpChainTest.java  | 257 -
 .../query/runtime/operator/OperatorTestUtil.java   |  10 +-
 .../operator/SortedMailboxReceiveOperatorTest.java |  38 +--
 .../runtime/queries/ResourceBasedQueriesTest.java  |   5 +-
 .../service/dispatch/QueryDispatcherTest.java  |   4 +-
 24 files changed, 425 insertions(+), 92 deletions(-)

diff --git 
a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java
 
b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java
index ebb7b7dc3e..9e74dcf400 100644
--- 
a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java
+++ 
b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java
@@ -209,14 +209,14 @@ public class MultiStageBrokerRequestHandler extends 
BaseBrokerRequestHandler {
 
 ResultTable queryResults;
 Map stageIdStatsMap = new HashMap<>();
-for (Integer stageId: queryPlan.getStageMetadataMap().keySet()) {
+for (Integer stageId : queryPlan.getStageMetadataMap().keySet()) {
   stageIdStatsMap.put(stageId, new ExecutionStatsAggregator(traceEnabled));
 }
 
 long executionStartTimeNs = System.nanoTime();
 try {
   queryResults = _queryDispatcher.submitAndReduce(requestId, queryPlan, 
_mailboxService, queryTimeoutMs,
-  sqlNodeAndOptions.getOptions(), stageIdStatsMap);
+  sqlNodeAndOptions.getOptions(), stageIdStatsMap, traceEnabled);
 } catch (Exception e) {
   LOGGER.info("query execution failed", e);
   return new 
BrokerResponseNative(QueryException.getException(QueryException.QUERY_EXECUTION_ERROR,
 e));
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/query/reduce/ExecutionStatsAggregator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/query/reduce/ExecutionStatsAggregator.java
index c8ef48af4a..bc8612c551 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/query/reduce/ExecutionStatsAggregator.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/query/reduce/ExecutionStatsAggregator.java
@@ -20,7 +20,6 @@ package org.apache.pinot.core.query.reduce;
 
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.

[pinot] branch master updated: Allow configuring helix timeouts for EV dropped in Instance manager (#10510)

2023-04-01 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 3062bd0e54 Allow configuring helix timeouts for EV dropped in Instance 
manager (#10510)
3062bd0e54 is described below

commit 3062bd0e54723375157994f1921e526008f3748c
Author: Kartik Khare 
AuthorDate: Sat Apr 1 19:41:26 2023 +0530

Allow configuring helix timeouts for EV dropped in Instance manager (#10510)

* Allow configuring helix timeouts for data manager

* Refactor: rename method and cache configs

-

Co-authored-by: Kartik Khare 

---
 .../starter/helix/HelixInstanceDataManager.java   | 12 +++-
 .../starter/helix/HelixInstanceDataManagerConfig.java | 19 +++
 .../config/instance/InstanceDataManagerConfig.java|  4 
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git 
a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManager.java
 
b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManager.java
index aabe2dd715..6806d16c10 100644
--- 
a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManager.java
+++ 
b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManager.java
@@ -80,9 +80,6 @@ import org.slf4j.LoggerFactory;
 @ThreadSafe
 public class HelixInstanceDataManager implements InstanceDataManager {
   private static final Logger LOGGER = 
LoggerFactory.getLogger(HelixInstanceDataManager.class);
-  // TODO: Make this configurable
-  private static final long EXTERNAL_VIEW_DROPPED_MAX_WAIT_MS = 20 * 60_000L; 
// 20 minutes
-  private static final long EXTERNAL_VIEW_CHECK_INTERVAL_MS = 1_000L; // 1 
second
 
   private final ConcurrentHashMap 
_tableDataManagerMap = new ConcurrentHashMap<>();
 
@@ -93,6 +90,8 @@ public class HelixInstanceDataManager implements 
InstanceDataManager {
   private ZkHelixPropertyStore _propertyStore;
   private SegmentUploader _segmentUploader;
   private Supplier _isServerReadyToServeQueries = () -> false;
+  private long _externalViewDroppedMaxWaitMs;
+  private long _externalViewDroppedCheckInternalMs;
 
   // Fixed size LRU cache for storing last N errors on the instance.
   // Key is TableNameWithType-SegmentName pair.
@@ -116,6 +115,9 @@ public class HelixInstanceDataManager implements 
InstanceDataManager {
 _segmentUploader = new 
PinotFSSegmentUploader(_instanceDataManagerConfig.getSegmentStoreUri(),
 PinotFSSegmentUploader.DEFAULT_SEGMENT_UPLOAD_TIMEOUT_MILLIS);
 
+_externalViewDroppedMaxWaitMs = 
_instanceDataManagerConfig.getExternalViewDroppedMaxWaitMs();
+_externalViewDroppedCheckInternalMs = 
_instanceDataManagerConfig.getExternalViewDroppedCheckIntervalMs();
+
 File instanceDataDir = new 
File(_instanceDataManagerConfig.getInstanceDataDir());
 initInstanceDataDir(instanceDataDir);
 
@@ -232,7 +234,7 @@ public class HelixInstanceDataManager implements 
InstanceDataManager {
   public void deleteTable(String tableNameWithType)
   throws Exception {
 // Wait externalview to converge
-long endTimeMs = System.currentTimeMillis() + 
EXTERNAL_VIEW_DROPPED_MAX_WAIT_MS;
+long endTimeMs = System.currentTimeMillis() + 
_externalViewDroppedMaxWaitMs;
 do {
   ExternalView externalView = _helixManager.getHelixDataAccessor()
   
.getProperty(_helixManager.getHelixDataAccessor().keyBuilder().externalView(tableNameWithType));
@@ -249,7 +251,7 @@ public class HelixInstanceDataManager implements 
InstanceDataManager {
 });
 return;
   }
-  Thread.sleep(EXTERNAL_VIEW_CHECK_INTERVAL_MS);
+  Thread.sleep(_externalViewDroppedCheckInternalMs);
 } while (System.currentTimeMillis() < endTimeMs);
 throw new TimeoutException(
 "Timeout while waiting for ExternalView to converge for the table to 
delete: " + tableNameWithType);
diff --git 
a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManagerConfig.java
 
b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManagerConfig.java
index 1cc1e07609..8f31fe1b0b 100644
--- 
a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManagerConfig.java
+++ 
b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManagerConfig.java
@@ -128,10 +128,17 @@ public class HelixInstanceDataManagerConfig implements 
InstanceDataManagerConfig
   private static final String DELETED_SEGMENTS_CACHE_TTL_MINUTES = 
"table.deleted.segments.cache.ttl.minutes";
   private static final String PEER_DOWNLOAD_SCHEME = "peer.download.scheme";
 
+  // Check if the external view is dropped for a table, and if so, wait for 
the external

[pinot] branch master updated: Do not serialize metrics in each Operator (#10473)

2023-04-01 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 941cbf84f9 Do not serialize metrics in each Operator (#10473)
941cbf84f9 is described below

commit 941cbf84f912d7cd47f86299c8f4652cd046e097
Author: Kartik Khare 
AuthorDate: Sat Apr 1 19:41:00 2023 +0530

Do not serialize metrics in each Operator (#10473)

* WIP: Do not serialize metrics

* No need to pass stats between operator. Only collected in the end at the 
send operator

* Use opchain stats to record operatorStats

* No need to serialie metrics in receive operator

* Remove attachStats method and create stats object inside context itself

* Make stats thread safe

* Add test for opchain stats

* Ensure SendOperator stats are populated before serializing stats

* Fix variable scope

* Use operator stats map directly from opchain stats

* unify return statements outside inner for loop in MailboxSendOperator

-

Co-authored-by: Kartik Khare 
---
 .../apache/pinot/query/runtime/QueryRunner.java|   6 +-
 .../runtime/executor/OpChainSchedulerService.java  |   1 -
 .../LeafStageTransferableBlockOperator.java|   3 +-
 .../runtime/operator/MailboxReceiveOperator.java   |   7 +-
 .../runtime/operator/MailboxSendOperator.java  |  12 +-
 .../query/runtime/operator/MultiStageOperator.java |  42 ++-
 .../pinot/query/runtime/operator/OpChain.java  |  11 +-
 .../pinot/query/runtime/operator/OpChainStats.java |  21 +++-
 .../query/runtime/operator/OperatorStats.java  |  14 +--
 .../runtime/operator/utils/OperatorUtils.java  |   4 +-
 .../runtime/plan/OpChainExecutionContext.java  |  14 +++
 .../query/service/dispatch/QueryDispatcher.java|  25 +++--
 .../executor/OpChainSchedulerServiceTest.java  |   6 +-
 .../runtime/executor/RoundRobinSchedulerTest.java  |  34 --
 .../runtime/operator/MailboxSendOperatorTest.java  |   2 +-
 .../pinot/query/runtime/operator/OpChainTest.java  | 124 +
 16 files changed, 237 insertions(+), 89 deletions(-)

diff --git 
a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/QueryRunner.java
 
b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/QueryRunner.java
index b0d0433f42..097b05b455 100644
--- 
a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/QueryRunner.java
+++ 
b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/QueryRunner.java
@@ -52,6 +52,7 @@ import 
org.apache.pinot.query.runtime.executor.OpChainSchedulerService;
 import org.apache.pinot.query.runtime.executor.RoundRobinScheduler;
 import 
org.apache.pinot.query.runtime.operator.LeafStageTransferableBlockOperator;
 import org.apache.pinot.query.runtime.operator.MailboxSendOperator;
+import org.apache.pinot.query.runtime.operator.MultiStageOperator;
 import org.apache.pinot.query.runtime.operator.OpChain;
 import org.apache.pinot.query.runtime.plan.DistributedStagePlan;
 import org.apache.pinot.query.runtime.plan.OpChainExecutionContext;
@@ -213,8 +214,9 @@ public class QueryRunner {
   OpChainExecutionContext opChainExecutionContext =
   new OpChainExecutionContext(_mailboxService, requestId, 
sendNode.getStageId(), _rootServer, deadlineMs,
   deadlineMs, distributedStagePlan.getMetadataMap());
-  mailboxSendOperator = new MailboxSendOperator(opChainExecutionContext,
-  new LeafStageTransferableBlockOperator(opChainExecutionContext, 
serverQueryResults, sendNode.getDataSchema()),
+  MultiStageOperator leafStageOperator =
+  new LeafStageTransferableBlockOperator(opChainExecutionContext, 
serverQueryResults, sendNode.getDataSchema());
+  mailboxSendOperator = new MailboxSendOperator(opChainExecutionContext, 
leafStageOperator,
   sendNode.getExchangeType(), sendNode.getPartitionKeySelector(), 
sendNode.getCollationKeys(),
   sendNode.getCollationDirections(), sendNode.isSortOnSender(), 
sendNode.getStageId(),
   sendNode.getReceiverStageId());
diff --git 
a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/executor/OpChainSchedulerService.java
 
b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/executor/OpChainSchedulerService.java
index 3706627349..1f29584dcc 100644
--- 
a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/executor/OpChainSchedulerService.java
+++ 
b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/executor/OpChainSchedulerService.java
@@ -111,7 +111,6 @@ public class OpChainSchedulerService extends 
AbstractExecutionThreadService {
 LOGGER.error("({}): Completed erroneously {} {}", 
operatorChain, operatorChain.getStats(),
 result.ge

[pinot] branch master updated: Index spi: all types (#10193)

2023-03-27 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 1d02d0eb50 Index spi: all types (#10193)
1d02d0eb50 is described below

commit 1d02d0eb50da0cc32d08dfee3676f98c3cdbe8f4
Author: Gonzalo Ortiz Jaureguizar 
AuthorDate: Tue Mar 28 08:47:08 2023 +0200

Index spi: all types (#10193)

* Draft commit with the IndexType draft

* Draft commit with the IndexService draft

* Draft commit with simplified version of all index types

* Simplify IndexType with tons of UnsupportedOperationException

* Remove BloomIndexType to make this code compile

* Recover simplified BloomIndexType

* Fix an error on IndexService

* Add all types to StandardIndexes

* Fix two checkstyle issues

* supress unchecked cast warning in StandardIndexes.java

* Remove segment dir param from IndexReaderFactory.read

* Improve javadoc on IndexType.deserialize

* Declare a constant before attributes

* Remove unused @JsonIgnore annotation

* Remove IndexDeclaration. Use the single config deserialize method instead

* Improve BloomIndexType example

* remove alternativeSingleValue method from IndexCreator

* Change FieldIndexConfigs.Builder to do not accept null configs

* Rename add methods in IndexCreator as `add`

* Rename IndexType.deserialize as IndexType.getConfig

* Rename IndexReaderFactory.read as IndexReaderFactory.createIndexReader

* Remove copy of IndexHandler

* Move IndexHandler to org.apache.pinot.segment.spi.index from 
org.apache.pinot.segment.local.segment.index.loader

* Add FieldIndexConfigs.toString

* Add javadoc

* Remove indexName concept

* Update code to index-spi

* Update all index types to have implementation all required methods

* Apply some stetical changes recommended in the PR

* Improve javadoc

* Fix typo in javadoc

* Add some changes included in index-spi-all-types

* Add javadoc to StandardIndexes

* Add javadoc and make _allIndexes immutable

* Add javadoc on IndexPlugin

* Remove unused import

* Add more javadoc

* Define standard index ids in StandardIndexes

* Add a map from id to index types

* Add exit_criteria to TODO comment in OnHeapGuavaBloomFilterCreator

* Changed javadoc for IndexService

* Rename get and getOrThrow as getOptional or get

* Cache reader factory instance

* Make DEFAULT final class public

* Make IndexService private

* make constructor private

* add DEFAULT_RANGE_INDEX_VERSION

* add empty lines between class decl, contant and constructor
---
 .../local/segment/index/bloom/BloomIndexType.java  |   3 +-
 .../index/dictionary/DictionaryIndexPlugin.java|  32 +++
 .../index/dictionary/DictionaryIndexType.java  |  92 ++
 .../segment/index/forward/ForwardIndexPlugin.java  |  32 +++
 .../segment/index/forward/ForwardIndexType.java| 105 +
 .../local/segment/index/fst/FstIndexPlugin.java|  32 +++
 .../local/segment/index/fst/FstIndexType.java  |  92 ++
 .../local/segment/index/h3/H3IndexPlugin.java  |  32 +++
 .../local/segment/index/h3/H3IndexType.java|  93 ++
 .../index/inverted/InvertedIndexPlugin.java|  32 +++
 .../segment/index/inverted/InvertedIndexType.java  |  92 ++
 .../local/segment/index/json/JsonIndexPlugin.java  |  32 +++
 .../local/segment/index/json/JsonIndexType.java|  92 ++
 .../index/nullvalue/NullValueIndexPlugin.java  |  32 +++
 .../index/nullvalue/NullValueIndexType.java|  93 ++
 .../segment/index/range/RangeIndexPlugin.java  |  32 +++
 .../local/segment/index/range/RangeIndexType.java  |  99 +++
 .../local/segment/index/text/TextIndexPlugin.java  |  32 +++
 .../local/segment/index/text/TextIndexType.java|  93 ++
 .../pinot/segment/spi/index/StandardIndexes.java   |  53 ++-
 20 files changed, 1189 insertions(+), 6 deletions(-)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/bloom/BloomIndexType.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/bloom/BloomIndexType.java
index 4d8ae842e9..402f39b380 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/bloom/BloomIndexType.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/bloom/BloomIndexType.java
@@ -31,6 +31,7 @@ import

[pinot] branch master updated: Add support to create realtime segment in local (#10433)

2023-03-16 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 16cae5d9c6 Add support to create realtime segment in local (#10433)
16cae5d9c6 is described below

commit 16cae5d9c630dd247fb037d4c0e7a18a8374178c
Author: Kartik Khare 
AuthorDate: Thu Mar 16 15:22:03 2023 +0530

Add support to create realtime segment in local (#10433)

Co-authored-by: Kartik Khare 
---
 .../pinot/tools/admin/command/CreateSegmentCommand.java  | 16 
 1 file changed, 16 insertions(+)

diff --git 
a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/CreateSegmentCommand.java
 
b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/CreateSegmentCommand.java
index 4263f4bbbe..48d27ab3ed 100644
--- 
a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/CreateSegmentCommand.java
+++ 
b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/CreateSegmentCommand.java
@@ -26,12 +26,14 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 import org.apache.commons.io.FileUtils;
+import org.apache.pinot.common.utils.LLCSegmentName;
 import 
org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
 import 
org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl;
 import org.apache.pinot.segment.spi.ImmutableSegment;
 import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
 import org.apache.pinot.segment.spi.creator.SegmentIndexCreationDriver;
 import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.config.table.TableType;
 import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.data.readers.FileFormat;
 import org.apache.pinot.spi.data.readers.RecordReaderConfig;
@@ -82,6 +84,10 @@ public class CreateSegmentCommand extends 
AbstractBaseAdminCommand implements Co
   description = "Option to fail the segment creation if output is an empty 
segment.")
   private boolean _failOnEmptySegment = false;
 
+  @CommandLine.Option(names = {"-realtimePartitionId"},
+  description = "If table is realtime, partition id to be used for segment 
name generation. Default is 0.")
+  private int _realtimePartitionId = 0;
+
   @CommandLine.Option(names = {"-postCreationVerification"},
   description = "Verify segment data file after segment creation. Please 
ensure you have enough local disk to"
   + " hold data for verification")
@@ -139,6 +145,11 @@ public class CreateSegmentCommand extends 
AbstractBaseAdminCommand implements Co
 return this;
   }
 
+  public CreateSegmentCommand setRealtimePartitionId(int realtimePartitionId) {
+_realtimePartitionId = realtimePartitionId;
+return this;
+  }
+
   public CreateSegmentCommand setPostCreationVerification(boolean 
postCreationVerification) {
 _postCreationVerification = postCreationVerification;
 return this;
@@ -209,6 +220,7 @@ public class CreateSegmentCommand extends 
AbstractBaseAdminCommand implements Co
 }
 LOGGER.info("Using table config: {}", tableConfig.toJsonString());
 String rawTableName = 
TableNameBuilder.extractRawTableName(tableConfig.getTableName());
+TableType tableType = tableConfig.getTableType();
 
 Preconditions.checkArgument(_schemaFile != null, "'schemaFile' must be 
specified");
 Schema schema;
@@ -246,6 +258,10 @@ public class CreateSegmentCommand extends 
AbstractBaseAdminCommand implements Co
 segmentGeneratorConfig.setReaderConfig(recordReaderConfig);
 segmentGeneratorConfig.setTableName(rawTableName);
 segmentGeneratorConfig.setSequenceId(sequenceId);
+if (tableType == TableType.REALTIME) {
+  segmentGeneratorConfig.setSegmentName(new 
LLCSegmentName(rawTableName, _realtimePartitionId, sequenceId,
+  System.currentTimeMillis()).getSegmentName());
+}
 segmentGeneratorConfig.setFailOnEmptySegment(_failOnEmptySegment);
 for (int j = 0; j <= _retry; j++) {
   try {


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated (2e55d1c04d -> 4eeaf82185)

2023-03-07 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from 2e55d1c04d Patch: Allow fetching comparable index from 
ComparisonColumns (#10388)
 add 4eeaf82185 Refactor Spark Connector into two modules for reusability 
(#10321)

No new revisions were added by this update.

Summary of changes:
 .../README.md  |   0
 ...pinot-connector-executor-server-interaction.jpg | Bin
 .../documentation/read_model.md|   0
 .../pom.xml| 139 +-
 ...org.apache.spark.sql.sources.DataSourceRegister |   0
 .../spark/datasource/PinotDataSourceReader.scala   |  12 +-
 .../spark/datasource/PinotDataSourceV2.scala   |   0
 .../spark/datasource/PinotInputPartition.scala |  16 +-
 .../spark/datasource/TypeConverter.scala}  |   6 +-
 .../spark/datasource/query}/FilterPushDown.scala   |   2 +-
 .../src/test/resources/schema/pinot-schema.json|   0
 .../src/test/resources/schema/spark-schema.json|   0
 .../connector/spark/datasource}/BaseTest.scala |   2 +-
 .../ExampleSparkPinotConnectorTest.scala   |   4 +-
 .../spark/datasource/TypeConverterTest.scala}  |  14 +-
 .../datasource/query}/FilterPushDownTest.scala |   4 +-
 .../pom.xml|  43 ++---
 .../spark/common/CaseInsensitiveStringMap.java | 201 +
 .../pinot/connector/spark/common}/HttpUtils.scala  |   3 +-
 .../pinot/connector/spark/common}/Logging.scala|   2 +-
 .../spark/common}/PinotClusterClient.scala |  26 ++-
 .../spark/common}/PinotDataSourceReadOptions.scala |  55 +++---
 .../pinot/connector/spark/common}/exceptions.scala |   2 +-
 .../pinot/connector/spark/common}/package.scala|   4 +-
 .../spark/common/partition}/PinotSplitter.scala|  20 +-
 .../connector/spark/common/query/ScanQuery.scala}  |   4 +-
 .../spark/common/query/ScanQueryGenerator.scala}   |  19 +-
 .../reader/PinotAbstractPartitionReader.scala  |  72 
 .../reader}/PinotGrpcServerDataFetcher.scala   |  13 +-
 .../common/reader}/PinotServerDataFetcher.scala|  17 +-
 .../src/test/resources/log4j2.xml  |   0
 .../pinot/connector/spark/common}/BaseTest.scala   |   2 +-
 .../common}/PinotDataSourceReadOptionsTest.scala   |  19 +-
 .../spark/common}/PinotSplitterTest.scala  |  24 +--
 .../common/query/ScanQueryGeneratorTest.scala} |  15 +-
 .../datasource/PinotInputPartitionReader.scala |  72 
 pinot-connectors/pom.xml   |   3 +-
 37 files changed, 431 insertions(+), 384 deletions(-)
 rename pinot-connectors/{pinot-spark-connector => 
pinot-spark-2-connector}/README.md (100%)
 rename pinot-connectors/{pinot-spark-connector => 
pinot-spark-2-connector}/documentation/images/spark-pinot-connector-executor-server-interaction.jpg
 (100%)
 rename pinot-connectors/{pinot-spark-connector => 
pinot-spark-2-connector}/documentation/read_model.md (100%)
 copy pinot-connectors/{pinot-spark-connector => 
pinot-spark-2-connector}/pom.xml (63%)
 rename pinot-connectors/{pinot-spark-connector => 
pinot-spark-2-connector}/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
 (100%)
 rename pinot-connectors/{pinot-spark-connector => 
pinot-spark-2-connector}/src/main/scala/org/apache/pinot/connector/spark/datasource/PinotDataSourceReader.scala
 (89%)
 rename pinot-connectors/{pinot-spark-connector => 
pinot-spark-2-connector}/src/main/scala/org/apache/pinot/connector/spark/datasource/PinotDataSourceV2.scala
 (100%)
 rename pinot-connectors/{pinot-spark-connector => 
pinot-spark-2-connector}/src/main/scala/org/apache/pinot/connector/spark/datasource/PinotInputPartition.scala
 (63%)
 rename 
pinot-connectors/{pinot-spark-connector/src/main/scala/org/apache/pinot/connector/spark/connector/PinotUtils.scala
 => 
pinot-spark-2-connector/src/main/scala/org/apache/pinot/connector/spark/datasource/TypeConverter.scala}
 (97%)
 rename 
pinot-connectors/{pinot-spark-connector/src/main/scala/org/apache/pinot/connector/spark/connector
 => 
pinot-spark-2-connector/src/main/scala/org/apache/pinot/connector/spark/datasource/query}/FilterPushDown.scala
 (98%)
 rename pinot-connectors/{pinot-spark-connector => 
pinot-spark-2-connector}/src/test/resources/schema/pinot-schema.json (100%)
 rename pinot-connectors/{pinot-spark-connector => 
pinot-spark-2-connector}/src/test/resources/schema/spark-schema.json (100%)
 copy 
pinot-connectors/{pinot-spark-connector/src/test/scala/org/apache/pinot/connector/spark
 => 
pinot-spark-2-connector/src/test/scala/org/apache/pinot/connector/spark/datasource}/BaseTest.scala
 (94%)
 rename 
pinot-connectors/{pinot-spark-connector/src/test/scala/org/apache/pinot/connector/spark
 => 
pinot-spark-2-connector

[pinot] branch master updated: Patch: Allow fetching comparable index from ComparisonColumns (#10388)

2023-03-07 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 2e55d1c04d Patch: Allow fetching comparable index from 
ComparisonColumns (#10388)
2e55d1c04d is described below

commit 2e55d1c04dc66da80d1bc72f347952d9cc69797a
Author: Kartik Khare 
AuthorDate: Tue Mar 7 19:57:18 2023 +0530

Patch: Allow fetching comparable index from ComparisonColumns (#10388)

Co-authored-by: Kartik Khare 
---
 .../java/org/apache/pinot/segment/local/upsert/ComparisonColumns.java | 4 
 1 file changed, 4 insertions(+)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/ComparisonColumns.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/ComparisonColumns.java
index de223f27d3..5d40e5a350 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/ComparisonColumns.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/ComparisonColumns.java
@@ -33,6 +33,10 @@ public class ComparisonColumns implements 
Comparable {
 return _values;
   }
 
+  public int getComparableIndex() {
+return _comparableIndex;
+  }
+
   @Override
   public int compareTo(ComparisonColumns other) {
 // _comparisonColumns should only at most one non-null comparison value. 
If not, it is the user's responsibility.


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated: Add operator level stats to response when tracing is enabled (#10364)

2023-03-05 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new f28525b417 Add operator level stats to response when tracing is 
enabled (#10364)
f28525b417 is described below

commit f28525b4176b39c45bf326bd8ecc6234cee1e027
Author: Kartik Khare 
AuthorDate: Mon Mar 6 12:05:42 2023 +0530

Add operator level stats to response when tracing is enabled (#10364)

* Add operator level stats to response when tracing is enabled

* Add tests for operatorStats on tracing
---
 .../MultiStageBrokerRequestHandler.java|  6 -
 .../response/broker/BrokerResponseNativeV2.java|  2 +-
 .../response/broker/BrokerResponseStats.java   | 18 +++--
 .../api/resources/PinotQueryResource.java  |  6 ++---
 .../query/reduce/ExecutionStatsAggregator.java | 12 ++---
 .../pinot/query/runtime/QueryRunnerTestBase.java   |  4 +--
 .../runtime/queries/ResourceBasedQueriesTest.java  | 30 ++
 7 files changed, 54 insertions(+), 24 deletions(-)

diff --git 
a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java
 
b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java
index 17f018f047..9ebc6ea6b4 100644
--- 
a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java
+++ 
b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java
@@ -169,10 +169,14 @@ public class MultiStageBrokerRequestHandler extends 
BaseBrokerRequestHandler {
   return new 
BrokerResponseNative(QueryException.getException(QueryException.SQL_PARSING_ERROR,
 e));
 }
 
+boolean traceEnabled = Boolean.parseBoolean(
+request.has(CommonConstants.Broker.Request.TRACE) ? 
request.get(CommonConstants.Broker.Request.TRACE).asText()
+: "false");
+
 ResultTable queryResults;
 Map stageIdStatsMap = new HashMap<>();
 for (Integer stageId: queryPlan.getStageMetadataMap().keySet()) {
-  stageIdStatsMap.put(stageId, new ExecutionStatsAggregator(false));
+  stageIdStatsMap.put(stageId, new ExecutionStatsAggregator(traceEnabled));
 }
 
 try {
diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/response/broker/BrokerResponseNativeV2.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/response/broker/BrokerResponseNativeV2.java
index 79605773d7..5bf631e129 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/response/broker/BrokerResponseNativeV2.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/response/broker/BrokerResponseNativeV2.java
@@ -81,7 +81,7 @@ public class BrokerResponseNativeV2 extends 
BrokerResponseNative {
   }
 
   public void addStageStat(Integer stageId, BrokerResponseStats 
brokerResponseStats) {
-if (!brokerResponseStats.getOperatorIds().isEmpty()) {
+if (!brokerResponseStats.getOperatorStats().isEmpty()) {
   _stageIdStats.put(stageId, brokerResponseStats);
 }
   }
diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/response/broker/BrokerResponseStats.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/response/broker/BrokerResponseStats.java
index 60d7ab4813..83cbd16f3c 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/response/broker/BrokerResponseStats.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/response/broker/BrokerResponseStats.java
@@ -23,7 +23,9 @@ import com.fasterxml.jackson.annotation.JsonProperty;
 import com.fasterxml.jackson.annotation.JsonPropertyOrder;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import org.apache.pinot.spi.utils.JsonUtils;
 
 
@@ -38,14 +40,14 @@ import org.apache.pinot.spi.utils.JsonUtils;
 "totalDocs", "timeUsedMs", "offlineThreadCpuTimeNs", 
"realtimeThreadCpuTimeNs",
 "offlineSystemActivitiesCpuTimeNs", "realtimeSystemActivitiesCpuTimeNs", 
"offlineResponseSerializationCpuTimeNs",
 "realtimeResponseSerializationCpuTimeNs", "offlineTotalCpuTimeNs", 
"realtimeTotalCpuTimeNs",
-"traceInfo", "operatorIds", "tableNames"})
+"traceInfo", "operatorStats", "tableNames"})
 @JsonInclude(JsonInclude.Include.NON_DEFAULT)
 public class BrokerResponseStats extends BrokerResponseNative {
 
   private int _numBlocks = 0;
   private int _numRows = 0;
   private long _stageExecutionTimeMs = 0;
-  private List _operatorIds = new ArrayList<>();
+  private Map> _operatorStats = new HashMap<>();
   private List _tableNames = new Arra

[pinot] branch master updated: Add Statistics grouped at Stage ID level in the V2 Engine Response (#10337)

2023-02-28 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new c5dd6df27c Add Statistics grouped at Stage ID level in the V2 Engine 
Response (#10337)
c5dd6df27c is described below

commit c5dd6df27c384b523c77adcb66956bf0e5c37251
Author: Kartik Khare 
AuthorDate: Tue Feb 28 15:49:46 2023 +0530

Add Statistics grouped at Stage ID level in the V2 Engine Response (#10337)

* WIP: aggregate stats on stage level for response

* Make response backward compatible

* Add new metadata keys to enum and replace hardcoded values; also add 
table names to the stats

* Rename operatorExecutionTime to stageExecutionTime for correct 
understanding

* Remove sysout

* Remove duplicate code inside BrokerResponseStats class

* Remove unused constants from OperatorUtils and fix formatting

* Add test for stage level stats as well as BrokerResponseNativeV2

* Add followup TODOs and move method to utils class

-

Co-authored-by: Kartik Khare 
---
 .../MultiStageBrokerRequestHandler.java|  36 ++-
 .../apache/pinot/common/datatable/DataTable.java   |  15 ++-
 .../response/broker/BrokerResponseNativeV2.java|  93 +
 .../response/broker/BrokerResponseStats.java   | 110 +
 .../query/reduce/ExecutionStatsAggregator.java |  76 --
 .../query/runtime/operator/MultiStageOperator.java |   2 +
 .../query/runtime/operator/OperatorStats.java  |  15 ++-
 .../runtime/operator/utils/OperatorUtils.java  |  14 ++-
 .../pinot/query/service/QueryDispatcher.java   |  20 ++--
 .../pinot/query/runtime/QueryRunnerTestBase.java   |   7 +-
 .../runtime/queries/ResourceBasedQueriesTest.java  |  56 ---
 11 files changed, 395 insertions(+), 49 deletions(-)

diff --git 
a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java
 
b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java
index 63c1e8f9ad..e2dae3b75f 100644
--- 
a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java
+++ 
b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java
@@ -20,6 +20,7 @@ package org.apache.pinot.broker.requesthandler;
 
 import com.fasterxml.jackson.databind.JsonNode;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
@@ -37,6 +38,8 @@ import org.apache.pinot.common.metrics.BrokerMetrics;
 import org.apache.pinot.common.request.BrokerRequest;
 import org.apache.pinot.common.response.BrokerResponse;
 import org.apache.pinot.common.response.broker.BrokerResponseNative;
+import org.apache.pinot.common.response.broker.BrokerResponseNativeV2;
+import org.apache.pinot.common.response.broker.BrokerResponseStats;
 import org.apache.pinot.common.response.broker.ResultTable;
 import org.apache.pinot.common.utils.DataSchema;
 import org.apache.pinot.common.utils.config.QueryOptionsUtils;
@@ -58,6 +61,7 @@ import org.apache.pinot.spi.env.PinotConfiguration;
 import org.apache.pinot.spi.exception.BadQueryRequestException;
 import org.apache.pinot.spi.trace.RequestContext;
 import org.apache.pinot.spi.utils.CommonConstants;
+import org.apache.pinot.spi.utils.builder.TableNameBuilder;
 import org.apache.pinot.sql.parsers.SqlNodeAndOptions;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -133,7 +137,7 @@ public class MultiStageBrokerRequestHandler extends 
BaseBrokerRequestHandler {
 return handleRequest(requestId, query, sqlNodeAndOptions, request, 
requesterIdentity, requestContext);
   }
 
-  private BrokerResponseNative handleRequest(long requestId, String query,
+  private BrokerResponse handleRequest(long requestId, String query,
   @Nullable SqlNodeAndOptions sqlNodeAndOptions, JsonNode request, 
@Nullable RequesterIdentity requesterIdentity,
   RequestContext requestContext)
   throws Exception {
@@ -166,16 +170,20 @@ public class MultiStageBrokerRequestHandler extends 
BaseBrokerRequestHandler {
 }
 
 ResultTable queryResults;
-ExecutionStatsAggregator executionStatsAggregator = new 
ExecutionStatsAggregator(false);
+Map stageIdStatsMap = new HashMap<>();
+for (Integer stageId: queryPlan.getStageMetadataMap().keySet()) {
+  stageIdStatsMap.put(stageId, new ExecutionStatsAggregator(false));
+}
+
 try {
   queryResults = _queryDispatcher.submitAndReduce(requestId, queryPlan, 
_mailboxService, queryTimeoutMs,
-  sqlNodeAndOptions.getOptions(), executionStatsAggregator);
+  sqlNodeAndOptions.getOptions(), stageIdStatsMap);
 } catch (Exc

[pinot] branch master updated: Minor: Limit MultiStageQuickstart output (#10307)

2023-02-20 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 442bfc3af1 Minor: Limit MultiStageQuickstart output (#10307)
442bfc3af1 is described below

commit 442bfc3af1bdae230fda6aa5112f028e7620c5b8
Author: Kartik Khare 
AuthorDate: Mon Feb 20 15:58:21 2023 +0530

Minor: Limit MultiStageQuickstart output (#10307)

Co-authored-by: Kartik Khare 
---
 .../java/org/apache/pinot/tools/MultistageEngineQuickStart.java | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git 
a/pinot-tools/src/main/java/org/apache/pinot/tools/MultistageEngineQuickStart.java
 
b/pinot-tools/src/main/java/org/apache/pinot/tools/MultistageEngineQuickStart.java
index fc563d9970..690e0b 100644
--- 
a/pinot-tools/src/main/java/org/apache/pinot/tools/MultistageEngineQuickStart.java
+++ 
b/pinot-tools/src/main/java/org/apache/pinot/tools/MultistageEngineQuickStart.java
@@ -57,7 +57,7 @@ public class MultistageEngineQuickStart extends Quickstart {
 printStatus(Quickstart.Color.YELLOW, "* Multi-stage engine quickstart 
setup complete *");
 Map queryOptions = Collections.singletonMap("queryOptions",
 CommonConstants.Broker.Request.QueryOptionKey.USE_MULTISTAGE_ENGINE + 
"=true");
-String q1 = "SELECT count(*) FROM baseballStats_OFFLINE";
+String q1 = "SELECT count(*) FROM baseballStats_OFFLINE LIMIT 10";
 printStatus(Quickstart.Color.YELLOW, "Total number of documents in the 
table");
 printStatus(Quickstart.Color.CYAN, "Query : " + q1);
 printStatus(Quickstart.Color.YELLOW, 
prettyPrintResponse(runner.runQuery(q1, queryOptions)));
@@ -65,7 +65,7 @@ public class MultistageEngineQuickStart extends Quickstart {
 
 String q2 = "SELECT a.playerID, a.runs, a.yearID, b.runs, b.yearID"
 + " FROM baseballStats_OFFLINE AS a JOIN baseballStats_OFFLINE AS b ON 
a.playerID = b.playerID"
-+ " WHERE a.runs > 160 AND b.runs < 2";
++ " WHERE a.runs > 160 AND b.runs < 2 LIMIT 10";
 printStatus(Quickstart.Color.YELLOW, "Correlate the same player(s) with 
more than 160-run some year(s) and"
 + " with less than 2-run some other year(s)");
 printStatus(Quickstart.Color.CYAN, "Query : " + q2);
@@ -75,7 +75,7 @@ public class MultistageEngineQuickStart extends Quickstart {
 String q3 = "SELECT a.playerName, a.teamID, b.teamName \n"
 + "FROM baseballStats_OFFLINE AS a\n"
 + "JOIN dimBaseballTeams_OFFLINE AS b\n"
-+ "ON a.teamID = b.teamID";
++ "ON a.teamID = b.teamID LIMIT 10";
 printStatus(Quickstart.Color.YELLOW, "Baseball Stats with joined team 
names");
 printStatus(Quickstart.Color.CYAN, "Query : " + q3);
 printStatus(Quickstart.Color.YELLOW, 
prettyPrintResponse(runner.runQuery(q3, queryOptions)));


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated: Handle interrupted exception during Kinesis rate limit (#10297)

2023-02-17 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 4e0afef532 Handle interrupted exception during Kinesis rate limit 
(#10297)
4e0afef532 is described below

commit 4e0afef5320c799e3a4083bb0594de1427ac70da
Author: Kartik Khare 
AuthorDate: Fri Feb 17 15:42:10 2023 +0530

Handle interrupted exception during Kinesis rate limit (#10297)
---
 .../org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java| 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git 
a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
 
b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index 3ded54ec8f..0fd889d1d4 100644
--- 
a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ 
b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -173,7 +173,12 @@ public class KinesisConsumer extends 
KinesisConnectionHandler implements Partiti
 }
 
 if (currentWindowRequests >= _rpsLimit) {
-  Thread.sleep(SLEEP_TIME_BETWEEN_REQUESTS);
+  try {
+Thread.sleep(SLEEP_TIME_BETWEEN_REQUESTS);
+  } catch (InterruptedException e) {
+LOGGER.debug("Sleep interrupted while rate limiting Kinesis 
requests", e);
+break;
+  }
 }
   }
 


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated (2f345da102 -> a0162559b5)

2023-02-15 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from 2f345da102 Handle big In clause (#10254)
 add a0162559b5 Add broker metrics to V2 Engine (#10214)

No new revisions were added by this update.

Summary of changes:
 .../MultiStageBrokerRequestHandler.java|  52 ++-
 .../pinot/common/datablock/DataBlockUtils.java |   5 +
 .../pinot/common/datablock/MetadataBlock.java  |  28 +++-
 .../pinot/common/datablock/MetadataBlockTest.java  |   3 +-
 .../pinot/query/mailbox/GrpcReceivingMailbox.java  |   6 +-
 .../channel/MailboxContentStreamObserver.java  |   1 +
 .../query/runtime/blocks/TransferableBlock.java|  13 +-
 .../runtime/blocks/TransferableBlockUtils.java |   4 +
 .../runtime/executor/OpChainSchedulerService.java  |   1 +
 .../query/runtime/operator/AggregateOperator.java  |   1 +
 .../query/runtime/operator/HashJoinOperator.java   |   7 +
 .../LeafStageTransferableBlockOperator.java|   3 +
 .../runtime/operator/MailboxReceiveOperator.java   |   8 +-
 .../query/runtime/operator/MultiStageOperator.java |  30 +++-
 .../pinot/query/runtime/operator/OpChainStats.java |  11 ++
 .../query/runtime/operator/OperatorStats.java  |  29 +++-
 .../query/runtime/operator/TransformOperator.java  |   6 +-
 .../runtime/operator/utils/OperatorUtils.java  |  66 +++-
 .../runtime/operator/utils/StatsAggregator.java| 170 +
 .../pinot/query/service/QueryDispatcher.java   |  51 ++-
 20 files changed, 470 insertions(+), 25 deletions(-)
 create mode 100644 
pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/utils/StatsAggregator.java


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated (f004683cba -> f53bdf8ce8)

2023-01-23 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from f004683cba Push bytes comparison down into ValueReader (#10161)
 add f53bdf8ce8 Make a system property to call system.exit() for 
LaunchDataIngestionJobCommand (#10155)

No new revisions were added by this update.

Summary of changes:
 .../pinot/tools/admin/command/LaunchDataIngestionJobCommand.java | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated: Fix local segment tar copy (#10048)

2023-01-03 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new c50cef11a7 Fix local segment tar copy (#10048)
c50cef11a7 is described below

commit c50cef11a79065acb6384336f9b7fb47524cf953
Author: Saurabh Dubey 
AuthorDate: Wed Jan 4 10:44:08 2023 +0530

Fix local segment tar copy (#10048)

Co-authored-by: Saurabh Dubey 
---
 .../java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java| 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java
index c64d9a1135..92832cf889 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java
@@ -24,6 +24,7 @@ import java.io.InputStream;
 import java.io.Serializable;
 import java.net.URI;
 import java.net.URISyntaxException;
+import java.net.URLDecoder;
 import java.nio.file.FileSystems;
 import java.nio.file.PathMatcher;
 import java.nio.file.Paths;
@@ -384,7 +385,7 @@ public class SegmentPushUtils implements Serializable {
 try {
   if (fileSystem instanceof LocalPinotFS) {
 // For local file system, we don't need to copy the tar file.
-tarFile = new File(tarFileURI);
+tarFile = new File(URLDecoder.decode(tarFileURI.getRawPath(), 
"UTF-8"));
   } else {
 // For other file systems, we need to download the file to local file 
system
 fileSystem.copyToLocalFile(tarFileURI, tarFile);


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated: Handle in-memory segment metadata for index checking (#10017)

2022-12-21 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 5dc60be5d5 Handle in-memory segment metadata for index checking 
(#10017)
5dc60be5d5 is described below

commit 5dc60be5d522a478721ef082105fbfe41bdfbe94
Author: Xiaotian (Jackie) Jiang <1751+jackie-ji...@users.noreply.github.com>
AuthorDate: Wed Dec 21 01:13:13 2022 -0800

Handle in-memory segment metadata for index checking (#10017)
---
 .../index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java | 7 +--
 .../index/loader/defaultcolumn/BaseDefaultColumnHandler.java   | 6 --
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java
index b426f8ade9..7f3c97c16d 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java
@@ -42,16 +42,18 @@ import static org.apache.pinot.spi.data.FieldSpec.DataType;
 
 public class ColumnMinMaxValueGenerator {
   private final SegmentMetadata _segmentMetadata;
-  private final PropertiesConfiguration _segmentProperties;
   private final SegmentDirectory.Writer _segmentWriter;
   private final ColumnMinMaxValueGeneratorMode _columnMinMaxValueGeneratorMode;
 
+  // NOTE: _segmentProperties shouldn't be used when checking whether min/max 
value need to be generated because at that
+  //   time _segmentMetadata might not be loaded from a local file
+  private PropertiesConfiguration _segmentProperties;
+
   private boolean _minMaxValueAdded;
 
   public ColumnMinMaxValueGenerator(SegmentMetadata segmentMetadata, 
SegmentDirectory.Writer segmentWriter,
   ColumnMinMaxValueGeneratorMode columnMinMaxValueGeneratorMode) {
 _segmentMetadata = segmentMetadata;
-_segmentProperties = 
SegmentMetadataUtils.getPropertiesConfiguration(segmentMetadata);
 _segmentWriter = segmentWriter;
 _columnMinMaxValueGeneratorMode = columnMinMaxValueGeneratorMode;
   }
@@ -68,6 +70,7 @@ public class ColumnMinMaxValueGenerator {
   public void addColumnMinMaxValue()
   throws Exception {
 Preconditions.checkState(_columnMinMaxValueGeneratorMode != 
ColumnMinMaxValueGeneratorMode.NONE);
+_segmentProperties = 
SegmentMetadataUtils.getPropertiesConfiguration(_segmentMetadata);
 for (String column : getColumnsToAddMinMaxValue()) {
   addColumnMinMaxValueForColumn(column);
 }
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
index 9e3bb38449..5d647cd335 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
@@ -120,7 +120,9 @@ public abstract class BaseDefaultColumnHandler implements 
DefaultColumnHandler {
   protected final Schema _schema;
   protected final SegmentDirectory.Writer _segmentWriter;
 
-  private final PropertiesConfiguration _segmentProperties;
+  // NOTE: _segmentProperties shouldn't be used when checking whether default 
column need to be created because at that
+  //   time _segmentMetadata might not be loaded from a local file
+  private PropertiesConfiguration _segmentProperties;
 
   protected BaseDefaultColumnHandler(File indexDir, SegmentMetadata 
segmentMetadata,
   IndexLoadingConfig indexLoadingConfig, Schema schema, 
SegmentDirectory.Writer segmentWriter) {
@@ -129,7 +131,6 @@ public abstract class BaseDefaultColumnHandler implements 
DefaultColumnHandler {
 _indexLoadingConfig = indexLoadingConfig;
 _schema = schema;
 _segmentWriter = segmentWriter;
-_segmentProperties = 
SegmentMetadataUtils.getPropertiesConfiguration(segmentMetadata);
   }
 
   @Override
@@ -151,6 +152,7 @@ public abstract class BaseDefaultColumnHandler implements 
DefaultColumnHandler {
 }
 
 // Update each default column based on the default column action.
+_segmentProperties = 
SegmentMetadataUtils.getPropertiesConfiguration(_segmentMetadata);
 Iterator> entryIterator = 
defaultColumnActionMap.entrySet().iterator();
 while (entryIterator.hasNext()) {
   

[pinot] branch master updated: Add capability to update and delete table config via CLI (#9852)

2022-12-07 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new bb83f5c7e2 Add capability to update and delete table config via CLI 
(#9852)
bb83f5c7e2 is described below

commit bb83f5c7e2bd12d4d028bbf12df19cd34742cad5
Author: Kartik Khare 
AuthorDate: Wed Dec 7 15:03:40 2022 +0530

Add capability to update and delete table config via CLI (#9852)

* Add capability to update and delete table config via CLI

* Add old command for backward compatibility

* Only use update command

* revert linting

Co-authored-by: Kartik Khare 
---
 .../pinot/tools/admin/command/AddTableCommand.java| 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git 
a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/AddTableCommand.java
 
b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/AddTableCommand.java
index 707b329c65..a9950cdf4f 100644
--- 
a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/AddTableCommand.java
+++ 
b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/AddTableCommand.java
@@ -75,6 +75,10 @@ public class AddTableCommand extends 
AbstractBaseAdminCommand implements Command
   @CommandLine.Option(names = {"-controllerProtocol"}, required = false, 
description = "protocol for controller.")
   private String _controllerProtocol = CommonConstants.HTTP_PROTOCOL;
 
+  @CommandLine.Option(names = {"-update"}, required = false,
+  description = "Update the existing table instead of creating new one")
+  private boolean _update = false;
+
   @CommandLine.Option(names = {"-exec"}, required = false, description = 
"Execute the command.")
   private boolean _exec;
 
@@ -191,6 +195,15 @@ public class AddTableCommand extends 
AbstractBaseAdminCommand implements Command
 return res.contains("successfully added");
   }
 
+  public boolean sendTableUpdateRequest(JsonNode node, String tableName)
+  throws IOException {
+String res = AbstractBaseAdminCommand.sendRequest("PUT",
+
ControllerRequestURLBuilder.baseUrl(_controllerAddress).forTableConfigsUpdate(tableName),
 node.toString(),
+makeAuthHeaders(makeAuthProvider(_authProvider, _authTokenUrl, 
_authToken, _user, _password)));
+LOGGER.info(res);
+return res.contains("TableConfigs updated");
+  }
+
   @Override
   public boolean execute()
   throws Exception {
@@ -239,7 +252,11 @@ public class AddTableCommand extends 
AbstractBaseAdminCommand implements Command
 "Failed reading schema " + _schemaFile);
 TableConfigs tableConfigs = new TableConfigs(rawTableName, schema, 
offlineTableConfig, realtimeTableConfig);
 
-return sendTableCreationRequest(JsonUtils.objectToJsonNode(tableConfigs));
+if (_update) {
+  return sendTableUpdateRequest(tableConfigs.toJsonNode(), rawTableName);
+} else {
+  return sendTableCreationRequest(tableConfigs.toJsonNode());
+}
   }
 
   private static  T attempt(Callable callable, String errorMessage) {


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated: Allow uploading realtime segments via CLI (#9861)

2022-12-07 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 66ed9b7167 Allow uploading realtime segments via CLI (#9861)
66ed9b7167 is described below

commit 66ed9b7167241da15c171d667006a3f00a13734c
Author: Kartik Khare 
AuthorDate: Wed Dec 7 15:03:25 2022 +0530

Allow uploading realtime segments via CLI (#9861)

* Allow uploading realtime segments via CLI

* use tabletype as separate param

* Allow setting table type

Co-authored-by: Kartik Khare 
---
 .../tools/admin/command/UploadSegmentCommand.java  | 32 --
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git 
a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/UploadSegmentCommand.java
 
b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/UploadSegmentCommand.java
index 535bc1e958..c721fc3e01 100644
--- 
a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/UploadSegmentCommand.java
+++ 
b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/UploadSegmentCommand.java
@@ -20,14 +20,15 @@ package org.apache.pinot.tools.admin.command;
 
 import com.google.common.base.Preconditions;
 import java.io.File;
+import java.io.FileInputStream;
 import java.net.URI;
-import java.util.Collections;
+import java.util.List;
 import org.apache.commons.io.FileUtils;
-import org.apache.http.message.BasicNameValuePair;
+import org.apache.http.Header;
 import org.apache.pinot.common.utils.FileUploadDownloadClient;
 import org.apache.pinot.common.utils.TarGzCompressionUtils;
-import org.apache.pinot.common.utils.http.HttpClient;
 import org.apache.pinot.spi.auth.AuthProvider;
+import org.apache.pinot.spi.config.table.TableType;
 import org.apache.pinot.spi.utils.CommonConstants;
 import org.apache.pinot.spi.utils.NetUtils;
 import org.apache.pinot.tools.Command;
@@ -70,10 +71,13 @@ public class UploadSegmentCommand extends 
AbstractBaseAdminCommand implements Co
   @CommandLine.Option(names = {"-segmentDir"}, required = true, description = 
"Path to segment directory.")
   private String _segmentDir = null;
 
-  // TODO: make this as a required field once we deprecate the table name from 
segment metadata
-  @CommandLine.Option(names = {"-tableName"}, required = false, description = 
"Table name to upload.")
+  @CommandLine.Option(names = {"-tableName"}, required = false, description = 
"Table name to upload")
   private String _tableName = null;
 
+  @CommandLine.Option(names = {"-tableType"}, required = false,
+  description = "Table type to upload. Can be OFFLINE or REALTIME")
+  private TableType _tableType = TableType.OFFLINE;
+
   @CommandLine.Option(names = {"-help", "-h", "--h", "--help"}, required = 
false, help = true,
   description = "Print this message.")
   private boolean _help = false;
@@ -140,6 +144,15 @@ public class UploadSegmentCommand extends 
AbstractBaseAdminCommand implements Co
 return this;
   }
 
+  public UploadSegmentCommand setTableName(String tableName) {
+_tableName = tableName;
+return this;
+  }
+
+  public void setTableType(TableType tableType) {
+_tableType = tableType;
+  }
+
   @Override
   public boolean execute()
   throws Exception {
@@ -173,10 +186,11 @@ public class UploadSegmentCommand extends 
AbstractBaseAdminCommand implements Co
 }
 
 LOGGER.info("Uploading segment tar file: {}", segmentTarFile);
-fileUploadDownloadClient.uploadSegment(uploadSegmentHttpURI, 
segmentTarFile.getName(), segmentTarFile,
-makeAuthHeaders(makeAuthProvider(_authProvider, _authTokenUrl, 
_authToken, _user, _password)),
-Collections.singletonList(new 
BasicNameValuePair(FileUploadDownloadClient.QueryParameters.TABLE_NAME,
-_tableName)), HttpClient.DEFAULT_SOCKET_TIMEOUT_MS);
+List headerList = makeAuthHeaders(_authProvider);
+
+FileInputStream fileInputStream = new FileInputStream(segmentTarFile);
+fileUploadDownloadClient.uploadSegment(uploadSegmentHttpURI, 
segmentTarFile.getName(),
+fileInputStream, headerList, null, _tableName, _tableType);
   }
 } finally {
   // Delete the temporary working directory.


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated: Add rate limit to Kinesis requests (#9863)

2022-12-06 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 76c649258c Add rate limit to Kinesis requests (#9863)
76c649258c is described below

commit 76c649258c625d431a42ff1fbc1b3003fe013066
Author: Kartik Khare 
AuthorDate: Wed Dec 7 12:38:33 2022 +0530

Add rate limit to Kinesis requests (#9863)

* Add rate limit to Kinesis requests

* Throw warning on zero RPS and rate limit all non-empty requests as well

Co-authored-by: Kartik Khare 
---
 .../pinot/plugin/stream/kinesis/KinesisConfig.java | 20 
 .../plugin/stream/kinesis/KinesisConsumer.java | 22 +-
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git 
a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
 
b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
index d8a3795a2a..47adf78c20 100644
--- 
a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
+++ 
b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
@@ -23,6 +23,8 @@ import com.google.common.base.Preconditions;
 import java.util.Map;
 import java.util.UUID;
 import org.apache.pinot.spi.stream.StreamConfig;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
 
@@ -30,6 +32,8 @@ import 
software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
  * Kinesis stream specific config
  */
 public class KinesisConfig {
+  private static final Logger LOGGER = 
LoggerFactory.getLogger(KinesisConfig.class);
+
   public static final String STREAM_TYPE = "kinesis";
   public static final String SHARD_ITERATOR_TYPE = "shardIteratorType";
   public static final String REGION = "region";
@@ -37,6 +41,8 @@ public class KinesisConfig {
   public static final String SECRET_KEY = "secretKey";
   public static final String MAX_RECORDS_TO_FETCH = "maxRecordsToFetch";
   public static final String ENDPOINT = "endpoint";
+  public static final String RPS_LIMIT = "requests_per_second_limit";
+
 
   // IAM role configs
   /**
@@ -64,6 +70,7 @@ public class KinesisConfig {
   public static final String DEFAULT_IAM_ROLE_BASED_ACCESS_ENABLED = "false";
   public static final String DEFAULT_SESSION_DURATION_SECONDS = "900";
   public static final String DEFAULT_ASYNC_SESSION_UPDATED_ENABLED = "true";
+  public static final String DEFAULT_RPS_LIMIT = "5";
 
   private final String _streamTopicName;
   private final String _awsRegion;
@@ -80,6 +87,7 @@ public class KinesisConfig {
   private String _externalId;
   private int _sessionDurationSeconds;
   private boolean _asyncSessionUpdateEnabled;
+  private int _rpsLimit;
 
   public KinesisConfig(StreamConfig streamConfig) {
 Map props = streamConfig.getStreamConfigsMap();
@@ -88,6 +96,14 @@ public class KinesisConfig {
 Preconditions.checkNotNull(_awsRegion, "Must provide 'region' in stream 
config for table: %s",
 streamConfig.getTableNameWithType());
 _numMaxRecordsToFetch = 
Integer.parseInt(props.getOrDefault(MAX_RECORDS_TO_FETCH, DEFAULT_MAX_RECORDS));
+_rpsLimit = Integer.parseInt(props.getOrDefault(RPS_LIMIT, 
DEFAULT_RPS_LIMIT));
+
+if (_rpsLimit <= 0) {
+  LOGGER.warn("Invalid 'requests_per_second_limit' value: {}."
+  + " Please provide value greater than 0. Using default: {}", 
_rpsLimit, DEFAULT_RPS_LIMIT);
+  _rpsLimit = Integer.parseInt(DEFAULT_RPS_LIMIT);
+}
+
 _shardIteratorType =
 ShardIteratorType.fromValue(props.getOrDefault(SHARD_ITERATOR_TYPE, 
DEFAULT_SHARD_ITERATOR_TYPE));
 _accessKey = props.get(ACCESS_KEY);
@@ -124,6 +140,10 @@ public class KinesisConfig {
 return _numMaxRecordsToFetch;
   }
 
+  public int getRpsLimit() {
+return _rpsLimit;
+  }
+
   public ShardIteratorType getShardIteratorType() {
 return _shardIteratorType;
   }
diff --git 
a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
 
b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index 3fad3a39a6..3ded54ec8f 100644
--- 
a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ 
b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/k

[pinot] branch master updated: Allow segment upload via Metadata in MergeRollup Minion task (#9825)

2022-12-06 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new fb48288ef6 Allow segment upload via Metadata in MergeRollup Minion 
task (#9825)
fb48288ef6 is described below

commit fb48288ef6a22c80a516087d95fea841f69c9af7
Author: Kartik Khare 
AuthorDate: Tue Dec 6 17:34:02 2022 +0530

Allow segment upload via Metadata in MergeRollup Minion task (#9825)

* Add code to push metadata in minion tasks

* Fix metadata push

* Add config for overwrite dir

* Fix download url to generate outputDir

* Fix linting

* Remove MinionPushUtils class

* Do not copy in case of tar push

* Remove duplicate code for filesystem

* Add metadata push to realtime to offline task

* Use data dir as output dir

* Add tests for realtime to offline metadata push

* use controller data dir for output

* Remove redundant code for URI push

* Enforce outputDir in metadata push

* Use SegmentConversionUtils method to push TAR files for backward 
compatibility

* Do not catch exception since it is already handled

Co-authored-by: Kartik Khare 
---
 .../tests/BaseClusterIntegrationTest.java  |   6 +-
 .../MergeRollupMinionClusterIntegrationTest.java   | 145 -
 ...fflineSegmentsMinionClusterIntegrationTest.java | 133 ++-
 .../BaseMultipleSegmentsConversionExecutor.java| 105 ++-
 .../pinot/plugin/minion/tasks/MinionTaskUtils.java | 118 +
 .../mergerollup/MergeRollupTaskGenerator.java  |  11 +-
 .../RealtimeToOfflineSegmentsTaskGenerator.java|   4 +-
 .../SegmentGenerationAndPushTaskExecutor.java  |   9 +-
 .../SegmentGenerationAndPushTaskGenerator.java |   3 +-
 .../SegmentGenerationAndPushTaskUtils.java |  73 ---
 .../segment/local/utils/SegmentPushUtils.java  |  74 ---
 11 files changed, 568 insertions(+), 113 deletions(-)

diff --git 
a/pinot-integration-test-base/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTest.java
 
b/pinot-integration-test-base/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTest.java
index b133a27551..11ffabe953 100644
--- 
a/pinot-integration-test-base/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTest.java
+++ 
b/pinot-integration-test-base/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTest.java
@@ -607,13 +607,17 @@ public abstract class BaseClusterIntegrationTest extends 
ClusterTest {
   }
 
   protected void waitForDocsLoaded(long timeoutMs, boolean raiseError) {
+waitForDocsLoaded(timeoutMs, raiseError, getTableName());
+  }
+
+  protected void waitForDocsLoaded(long timeoutMs, boolean raiseError, String 
tableName) {
 final long countStarResult = getCountStarResult();
 TestUtils.waitForCondition(new Function() {
   @Nullable
   @Override
   public Boolean apply(@Nullable Void aVoid) {
 try {
-  return getCurrentCountStarResult() == countStarResult;
+  return getCurrentCountStarResult(tableName) == countStarResult;
 } catch (Exception e) {
   return null;
 }
diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MergeRollupMinionClusterIntegrationTest.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MergeRollupMinionClusterIntegrationTest.java
index 5846eba2d3..9602275068 100644
--- 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MergeRollupMinionClusterIntegrationTest.java
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MergeRollupMinionClusterIntegrationTest.java
@@ -50,6 +50,7 @@ import org.apache.pinot.spi.config.table.TableConfig;
 import org.apache.pinot.spi.config.table.TableTaskConfig;
 import org.apache.pinot.spi.config.table.TableType;
 import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.ingestion.batch.BatchConfigProperties;
 import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
 import org.apache.pinot.spi.utils.builder.TableNameBuilder;
 import org.apache.pinot.util.TestUtils;
@@ -70,6 +71,7 @@ public class MergeRollupMinionClusterIntegrationTest extends 
BaseClusterIntegrat
   private static final String SINGLE_LEVEL_CONCAT_TEST_TABLE = "myTable1";
   private static final String SINGLE_LEVEL_ROLLUP_TEST_TABLE = "myTable2";
   private static final String MULTI_LEVEL_CONCAT_TEST_TABLE = "myTable3";
+  private static final String SINGLE_LEVEL_CONCAT_METADATA_TEST_TABLE = 
"myTable4";
   private static final long TIMEOUT_IN_MS = 10_000L;
 
   protected PinotHelixTaskResourceManager _helixTaskRes

[pinot] branch master updated: Add memory optimized dimension table (#9802)

2022-12-02 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 42354206b7 Add memory optimized dimension table (#9802)
42354206b7 is described below

commit 42354206b7cc79aa02ede20e73f6b0f36ee1d9d9
Author: Kartik Khare 
AuthorDate: Fri Dec 2 17:12:25 2022 +0530

Add memory optimized dimension table (#9802)

* Add mem optimized dim table

* rename config and remove template variable

* Add dimensionTable config to table config builder and serializers

* fix linting

* WIP: close segments after memtable is done

* Do not close segments when data is not preLoaded

* Fix segment close logic

* closing segment data manager inside dimension table

* Fix linting

Co-authored-by: Kartik Khare 
---
 .../common/utils/config/TableConfigUtils.java  | 13 ++-
 .../core/data/manager/offline/DimensionTable.java  | 36 ++---
 .../manager/offline/DimensionTableDataManager.java | 85 ++--
 ...ionTable.java => FastLookupDimensionTable.java} | 30 ---
 .../data/manager/offline/LookupRecordLocation.java | 46 +++
 .../offline/MemoryOptimizedDimensionTable.java | 93 ++
 .../offline/DimensionTableDataManagerTest.java | 57 +
 .../spi/config/table/DimensionTableConfig.java | 37 +
 .../apache/pinot/spi/config/table/TableConfig.java | 15 
 .../spi/utils/builder/TableConfigBuilder.java  | 11 ++-
 10 files changed, 371 insertions(+), 52 deletions(-)

diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/utils/config/TableConfigUtils.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/utils/config/TableConfigUtils.java
index 9735a0f632..8abb0ea964 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/utils/config/TableConfigUtils.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/utils/config/TableConfigUtils.java
@@ -29,6 +29,7 @@ import java.util.Map;
 import org.apache.commons.collections.MapUtils;
 import org.apache.helix.zookeeper.datamodel.ZNRecord;
 import org.apache.pinot.spi.config.table.DedupConfig;
+import org.apache.pinot.spi.config.table.DimensionTableConfig;
 import org.apache.pinot.spi.config.table.FieldConfig;
 import org.apache.pinot.spi.config.table.IndexingConfig;
 import org.apache.pinot.spi.config.table.QueryConfig;
@@ -139,6 +140,12 @@ public class TableConfigUtils {
   dedupConfig = JsonUtils.stringToObject(dedupConfigString, 
DedupConfig.class);
 }
 
+DimensionTableConfig dimensionTableConfig = null;
+String dimensionTableConfigString = 
simpleFields.get(TableConfig.DIMENSION_TABLE_CONFIG_KEY);
+if (dimensionTableConfigString != null) {
+  dimensionTableConfig = 
JsonUtils.stringToObject(dimensionTableConfigString, 
DimensionTableConfig.class);
+}
+
 IngestionConfig ingestionConfig = null;
 String ingestionConfigString = 
simpleFields.get(TableConfig.INGESTION_CONFIG_KEY);
 if (ingestionConfigString != null) {
@@ -175,7 +182,7 @@ public class TableConfigUtils {
 
 return new TableConfig(tableName, tableType, validationConfig, 
tenantConfig, indexingConfig, customConfig,
 quotaConfig, taskConfig, routingConfig, queryConfig, 
instanceAssignmentConfigMap,
-fieldConfigList, upsertConfig, dedupConfig, ingestionConfig, 
tierConfigList, isDimTable,
+fieldConfigList, upsertConfig, dedupConfig, dimensionTableConfig, 
ingestionConfig, tierConfigList, isDimTable,
 tunerConfigList, instancePartitionsMap, segmentAssignmentConfigMap);
   }
 
@@ -227,6 +234,10 @@ public class TableConfigUtils {
 if (dedupConfig != null) {
   simpleFields.put(TableConfig.DEDUP_CONFIG_KEY, 
JsonUtils.objectToString(dedupConfig));
 }
+DimensionTableConfig dimensionTableConfig = 
tableConfig.getDimensionTableConfig();
+if (dimensionTableConfig != null) {
+  simpleFields.put(TableConfig.DIMENSION_TABLE_CONFIG_KEY, 
JsonUtils.objectToString(dimensionTableConfig));
+}
 IngestionConfig ingestionConfig = tableConfig.getIngestionConfig();
 if (ingestionConfig != null) {
   simpleFields.put(TableConfig.INGESTION_CONFIG_KEY, 
JsonUtils.objectToString(ingestionConfig));
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTable.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTable.java
index 6485f4456c..b98d1d51e0 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTable.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTable.java
@@ -18,44 +18,20 @@
  */
 package org.apache.pinot.core.data.manager.offline;
 
-import java.util.HashMap;
+import java.io.Closeable;
 import java.util.L

[pinot] branch master updated (d62a867d46 -> 214095425e)

2022-11-23 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from d62a867d46 Add max merger and min mergers for partial upsert (#9665)
 add 214095425e Do not create dictionary for high-cardinality columns 
(#9527)

No new revisions were added by this update.

Summary of changes:
 .../creator/impl/SegmentColumnarIndexCreator.java  | 50 +++---
 .../segment/creator/DictionaryOptimiserTest.java   | 35 +--
 .../spi/creator/SegmentGeneratorConfig.java| 11 +
 .../pinot/spi/config/table/IndexingConfig.java | 19 +++-
 4 files changed, 94 insertions(+), 21 deletions(-)


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated: Add null value index for default column (#9777)

2022-11-17 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 16704a3cb1 Add null value index for default column (#9777)
16704a3cb1 is described below

commit 16704a3cb1fc04958483fb565cef44f69e51cb17
Author: Kartik Khare 
AuthorDate: Thu Nov 17 21:01:10 2022 +0530

Add null value index for default column (#9777)

* Add null value index for default column

* Add check if the index doesn't already exist

* Add test for null value vector for default values

* Only create null index for default cols when nullHandlingEnabled: true

Co-authored-by: Kartik Khare 
---
 .../loader/defaultcolumn/BaseDefaultColumnHandler.java   | 16 
 .../loader/defaultcolumn/V3DefaultColumnHandler.java |  5 +
 .../segment/index/loader/SegmentPreProcessorTest.java| 11 ++-
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
index ac3555d783..1c9646b97b 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
@@ -43,6 +43,7 @@ import 
org.apache.pinot.segment.local.segment.creator.impl.fwd.MultiValueUnsorte
 import 
org.apache.pinot.segment.local.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator;
 import 
org.apache.pinot.segment.local.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator;
 import 
org.apache.pinot.segment.local.segment.creator.impl.inv.BitSlicedRangeIndexCreator;
+import 
org.apache.pinot.segment.local.segment.creator.impl.nullvalue.NullValueVectorCreator;
 import 
org.apache.pinot.segment.local.segment.creator.impl.stats.BytesColumnPredIndexStatsCollector;
 import 
org.apache.pinot.segment.local.segment.creator.impl.stats.DoubleColumnPreIndexStatsCollector;
 import 
org.apache.pinot.segment.local.segment.creator.impl.stats.FloatColumnPreIndexStatsCollector;
@@ -552,6 +553,21 @@ public abstract class BaseDefaultColumnHandler implements 
DefaultColumnHandler {
   }
 }
 
+if (_indexLoadingConfig.getTableConfig() != null
+&& _indexLoadingConfig.getTableConfig().getIndexingConfig() != null
+&& 
_indexLoadingConfig.getTableConfig().getIndexingConfig().isNullHandlingEnabled())
 {
+  if (!_segmentWriter.hasIndexFor(column, 
ColumnIndexType.NULLVALUE_VECTOR)) {
+try (NullValueVectorCreator nullValueVectorCreator =
+new NullValueVectorCreator(_indexDir, fieldSpec.getName())) {
+  for (int docId = 0; docId < totalDocs; docId++) {
+nullValueVectorCreator.setNull(docId);
+  }
+
+  nullValueVectorCreator.seal();
+}
+  }
+}
+
 // Add the column metadata information to the metadata properties.
 SegmentColumnarIndexCreator.addColumnMetadataInfo(_segmentProperties, 
column, columnIndexCreationInfo, totalDocs,
 fieldSpec, true/*hasDictionary*/, dictionaryElementSize);
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java
index d444e65bc3..89a92b337d 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java
@@ -74,6 +74,11 @@ public class V3DefaultColumnHandler extends 
BaseDefaultColumnHandler {
 LoaderUtils.writeIndexToV3Format(_segmentWriter, column, forwardIndexFile, 
ColumnIndexType.FORWARD_INDEX);
 File dictionaryFile = new File(_indexDir, column + 
V1Constants.Dict.FILE_EXTENSION);
 LoaderUtils.writeIndexToV3Format(_segmentWriter, column, dictionaryFile, 
ColumnIndexType.DICTIONARY);
+
+File nullValueVectorFile = new File(_indexDir, column + 
V1Constants.Indexes.NULLVALUE_VECTOR_FILE_EXTENSION);
+if (nullValueVectorFile.exists()) {
+  LoaderUtils.writeIndexToV3Format(_segmentWriter, column, 
nullValueVectorFile, ColumnIndexType.NULLVALUE_VECTOR);
+}
 return true;
   }
 }
diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
 
b/pinot-segment-local/src/test/java/org/ap

[pinot] branch master updated: Close Filesystem on exit with Minion Tasks (#9681)

2022-11-07 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 58c1dc9b7d Close Filesystem on exit with Minion Tasks (#9681)
58c1dc9b7d is described below

commit 58c1dc9b7d8e28ac7207fa7565b1ed76f7a0c2ad
Author: Kartik Khare 
AuthorDate: Tue Nov 8 11:59:33 2022 +0530

Close Filesystem on exit with Minion Tasks (#9681)

* Close S3Client upon exit

* Close FS on exit

* Remove unrelated comments

* Allow PinotFSFactory to return a new pinotFS instance

* Use NoClosePinotFS delegate class in factory

* Reduce scope of NoClosePinotFS

* Fix tests

Co-authored-by: Kartik Khare 
---
 .../pinot/plugin/filesystem/HadoopPinotFS.java |   7 +
 .../apache/pinot/plugin/filesystem/S3PinotFS.java  |   1 +
 .../SegmentGenerationAndPushTaskExecutor.java  | 183 +++--
 .../SegmentGenerationAndPushTaskGenerator.java |  91 +-
 .../SegmentGenerationAndPushTaskUtils.java |  10 +-
 .../pinot/spi/filesystem/NoClosePinotFS.java   | 136 +++
 .../pinot/spi/filesystem/PinotFSFactory.java   |   6 +-
 .../pinot/spi/filesystem/PinotFSFactoryTest.java   |  18 +-
 8 files changed, 300 insertions(+), 152 deletions(-)

diff --git 
a/pinot-plugins/pinot-file-system/pinot-hdfs/src/main/java/org/apache/pinot/plugin/filesystem/HadoopPinotFS.java
 
b/pinot-plugins/pinot-file-system/pinot-hdfs/src/main/java/org/apache/pinot/plugin/filesystem/HadoopPinotFS.java
index f65ec1945f..c9858077fa 100644
--- 
a/pinot-plugins/pinot-file-system/pinot-hdfs/src/main/java/org/apache/pinot/plugin/filesystem/HadoopPinotFS.java
+++ 
b/pinot-plugins/pinot-file-system/pinot-hdfs/src/main/java/org/apache/pinot/plugin/filesystem/HadoopPinotFS.java
@@ -298,4 +298,11 @@ public class HadoopPinotFS extends BasePinotFS {
 }
 return hadoopConf;
   }
+
+  @Override
+  public void close()
+  throws IOException {
+_hadoopFS.close();
+super.close();
+  }
 }
diff --git 
a/pinot-plugins/pinot-file-system/pinot-s3/src/main/java/org/apache/pinot/plugin/filesystem/S3PinotFS.java
 
b/pinot-plugins/pinot-file-system/pinot-s3/src/main/java/org/apache/pinot/plugin/filesystem/S3PinotFS.java
index e2e366dfe3..511f0c50b1 100644
--- 
a/pinot-plugins/pinot-file-system/pinot-s3/src/main/java/org/apache/pinot/plugin/filesystem/S3PinotFS.java
+++ 
b/pinot-plugins/pinot-file-system/pinot-s3/src/main/java/org/apache/pinot/plugin/filesystem/S3PinotFS.java
@@ -634,6 +634,7 @@ public class S3PinotFS extends BasePinotFS {
   @Override
   public void close()
   throws IOException {
+_s3Client.close();
 super.close();
   }
 }
diff --git 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskExecutor.java
 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskExecutor.java
index a17bbc77f4..a7d5127c09 100644
--- 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskExecutor.java
+++ 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskExecutor.java
@@ -176,39 +176,39 @@ public class SegmentGenerationAndPushTaskExecutor extends 
BaseTaskExecutor {
 if (taskConfigs.containsKey(BatchConfigProperties.OUTPUT_SEGMENT_DIR_URI)) 
{
   outputSegmentDirURI = 
URI.create(taskConfigs.get(BatchConfigProperties.OUTPUT_SEGMENT_DIR_URI));
 }
-PinotFS outputFileFS = 
SegmentGenerationAndPushTaskUtils.getOutputPinotFS(taskConfigs, 
outputSegmentDirURI);
-switch 
(BatchConfigProperties.SegmentPushType.valueOf(pushMode.toUpperCase())) {
-  case TAR:
-try {
-  SegmentPushUtils.pushSegments(spec, 
SegmentGenerationAndPushTaskUtils.getLocalPinotFs(),
-  Arrays.asList(outputSegmentTarURI.toString()));
-} catch (RetriableOperationException | AttemptsExceededException e) {
-  throw new RuntimeException(e);
-}
-break;
-  case URI:
-try {
-  List segmentUris = new ArrayList<>();
-  URI updatedURI = SegmentPushUtils
-  .generateSegmentTarURI(outputSegmentDirURI, outputSegmentTarURI, 
pushJobSpec.getSegmentUriPrefix(),
-  pushJobSpec.getSegmentUriSuffix());
-  segmentUris.add(updatedURI.toString());
-  SegmentPushUtils.sendSegmentUris(spec, segmentUris);
-} catch (RetriableOperationException | AttemptsExceededException e) {
-  throw new RuntimeExcep

[pinot] branch master updated (cea282c368 -> 9d16896483)

2022-10-13 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from cea282c368 [Flaky test] fix PurgeMinionClusterIntegrationTest (#9583)
 add 9d16896483 Enhance SegmentNameGenerator type inference if not 
explicitly set in config (#9550)

No new revisions were added by this update.

Summary of changes:
 .../framework/SegmentProcessorFramework.java   |  2 +-
 .../batch/common/SegmentGenerationTaskRunner.java  | 27 +---
 .../standalone/SegmentGenerationJobRunnerTest.java |  3 +-
 .../spi/creator/SegmentGeneratorConfig.java| 37 +---
 .../spi/creator/SegmentGeneratorConfigTest.java| 51 ++
 5 files changed, 96 insertions(+), 24 deletions(-)


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated: fix column name for record metadata (#9552)

2022-10-10 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 7681dff274 fix column name for record metadata (#9552)
7681dff274 is described below

commit 7681dff274abf8405a323859e20e06f473c88ec0
Author: Navina Ramesh 
AuthorDate: Mon Oct 10 13:22:55 2022 +0530

fix column name for record metadata (#9552)
---
 .../resources/examples/stream/meetupRsvp/meetupRsvp_schema.json | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git 
a/pinot-tools/src/main/resources/examples/stream/meetupRsvp/meetupRsvp_schema.json
 
b/pinot-tools/src/main/resources/examples/stream/meetupRsvp/meetupRsvp_schema.json
index 93deb2d137..0c0a087052 100644
--- 
a/pinot-tools/src/main/resources/examples/stream/meetupRsvp/meetupRsvp_schema.json
+++ 
b/pinot-tools/src/main/resources/examples/stream/meetupRsvp/meetupRsvp_schema.json
@@ -12,7 +12,11 @@
 },
 {
   "dataType": "STRING",
-  "name": "header$producerTimestamp"
+  "name": "__metadata$offset"
+},
+{
+  "dataType": "STRING",
+  "name": "__metadata$recordTimestamp"
 },
 {
   "dataType": "STRING",


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated: [Upsert] Skip removing upsert metadata when shutting down the server (#9551)

2022-10-07 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 8cdae92502 [Upsert] Skip removing upsert metadata when shutting down 
the server (#9551)
8cdae92502 is described below

commit 8cdae925020dc8ae32777d3e6205340d22bac7fd
Author: Xiaotian (Jackie) Jiang <1751+jackie-ji...@users.noreply.github.com>
AuthorDate: Fri Oct 7 03:12:53 2022 -0700

[Upsert] Skip removing upsert metadata when shutting down the server (#9551)
---
 .../segment/local/upsert/BasePartitionUpsertMetadataManager.java  | 8 
 1 file changed, 8 insertions(+)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java
index 7bcb33cf19..1eb7684c30 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java
@@ -61,6 +61,8 @@ public abstract class BasePartitionUpsertMetadataManager 
implements PartitionUps
   @VisibleForTesting
   public final Set _replacedSegments = 
ConcurrentHashMap.newKeySet();
 
+  protected volatile boolean _closed = false;
+
   protected long _lastOutOfOrderEventReportTimeNs = Long.MIN_VALUE;
   protected int _numOutOfOrderEvents = 0;
 
@@ -245,6 +247,11 @@ public abstract class BasePartitionUpsertMetadataManager 
implements PartitionUps
 ((ImmutableSegmentImpl) 
segment).persistValidDocIdsSnapshot(validDocIds);
   }
 
+  if (_closed) {
+_logger.info("Skip removing segment: {} because metadata manager is 
already closed", segment);
+return;
+  }
+
   if (validDocIds == null || validDocIds.isEmpty()) {
 _logger.info("Skip removing segment without valid docs: {}", 
segmentName);
 return;
@@ -281,5 +288,6 @@ public abstract class BasePartitionUpsertMetadataManager 
implements PartitionUps
   public void close()
   throws IOException {
 _logger.info("Closing the metadata manager, current primary key count: 
{}", getNumPrimaryKeys());
+_closed = true;
   }
 }


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated: Improve primary key serialization performance (#9538)

2022-10-07 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 75892a78be Improve primary key serialization performance (#9538)
75892a78be is described below

commit 75892a78be165a656c0a13b460cfb9e8aacc6c1a
Author: Kartik Khare 
AuthorDate: Fri Oct 7 14:29:47 2022 +0530

Improve primary key serialization performance (#9538)

* Improve primary key serialization performance

* Remove unused import

* Add datasize to serialized value

* Add test for primary key serialization and handle nulls

* fix test

* Add a separate serializer for single-valued arrays

Co-authored-by: Kartik Khare 
---
 ...rrentMapPartitionUpsertMetadataManagerTest.java |  8 +-
 .../apache/pinot/spi/data/readers/PrimaryKey.java  | 86 +-
 .../pinot/spi/data/readers/PrimaryKeyTest.java | 49 +++-
 3 files changed, 133 insertions(+), 10 deletions(-)

diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/upsert/ConcurrentMapPartitionUpsertMetadataManagerTest.java
 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/upsert/ConcurrentMapPartitionUpsertMetadataManagerTest.java
index 3df1f45caa..f005038608 100644
--- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/upsert/ConcurrentMapPartitionUpsertMetadataManagerTest.java
+++ 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/upsert/ConcurrentMapPartitionUpsertMetadataManagerTest.java
@@ -335,15 +335,15 @@ public class 
ConcurrentMapPartitionUpsertMetadataManagerTest {
   public void testHashPrimaryKey() {
 PrimaryKey pk = new PrimaryKey(new Object[]{"uuid-1", "uuid-2", "uuid-3"});
 assertEquals(BytesUtils.toHexString(((ByteArray) 
HashUtils.hashPrimaryKey(pk, HashFunction.MD5)).getBytes()),
-"58de44997505014e02982846a4d1cbbd");
+"6ca926be8c2d1d980acf48ba48418e24");
 assertEquals(BytesUtils.toHexString(((ByteArray) 
HashUtils.hashPrimaryKey(pk, HashFunction.MURMUR3)).getBytes()),
-"7e6b4a98296292a4012225fff037fa8c");
+"e4540494e43b27e312d01f33208c6a4e");
 // reorder
 pk = new PrimaryKey(new Object[]{"uuid-3", "uuid-2", "uuid-1"});
 assertEquals(BytesUtils.toHexString(((ByteArray) 
HashUtils.hashPrimaryKey(pk, HashFunction.MD5)).getBytes()),
-"d2df12c6dea7b83f965613614eee58e2");
+"fc2159b78d07f803fdfb0b727315a445");
 assertEquals(BytesUtils.toHexString(((ByteArray) 
HashUtils.hashPrimaryKey(pk, HashFunction.MURMUR3)).getBytes()),
-"8d68b314cc0c8de4dbd55f4dad3c3e66");
+"37fab5ef0ea39711feabcdc623cb8a4e");
   }
 
   /**
diff --git 
a/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/PrimaryKey.java 
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/PrimaryKey.java
index b663cf6b72..27475a3e29 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/PrimaryKey.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/PrimaryKey.java
@@ -18,8 +18,12 @@
  */
 package org.apache.pinot.spi.data.readers;
 
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
-import org.apache.commons.lang3.SerializationUtils;
+import org.apache.pinot.spi.utils.BigDecimalUtils;
+import org.apache.pinot.spi.utils.ByteArray;
 
 
 /**
@@ -37,7 +41,85 @@ public class PrimaryKey {
   }
 
   public byte[] asBytes() {
-return SerializationUtils.serialize(_values);
+if (_values.length == 1) {
+  return asBytesSingleVal(_values[0]);
+}
+
+int sizeInBytes = 0;
+byte[][] cache = new byte[_values.length][];
+for (int i = 0; i < _values.length; i++) {
+  Object value = _values[i];
+
+  if (value instanceof Integer) {
+sizeInBytes += Integer.BYTES;
+  } else if (value instanceof Long) {
+sizeInBytes += Long.BYTES;
+  } else if (value instanceof String) {
+cache[i] = ((String) value).getBytes(StandardCharsets.UTF_8);
+sizeInBytes += cache[i].length + Integer.BYTES;
+  } else if (value instanceof ByteArray) {
+cache[i] = ((ByteArray) value).getBytes();
+sizeInBytes += cache[i].length + Integer.BYTES;
+  } else if (value instanceof Float) {
+sizeInBytes += Float.BYTES;
+  } else if (value instanceof Double) {
+sizeInBytes += Double.BYTES;
+  } else if (value instanceof BigDecimal) {
+cache[i] = BigDecimalUtils.serialize((BigDecimal) value);
+sizeInBytes += cache[i].length + Integer.BYTES;
+  } else {
+throw new IllegalStateException(
+String.format("Unsupported value

[pinot] branch master updated: Add a new API to fix segment date time in metadata (#9413)

2022-09-29 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new d2619c1613 Add a new API to fix segment date time in metadata (#9413)
d2619c1613 is described below

commit d2619c1613e143ba4ab81aecb2f2fef3f5dd6539
Author: Kartik Khare 
AuthorDate: Thu Sep 29 17:25:53 2022 +0530

Add a new API to fix segment date time in metadata (#9413)

* Add API to handle change in timestamp format

* Fix linting

* Add support for storing raw segment start and end time in metadata

* Do not store start end time in segment metadata separately

* remove refresh flag

* Store new start/end time with proper time unit

* Move the API to segment resource

* Fix test failure

* Add test

* Cleanup: Remove duplicate methods

* Store start/end time in milliseconds in zookeeper

* Refactor: change method names and reduce scope of exceptions

* Remove redundant timeunit conversion

* Throw user errors and check for time column

* Fix Segment tests

Co-authored-by: Kartik Khare 
---
 .../common/metadata/segment/SegmentZKMetadata.java | 16 +
 .../api/resources/PinotSegmentRestletResource.java | 68 ++
 .../helix/core/PinotHelixResourceManager.java  | 17 ++
 .../helix/core/util/ZKMetadataUtils.java   | 35 ++-
 .../helix/core/PinotHelixResourceManagerTest.java  | 46 +++
 .../controller/utils/SegmentMetadataMockUtils.java | 10 +++-
 .../apache/pinot/spi/utils/CommonConstants.java|  2 +
 7 files changed, 189 insertions(+), 5 deletions(-)

diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/metadata/segment/SegmentZKMetadata.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/metadata/segment/SegmentZKMetadata.java
index ff9f5530ba..a891ed1829 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/metadata/segment/SegmentZKMetadata.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/metadata/segment/SegmentZKMetadata.java
@@ -94,6 +94,14 @@ public class SegmentZKMetadata implements ZKMetadata {
 return _endTimeMs;
   }
 
+  public String getRawStartTime() {
+return _simpleFields.get(Segment.RAW_START_TIME);
+  }
+
+  public String getRawEndTime() {
+return _simpleFields.get(Segment.RAW_END_TIME);
+  }
+
   public void setStartTime(long startTime) {
 setNonNegativeValue(Segment.START_TIME, startTime);
 _startTimeMsCached = false;
@@ -104,6 +112,14 @@ public class SegmentZKMetadata implements ZKMetadata {
 _endTimeMsCached = false;
   }
 
+  public void setRawStartTime(String startTime) {
+setValue(Segment.RAW_START_TIME, startTime);
+  }
+
+  public void setRawEndTime(String endTime) {
+setValue(Segment.RAW_END_TIME, endTime);
+  }
+
   public void setTimeUnit(TimeUnit timeUnit) {
 setValue(Segment.TIME_UNIT, timeUnit);
 _startTimeMsCached = false;
diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotSegmentRestletResource.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotSegmentRestletResource.java
index 9fa65836da..5e94c29ac5 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotSegmentRestletResource.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotSegmentRestletResource.java
@@ -59,6 +59,7 @@ import javax.ws.rs.core.MediaType;
 import javax.ws.rs.core.Response;
 import javax.ws.rs.core.Response.Status;
 import org.apache.commons.httpclient.HttpConnectionManager;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.helix.store.zk.ZkHelixPropertyStore;
 import org.apache.helix.zookeeper.datamodel.ZNRecord;
@@ -79,7 +80,10 @@ import 
org.apache.pinot.controller.util.CompletionServiceHelper;
 import org.apache.pinot.controller.util.ConsumingSegmentInfoReader;
 import org.apache.pinot.controller.util.TableMetadataReader;
 import org.apache.pinot.controller.util.TableTierReader;
+import org.apache.pinot.spi.config.table.TableConfig;
 import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.data.DateTimeFieldSpec;
+import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.utils.CommonConstants;
 import org.apache.pinot.spi.utils.JsonUtils;
 import org.apache.pinot.spi.utils.builder.TableNameBuilder;
@@ -1035,4 +1039,68 @@ public class PinotSegmentRestletResource {
   Response.Status.INTERNAL_SERVER_ERROR, e);
 }
   }
+
+  @POST
+  @Path("/segments/{tableNameWithType}/updateZKTimeInterval")
+  @Authenticate(AccessType.UPDATE)
+  @Produces(MediaType.APPLICATION_JSON)
+  @ApiOperation(value = "Update the start and 

[pinot] branch master updated: fix regression due to refactoring (#9481)

2022-09-29 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new ddc5d43119 fix regression due to refactoring (#9481)
ddc5d43119 is described below

commit ddc5d431197a5453ecdd1928cbeb4419b9f8cf3b
Author: Navina Ramesh 
AuthorDate: Thu Sep 29 15:05:20 2022 +0530

fix regression due to refactoring (#9481)

* fix regression due to refactoring

* lint fix
---
 .../pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
index f20a9be217..6f59c79709 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
@@ -32,7 +32,6 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.Semaphore;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicLong;
 import javax.annotation.Nullable;
@@ -1433,7 +1432,7 @@ public class LLRealtimeSegmentDataManager extends 
RealtimeSegmentDataManager {
 try {
   return 
_partitionMetadataProvider.fetchStreamPartitionOffset(OffsetCriteria.LARGEST_OFFSET_CRITERIA,
   maxWaitTimeMs);
-} catch (TimeoutException e) {
+} catch (Exception e) {
   _segmentLogger.warn(
   "Cannot fetch latest stream offset for clientId {} and 
partitionGroupId {} with maxWaitTime {}", _clientId,
   _partitionGroupId, maxWaitTimeMs);


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated (fea74a4f15 -> 165e26f5ef)

2022-09-22 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from fea74a4f15 [multistage][hotfix] fix filter operator type convert 
(#9450)
 add 165e26f5ef Handle Invalid timestamps (#9355)

No new revisions were added by this update.

Summary of changes:
 .../apache/pinot/core/util/SchemaUtilsTest.java| 27 +
 .../pinot/segment/local/utils/SchemaUtils.java | 18 +++
 .../apache/pinot/spi/data/DateTimeFieldSpec.java   | 35 +++---
 3 files changed, 76 insertions(+), 4 deletions(-)


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated (69722b0dbd -> bdf632ca44)

2022-09-22 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from 69722b0dbd Add support for spaces in file paths (#9426)
 add bdf632ca44 Add support for gracefully handling the errors while 
transformations (#9377)

No new revisions were added by this update.

Summary of changes:
 .../apache/pinot/common/metrics/ServerMeter.java   |  1 +
 .../realtime/LLRealtimeSegmentDataManager.java |  6 
 .../recordtransformer/ComplexTypeTransformer.java  | 32 -
 .../recordtransformer/DataTypeTransformer.java |  1 +
 .../recordtransformer/ExpressionTransformer.java   | 18 +++-
 .../local/recordtransformer/FilterTransformer.java | 31 
 .../local/segment/creator/TransformPipeline.java   | 13 +
 .../impl/SegmentIndexCreationDriverImpl.java   | 33 ++
 .../ComplexTypeTransformerTest.java| 14 -
 .../ExpressionTransformerTest.java |  2 +-
 .../apache/pinot/spi/data/readers/GenericRow.java  |  6 
 11 files changed, 123 insertions(+), 34 deletions(-)


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated (7af0d20aaa -> 69722b0dbd)

2022-09-22 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from 7af0d20aaa use MinionEventObserver to track finer grained task 
progress status on worker (#9432)
 add 69722b0dbd Add support for spaces in file paths (#9426)

No new revisions were added by this update.

Summary of changes:
 .../common/segment/generation/SegmentGenerationUtils.java | 11 +--
 .../common/segment/generation/SegmentGenerationUtilsTest.java |  7 +++
 2 files changed, 16 insertions(+), 2 deletions(-)


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated (9b3ac2a63e -> 3633495f5d)

2022-09-16 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from 9b3ac2a63e Use ISODateTimeFormat as default for SIMPLE_DATE_FORMAT  
(#9378)
 add 3633495f5d Add missing properties in CSV plugin (#9399)

No new revisions were added by this update.

Summary of changes:
 .../plugin/inputformat/csv/CSVMessageDecoder.java  | 37 +++
 .../plugin/inputformat/csv/CSVRecordReader.java| 20 ++--
 .../inputformat/csv/CSVRecordReaderConfig.java | 55 ++
 3 files changed, 109 insertions(+), 3 deletions(-)


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated (bfa2a5a23c -> 9b3ac2a63e)

2022-09-16 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from bfa2a5a23c [multistage] Initial commit to support h2 testing (#9406)
 add 9b3ac2a63e Use ISODateTimeFormat as default for SIMPLE_DATE_FORMAT  
(#9378)

No new revisions were added by this update.

Summary of changes:
 .../pinot/common/data/DateTimeFormatSpecTest.java  | 27 
 .../apache/pinot/common/data/FieldSpecTest.java|  7 ++-
 .../apache/pinot/core/util/SchemaUtilsTest.java|  2 +-
 .../pinot/spi/data/DateTimeFormatPatternSpec.java  | 50 +++---
 .../apache/pinot/spi/data/DateTimeFormatSpec.java  | 27 ++--
 .../pinot/spi/data/DateTimeFormatSpecTest.java |  1 -
 6 files changed, 83 insertions(+), 31 deletions(-)


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated: Improve segment name check in metadata push (#9359)

2022-09-10 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new ff98c5ad1b Improve segment name check in metadata push (#9359)
ff98c5ad1b is described below

commit ff98c5ad1b4a50032f53c2b9e6b634f26f0b4785
Author: Haitao Zhang 
AuthorDate: Sat Sep 10 03:29:45 2022 -0700

Improve segment name check in metadata push (#9359)

* Improve segment name check in metadata push

* address comments

* fix broken tests
---
 .../segment/local/utils/SegmentPushUtils.java  |  7 --
 .../creator/name/FixedSegmentNameGenerator.java|  4 +--
 .../name/NormalizedDateSegmentNameGenerator.java   |  8 +++---
 .../spi/creator/name/SegmentNameGenerator.java | 12 -
 .../spi/creator/name/SegmentNameUtils.java}| 29 --
 .../creator/name/SimpleSegmentNameGenerator.java   | 18 --
 .../name/FixedSegmentNameGeneratorTest.java|  2 +-
 .../name/SimpleSegmentNameGeneratorTest.java   |  2 +-
 8 files changed, 38 insertions(+), 44 deletions(-)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java
index b756c7f760..cee66b2b9a 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java
@@ -44,6 +44,7 @@ import org.apache.pinot.common.utils.SimpleHttpResponse;
 import org.apache.pinot.common.utils.TarGzCompressionUtils;
 import org.apache.pinot.common.utils.http.HttpClient;
 import org.apache.pinot.segment.spi.V1Constants;
+import org.apache.pinot.segment.spi.creator.name.SegmentNameUtils;
 import org.apache.pinot.spi.auth.AuthProvider;
 import org.apache.pinot.spi.config.table.TableType;
 import org.apache.pinot.spi.filesystem.PinotFS;
@@ -237,8 +238,10 @@ public class SegmentPushUtils implements Serializable {
 for (String segmentUriPath : segmentUriToTarPathMap.keySet()) {
   String tarFilePath = segmentUriToTarPathMap.get(segmentUriPath);
   String fileName = new File(tarFilePath).getName();
-  
Preconditions.checkArgument(fileName.endsWith(Constants.TAR_GZ_FILE_EXT));
-  String segmentName = fileName.substring(0, fileName.length() - 
Constants.TAR_GZ_FILE_EXT.length());
+  // segments stored in Pinot deep store do not have .tar.gz extension
+  String segmentName = fileName.endsWith(Constants.TAR_GZ_FILE_EXT)
+  ? fileName.substring(0, fileName.length() - 
Constants.TAR_GZ_FILE_EXT.length()) : fileName;
+  SegmentNameUtils.validatePartialOrFullSegmentName(segmentName);
   File segmentMetadataFile = generateSegmentMetadataFile(fileSystem, 
URI.create(tarFilePath));
   AuthProvider authProvider = 
AuthProviderUtils.makeAuthProvider(spec.getAuthToken());
   try {
diff --git 
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/FixedSegmentNameGenerator.java
 
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/FixedSegmentNameGenerator.java
index cc4acb0c0b..cf9eaed03a 100644
--- 
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/FixedSegmentNameGenerator.java
+++ 
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/FixedSegmentNameGenerator.java
@@ -31,9 +31,7 @@ public class FixedSegmentNameGenerator implements 
SegmentNameGenerator {
 
   public FixedSegmentNameGenerator(String segmentName) {
 Preconditions.checkArgument(segmentName != null, "Missing segmentName for 
FixedSegmentNameGenerator");
-Preconditions
-.checkArgument(isValidSegmentName(segmentName), "Invalid segmentName: 
%s for FixedSegmentNameGenerator",
-segmentName);
+SegmentNameUtils.validatePartialOrFullSegmentName(segmentName);
 _segmentName = segmentName;
   }
 
diff --git 
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/NormalizedDateSegmentNameGenerator.java
 
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/NormalizedDateSegmentNameGenerator.java
index 4382413314..f313d65829 100644
--- 
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/NormalizedDateSegmentNameGenerator.java
+++ 
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/NormalizedDateSegmentNameGenerator.java
@@ -66,14 +66,14 @@ public class NormalizedDateSegmentNameGenerator implements 
SegmentNameGenerator
 _segmentNamePrefix = segmentNamePrefix != null ? segmentNamePrefix.trim() 
: tableName;
 Preconditions
 .checkArgument(_segmentNamePrefix != null, "Missing segmentNamePrefix 
for Normalize

[pinot] branch master updated (15e939818c -> 06b76e6dd1)

2022-09-07 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from 15e939818c Add support for Auth in controller requests in java query 
client (#9230)
 add 06b76e6dd1 Add Spark Job Launcher tool (#9288)

No new revisions were added by this update.

Summary of changes:
 pinot-tools/pom.xml|  20 ++
 .../pinot/tools/admin/PinotAdministrator.java  |   2 +
 .../LaunchSparkDataIngestionJobCommand.java| 369 +
 3 files changed, 391 insertions(+)
 create mode 100644 
pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/LaunchSparkDataIngestionJobCommand.java


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated (77c1f697d3 -> 15e939818c)

2022-09-07 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from 77c1f697d3 Update requirements-db.txt (#9341)
 add 15e939818c Add support for Auth in controller requests in java query 
client (#9230)

No new revisions were added by this update.

Summary of changes:
 pinot-clients/pinot-java-client/pom.xml|  5 ++
 .../java/org/apache/pinot/client/BrokerCache.java  | 53 -
 .../pinot/client/BrokerCacheUpdaterPeriodic.java   | 14 +++--
 .../org/apache/pinot/client/ConnectionFactory.java | 36 ---
 .../client/ControllerBasedBrokerSelector.java  | 25 +++-
 .../client/JsonAsyncHttpPinotClientTransport.java  | 19 +-
 .../JsonAsyncHttpPinotClientTransportFactory.java  | 13 
 .../apache/pinot/client/utils/ConnectionUtils.java | 69 ++
 .../src/main/resources/version.properties  |  1 +
 9 files changed, 202 insertions(+), 33 deletions(-)
 create mode 100644 
pinot-clients/pinot-java-client/src/main/java/org/apache/pinot/client/utils/ConnectionUtils.java


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated: Allow ingestion of errored records with incorrect datatype (#9320)

2022-09-02 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 6047b06ac6 Allow ingestion of errored records with incorrect datatype 
(#9320)
6047b06ac6 is described below

commit 6047b06ac6f62ab7349e854a94f50199512ad973
Author: Kartik Khare 
AuthorDate: Fri Sep 2 14:44:59 2022 +0530

Allow ingestion of errored records with incorrect datatype (#9320)

* Allow ingestion of errored records with incorrect datatype

* Add new config to SegmentGenerator as well

* Handle outOfRange timestamps and replace them with nulls

* Rename config and add a separate config for timeValue check

* Add exceptions for incorrect time columns

Co-authored-by: Kartik Khare 
---
 .../recordtransformer/CompositeTransformer.java|  2 +-
 .../recordtransformer/DataTypeTransformer.java | 52 +-
 .../ExpressionTransformerTest.java |  2 +-
 .../recordtransformer/RecordTransformerTest.java   | 84 +-
 .../spi/creator/SegmentGeneratorConfig.java| 20 ++
 .../pinot/spi/config/table/IndexingConfig.java | 18 +
 .../spi/utils/builder/TableConfigBuilder.java  | 14 
 7 files changed, 187 insertions(+), 5 deletions(-)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java
index e21340a0fb..563359aa09 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java
@@ -61,7 +61,7 @@ public class CompositeTransformer implements 
RecordTransformer {
   public static CompositeTransformer getDefaultTransformer(TableConfig 
tableConfig, Schema schema) {
 return new CompositeTransformer(Arrays
 .asList(new ExpressionTransformer(tableConfig, schema), new 
FilterTransformer(tableConfig),
-new DataTypeTransformer(schema), new 
NullValueTransformer(tableConfig, schema),
+new DataTypeTransformer(tableConfig, schema), new 
NullValueTransformer(tableConfig, schema),
 new SanitizationTransformer(schema)));
   }
 
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/DataTypeTransformer.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/DataTypeTransformer.java
index de7a6795d7..14f68f1394 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/DataTypeTransformer.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/DataTypeTransformer.java
@@ -27,10 +27,17 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import javax.annotation.Nullable;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.pinot.common.utils.PinotDataType;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.data.DateTimeFieldSpec;
+import org.apache.pinot.spi.data.DateTimeFormatSpec;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.data.readers.GenericRow;
+import org.apache.pinot.spi.utils.TimeUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 
 /**
@@ -40,14 +47,33 @@ import org.apache.pinot.spi.data.readers.GenericRow;
  */
 @SuppressWarnings("rawtypes")
 public class DataTypeTransformer implements RecordTransformer {
+  private static final Logger LOGGER = 
LoggerFactory.getLogger(DataTypeTransformer.class);
+
   private final Map _dataTypes = new HashMap<>();
+  private final boolean _continueOnError;
+  private final boolean _validateTimeValues;
+  private final String _timeColumnName;
+  private final DateTimeFormatSpec _timeFormatSpec;
 
-  public DataTypeTransformer(Schema schema) {
+  public DataTypeTransformer(TableConfig tableConfig, Schema schema) {
 for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
   if (!fieldSpec.isVirtualColumn()) {
 _dataTypes.put(fieldSpec.getName(), 
PinotDataType.getPinotDataTypeForIngestion(fieldSpec));
   }
 }
+
+_continueOnError = tableConfig.getIndexingConfig().isContinueOnError();
+_validateTimeValues = 
tableConfig.getIndexingConfig().isValidateTimeValue();
+_timeColumnName = tableConfig.getValidationConfig().getTimeColumnName();
+
+DateTimeFormatSpec timeColumnSpec = null;
+if (StringUtils.isNotEmpty(_timeColumnName)) {
+  DateTimeFieldSpec dateTimeFieldSpec = 
schema.getSpecForTimeColumn(_timeColumnName);
+  Preconditions.checkState(d

[pinot] branch master updated (a5227873d6 -> 04c5a1af18)

2022-09-01 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from a5227873d6 add cardinality function (#9300)
 add 04c5a1af18 Add support for custom null values in CSV record reader 
(#9318)

No new revisions were added by this update.

Summary of changes:
 .../plugin/inputformat/csv/CSVRecordReader.java|  5 +++
 .../inputformat/csv/CSVRecordReaderConfig.java |  9 +
 .../inputformat/csv/CSVRecordReaderTest.java   | 38 ++
 3 files changed, 52 insertions(+)


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated: Add Support for Schema Registry in Protobuf Decoder (#9220)

2022-08-17 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 42a2fc70ff Add Support for Schema Registry in Protobuf Decoder (#9220)
42a2fc70ff is described below

commit 42a2fc70ff37fddaaba300becd36b41f170e77bd
Author: Kartik Khare 
AuthorDate: Thu Aug 18 12:17:59 2022 +0530

Add Support for Schema Registry in Protobuf Decoder (#9220)

* working on schema registry. refactor pending

* Add tests for confluent protobuf and delete duplicate files

* move dependency versions to parent pom

* Fix protobuf decoder bug: Not honouring offset and length

Co-authored-by: Kartik Khare 
---
 .../v0_deprecated/pinot-ingestion-common/pom.xml   |   6 +
 .../pinot-confluent-avro/pom.xml   |   1 -
 .../pinot-input-format/pinot-protobuf/pom.xml  |  97 +-
 ...fluentSchemaRegistryProtoBufMessageDecoder.java | 143 +
 .../protobuf/ProtoBufMessageDecoder.java   |  11 +-
 .../protobuf/ProtoBufConfluentSchemaTest.java  | 130 +++
 .../schemaregistry/SchemaRegistryStarter.java  | 105 +++
 pom.xml|   1 +
 8 files changed, 486 insertions(+), 8 deletions(-)

diff --git 
a/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-ingestion-common/pom.xml
 
b/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-ingestion-common/pom.xml
index 8fb34e6fe3..4d0f323e2b 100644
--- 
a/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-ingestion-common/pom.xml
+++ 
b/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-ingestion-common/pom.xml
@@ -33,6 +33,12 @@
   pinot-ingestion-common
   Pinot Ingestion Common
   https://pinot.apache.org/
+  
+
+  confluent
+  https://packages.confluent.io/maven/
+
+  
   
 ${basedir}/../../../..
 package
diff --git a/pinot-plugins/pinot-input-format/pinot-confluent-avro/pom.xml 
b/pinot-plugins/pinot-input-format/pinot-confluent-avro/pom.xml
index 57d46b0d5b..e1e4f87b8a 100644
--- a/pinot-plugins/pinot-input-format/pinot-confluent-avro/pom.xml
+++ b/pinot-plugins/pinot-input-format/pinot-confluent-avro/pom.xml
@@ -35,7 +35,6 @@
   
 ${basedir}/../../..
 2.8.1
-5.3.1
 package
   
   
diff --git a/pinot-plugins/pinot-input-format/pinot-protobuf/pom.xml 
b/pinot-plugins/pinot-input-format/pinot-protobuf/pom.xml
index d3befbbc91..c1e25bb9b6 100644
--- a/pinot-plugins/pinot-input-format/pinot-protobuf/pom.xml
+++ b/pinot-plugins/pinot-input-format/pinot-protobuf/pom.xml
@@ -35,9 +35,16 @@
   https://pinot.apache.org/
   
 ${basedir}/../../..
-3.11.4
+2.8.1
+1.17.3
 package
   
+  
+
+  confluent
+  https://packages.confluent.io/maven/
+
+  
   
 
   commons-lang
@@ -46,7 +53,6 @@
 
   com.google.protobuf
   protobuf-java
-  ${proto.version}
 
 
   com.github.os72
@@ -59,6 +65,93 @@
 
   
 
+
+
+  org.apache.kafka
+  kafka-clients
+  ${kafka.lib.version}
+  
+
+  org.slf4j
+  slf4j-api
+
+
+  org.slf4j
+  slf4j-log4j12
+
+
+  net.sf.jopt-simple
+  jopt-simple
+
+
+  org.scala-lang
+  scala-library
+
+
+  org.apache.zookeeper
+  zookeeper
+
+  
+
+
+
+  io.confluent
+  kafka-schema-registry-client
+  ${confluent.version}
+  
+
+  org.apache.kafka
+  kafka-clients
+
+
+  org.slf4j
+  slf4j-api
+
+
+  org.slf4j
+  slf4j-log4j12
+
+
+  org.apache.zookeeper
+  zookeeper
+
+
+  io.swagger
+  swagger-annotations
+
+  
+
+
+  io.confluent
+  kafka-protobuf-serializer
+  ${confluent.version}
+  
+
+  org.jetbrains.kotlin
+  kotlin-stdlib-common
+
+
+  com.squareup.okio
+  okio
+
+
+  com.google.errorprone
+  error_prone_annotations
+
+  
+
+
+  org.testcontainers
+  testcontainers
+  ${testcontainers.version}
+  test
+
+
+  org.testcontainers
+  kafka
+  ${testcontainers.version}
+  test
+
   
   
 
diff --git 
a/pinot-plugins/pinot-input-format/pinot-protobuf/src/main/java/org/apache/pinot/plugin/inputformat/protobuf/KafkaConfluentSchemaRegistryProtoBufMessageDecoder.java
 
b/pinot-plugins/pinot-input-format/pinot-protobuf/src/main/java/org/apache/pinot/plugin/inputformat/protobuf/KafkaConfluentSchemaRegistryProtoBufMessageDecoder.java
new file mode 100644
index 00

[pinot] branch master updated: Add close method to upsert interfaces (#9212)

2022-08-17 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 5c703695d2 Add close method to upsert interfaces (#9212)
5c703695d2 is described below

commit 5c703695d245278e4a47ab066c6d6477dfcf487c
Author: Kartik Khare 
AuthorDate: Wed Aug 17 15:43:38 2022 +0530

Add close method to upsert interfaces (#9212)

* Add close method to upsert interfaces

* Extend with Closeable

Co-authored-by: Kartik Khare 
---
 .../core/data/manager/realtime/RealtimeTableDataManager.java  | 7 +++
 .../local/upsert/ConcurrentMapPartitionUpsertMetadataManager.java | 6 ++
 .../local/upsert/ConcurrentMapTableUpsertMetadataManager.java | 8 
 .../segment/local/upsert/PartitionUpsertMetadataManager.java  | 3 ++-
 .../pinot/segment/local/upsert/TableUpsertMetadataManager.java| 3 ++-
 5 files changed, 25 insertions(+), 2 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
index af7f366ebd..f41f7edba5 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
@@ -199,6 +199,13 @@ public class RealtimeTableDataManager extends 
BaseTableDataManager {
   @Override
   protected void doShutdown() {
 _segmentAsyncExecutorService.shutdown();
+if (_tableUpsertMetadataManager != null) {
+  try {
+_tableUpsertMetadataManager.close();
+  } catch (IOException e) {
+_logger.warn("Cannot close upsert metadata manager properly for table: 
{}", _tableNameWithType, e);
+  }
+}
 for (SegmentDataManager segmentDataManager : 
_segmentDataManagerMap.values()) {
   segmentDataManager.destroy();
 }
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/ConcurrentMapPartitionUpsertMetadataManager.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/ConcurrentMapPartitionUpsertMetadataManager.java
index e6890c8a6c..1654eaca0d 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/ConcurrentMapPartitionUpsertMetadataManager.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/ConcurrentMapPartitionUpsertMetadataManager.java
@@ -428,6 +428,12 @@ public class ConcurrentMapPartitionUpsertMetadataManager 
implements PartitionUps
 }
   }
 
+  @Override
+  public void close() {
+_logger.info("Closing metadata manager for table {} and partition {}, 
current primary key count: {}",
+_tableNameWithType, _partitionId, 
_primaryKeyToRecordLocationMap.size());
+  }
+
   @VisibleForTesting
   static class RecordLocation {
 private final IndexSegment _segment;
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/ConcurrentMapTableUpsertMetadataManager.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/ConcurrentMapTableUpsertMetadataManager.java
index 67474e145d..5c5c357079 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/ConcurrentMapTableUpsertMetadataManager.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/ConcurrentMapTableUpsertMetadataManager.java
@@ -37,4 +37,12 @@ public class ConcurrentMapTableUpsertMetadataManager extends 
BaseTableUpsertMeta
 k -> new 
ConcurrentMapPartitionUpsertMetadataManager(_tableNameWithType, k, 
_primaryKeyColumns,
 _comparisonColumn, _hashFunction, _partialUpsertHandler, 
_serverMetrics));
   }
+
+  @Override
+  public void close() {
+for (ConcurrentMapPartitionUpsertMetadataManager 
partitionUpsertMetadataManager
+: _partitionMetadataManagerMap.values()) {
+  partitionUpsertMetadataManager.close();
+}
+  }
 }
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartitionUpsertMetadataManager.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartitionUpsertMetadataManager.java
index 2c5f68df45..ef5ec7c414 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartitionUpsertMetadataManager.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartitionUpsertMetadataManager.java
@@ -18,6 +18,7 @@
  */
 package org.apache.pinot.segment.local.upsert;
 
+import java.io.Closeable;
 import java.util.List;
 import javax.annotation.concurrent.ThreadSafe;
 import org.apache.pinot.segment.spi.ImmutableSegment;
@@ -51,7 +52,7 @@ import org.apache.pinot.spi.

[pinot] branch master updated: Make upsert metadata manager pluggable (#9186)

2022-08-12 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 6bf696ec25 Make upsert metadata manager pluggable (#9186)
6bf696ec25 is described below

commit 6bf696ec25f2571c252c07099b81f9876f839b91
Author: Xiaotian (Jackie) Jiang <1751+jackie-ji...@users.noreply.github.com>
AuthorDate: Fri Aug 12 02:26:46 2022 -0700

Make upsert metadata manager pluggable (#9186)
---
 .../manager/realtime/RealtimeTableDataManager.java |  26 +-
 ...adataAndDictionaryAggregationPlanMakerTest.java |   4 +-
 .../indexsegment/mutable/MutableSegmentImpl.java   |   2 +-
 .../upsert/BaseTableUpsertMetadataManager.java |  79 
 ...ncurrentMapPartitionUpsertMetadataManager.java} | 124 ++
 ...> ConcurrentMapTableUpsertMetadataManager.java} |  36 +-
 .../upsert/PartitionUpsertMetadataManager.java | 430 +
 .../local/{utils => upsert}/RecordInfo.java|   5 +-
 .../local/upsert/TableUpsertMetadataManager.java   |  38 +-
 .../upsert/TableUpsertMetadataManagerFactory.java  |  65 
 .../pinot/segment/local/upsert/UpsertUtils.java|  76 
 .../dedup/PartitionDedupMetadataManagerTest.java   |   2 +-
 .../MutableSegmentImplUpsertComparisonColTest.java |   8 +-
 .../mutable/MutableSegmentImplUpsertTest.java  |   7 +-
 ...rentMapPartitionUpsertMetadataManagerTest.java} |  16 +-
 .../pinot/spi/config/table/UpsertConfig.java   |  24 ++
 16 files changed, 338 insertions(+), 604 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
index f51da5195b..af7f366ebd 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
@@ -57,9 +57,9 @@ import 
org.apache.pinot.segment.local.realtime.impl.RealtimeSegmentStatsHistory;
 import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
 import org.apache.pinot.segment.local.segment.index.loader.LoaderUtils;
 import 
org.apache.pinot.segment.local.segment.virtualcolumn.VirtualColumnProviderFactory;
-import org.apache.pinot.segment.local.upsert.PartialUpsertHandler;
 import org.apache.pinot.segment.local.upsert.PartitionUpsertMetadataManager;
 import org.apache.pinot.segment.local.upsert.TableUpsertMetadataManager;
+import org.apache.pinot.segment.local.upsert.TableUpsertMetadataManagerFactory;
 import org.apache.pinot.segment.local.utils.SchemaUtils;
 import org.apache.pinot.segment.local.utils.tablestate.TableStateUtils;
 import org.apache.pinot.segment.spi.ImmutableSegment;
@@ -188,26 +188,7 @@ public class RealtimeTableDataManager extends 
BaseTableDataManager {
   _tableUpsertMetadataManager);
   Schema schema = ZKMetadataProvider.getTableSchema(_propertyStore, 
_tableNameWithType);
   Preconditions.checkState(schema != null, "Failed to find schema for 
table: %s", _tableNameWithType);
-
-  List primaryKeyColumns = schema.getPrimaryKeyColumns();
-  Preconditions.checkState(!CollectionUtils.isEmpty(primaryKeyColumns),
-  "Primary key columns must be configured for upsert");
-
-  String comparisonColumn = upsertConfig.getComparisonColumn();
-  if (comparisonColumn == null) {
-comparisonColumn = 
tableConfig.getValidationConfig().getTimeColumnName();
-  }
-
-  PartialUpsertHandler partialUpsertHandler = null;
-  if (upsertConfig.getMode() == UpsertConfig.Mode.PARTIAL) {
-assert upsertConfig.getPartialUpsertStrategies() != null;
-partialUpsertHandler = new PartialUpsertHandler(schema, 
upsertConfig.getPartialUpsertStrategies(),
-upsertConfig.getDefaultPartialUpsertStrategy(), comparisonColumn);
-  }
-
-  _tableUpsertMetadataManager =
-  new TableUpsertMetadataManager(_tableNameWithType, 
primaryKeyColumns, comparisonColumn,
-  upsertConfig.getHashFunction(), partialUpsertHandler, 
_serverMetrics);
+  _tableUpsertMetadataManager = 
TableUpsertMetadataManagerFactory.create(tableConfig, schema, this, 
_serverMetrics);
 }
   }
 
@@ -264,7 +245,8 @@ public class RealtimeTableDataManager extends 
BaseTableDataManager {
   }
 
   public boolean isPartialUpsertEnabled() {
-return _tableUpsertMetadataManager != null && 
_tableUpsertMetadataManager.isPartialUpsertEnabled();
+return _tableUpsertMetadataManager != null
+&& _tableUpsertMetadataManager.getUpsertMode() == 
UpsertConfig.Mode.PARTIAL;
   }
 
   /*
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/MetadataAndDictionaryAggregationPlanMakerTest.java
 

[pinot] branch master updated: Add TLS1.3, Remove TLS1.0, Add User Agent Header, Add Timeouts for Pi… (#9008)

2022-08-02 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 55db06fab1 Add TLS1.3, Remove TLS1.0, Add User Agent Header, Add 
Timeouts for Pi… (#9008)
55db06fab1 is described below

commit 55db06fab16682fcac40d12cbdb7fc1df3ca6a91
Author: rino-kadijk <106676047+rino-kad...@users.noreply.github.com>
AuthorDate: Tue Aug 2 13:27:33 2022 +0200

Add TLS1.3, Remove TLS1.0, Add User Agent Header, Add Timeouts for Pi… 
(#9008)
---
 pinot-clients/pinot-java-client/pom.xml|  6 +++
 .../org/apache/pinot/client/ConnectionFactory.java | 12 +++--
 .../apache/pinot/client/ConnectionTimeouts.java| 53 ++
 .../client/JsonAsyncHttpPinotClientTransport.java  | 49 +++-
 .../JsonAsyncHttpPinotClientTransportFactory.java  | 31 -
 .../java/org/apache/pinot/client/TlsProtocols.java | 52 +
 .../src/main/resources/version.properties  | 20 
 pinot-clients/pinot-jdbc-client/pom.xml|  4 ++
 .../org/apache/pinot/client/PinotConnection.java   |  3 +-
 .../java/org/apache/pinot/client/PinotDriver.java  |  6 ++-
 .../controller/PinotControllerTransport.java   | 32 -
 .../PinotControllerTransportFactory.java   | 32 -
 .../src/main/resources/version.properties  | 20 
 .../client/DummyPinotControllerTransport.java  | 13 ++
 .../apache/pinot/client/PinotConnectionTest.java   |  2 +-
 .../pinot/client/PinotPreparedStatementTest.java   |  2 +-
 .../apache/pinot/client/PinotStatementTest.java|  2 +-
 17 files changed, 304 insertions(+), 35 deletions(-)

diff --git a/pinot-clients/pinot-java-client/pom.xml 
b/pinot-clients/pinot-java-client/pom.xml
index 0a4b1eca51..d25c369ac7 100644
--- a/pinot-clients/pinot-java-client/pom.xml
+++ b/pinot-clients/pinot-java-client/pom.xml
@@ -49,6 +49,12 @@
 maven-enforcer-plugin
   
 
+
+  
+src/main/resources
+true
+  
+
   
   
 
diff --git 
a/pinot-clients/pinot-java-client/src/main/java/org/apache/pinot/client/ConnectionFactory.java
 
b/pinot-clients/pinot-java-client/src/main/java/org/apache/pinot/client/ConnectionFactory.java
index f228c9e28b..0469c12b14 100644
--- 
a/pinot-clients/pinot-java-client/src/main/java/org/apache/pinot/client/ConnectionFactory.java
+++ 
b/pinot-clients/pinot-java-client/src/main/java/org/apache/pinot/client/ConnectionFactory.java
@@ -123,7 +123,7 @@ public class ConnectionFactory {
* @return A connection that connects to the brokers specified in the 
properties
*/
   public static Connection fromProperties(Properties properties) {
-return fromProperties(properties, getDefault());
+return fromProperties(properties, getDefault(properties));
   }
 
   /**
@@ -171,14 +171,20 @@ public class ConnectionFactory {
 return new Connection(properties, brokers, transport);
   }
 
-  private static PinotClientTransport getDefault() {
+  private static PinotClientTransport getDefault(Properties 
connectionProperties) {
 if (_defaultTransport == null) {
   synchronized (ConnectionFactory.class) {
 if (_defaultTransport == null) {
-  _defaultTransport = new 
JsonAsyncHttpPinotClientTransportFactory().buildTransport();
+  _defaultTransport = new JsonAsyncHttpPinotClientTransportFactory()
+  .withConnectionProperties(connectionProperties)
+  .buildTransport();
 }
   }
 }
 return _defaultTransport;
   }
+
+  private static PinotClientTransport getDefault() {
+return getDefault(new Properties());
+  }
 }
diff --git 
a/pinot-clients/pinot-java-client/src/main/java/org/apache/pinot/client/ConnectionTimeouts.java
 
b/pinot-clients/pinot-java-client/src/main/java/org/apache/pinot/client/ConnectionTimeouts.java
new file mode 100644
index 00..90502467fc
--- /dev/null
+++ 
b/pinot-clients/pinot-java-client/src/main/java/org/apache/pinot/client/ConnectionTimeouts.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions

[pinot] branch master updated: Add support for IAM role based credentials in Kinesis Plugin (#9071)

2022-07-27 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 49c0e24e3d Add support for IAM role based credentials in Kinesis 
Plugin (#9071)
49c0e24e3d is described below

commit 49c0e24e3d85a97911a80352f93e8ebf34214812
Author: Kartik Khare 
AuthorDate: Wed Jul 27 21:36:08 2022 +0530

Add support for IAM role based credentials in Kinesis Plugin (#9071)

* Add support for IAM roles

* Add support for externalId

* Provide proper credentials to STS client

* Add default session id

* Add javadoc

Co-authored-by: Kartik Khare 
---
 .../pinot/plugin/stream/kinesis/KinesisConfig.java | 87 ++
 .../stream/kinesis/KinesisConnectionHandler.java   | 53 +++--
 2 files changed, 119 insertions(+), 21 deletions(-)

diff --git 
a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
 
b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
index 95221ff3bf..d8a3795a2a 100644
--- 
a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
+++ 
b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
@@ -18,8 +18,10 @@
  */
 package org.apache.pinot.plugin.stream.kinesis;
 
+import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import java.util.Map;
+import java.util.UUID;
 import org.apache.pinot.spi.stream.StreamConfig;
 import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
@@ -36,9 +38,32 @@ public class KinesisConfig {
   public static final String MAX_RECORDS_TO_FETCH = "maxRecordsToFetch";
   public static final String ENDPOINT = "endpoint";
 
+  // IAM role configs
+  /**
+   * Enable Role based access to AWS.
+   * iamRoleBasedAccessEnabled - Set it to `true` to enable role based access, 
default: false
+   * roleArn - Required. specify the ARN of the role the client should assume.
+   * roleSessionName - session name to be used when creating a role based 
session. default: pinot-kineis-uuid
+   * externalId - string external id value required by role's policy. default: 
null
+   * sessionDurationSeconds - The duration, in seconds, of the role session. 
Default: 900
+   * asyncSessionUpdateEnabled -
+   *Configure whether the provider should fetch credentials 
asynchronously in the background.
+   *   If this is true, threads are less likely to block when credentials 
are loaded,
+   *   but additional resources are used to maintain the provider. Default 
- `true`
+   */
+  public static final String IAM_ROLE_BASED_ACCESS_ENABLED = 
"iamRoleBasedAccessEnabled";
+  public static final String ROLE_ARN = "roleArn";
+  public static final String ROLE_SESSION_NAME = "roleSessionName";
+  public static final String EXTERNAL_ID = "externalId";
+  public static final String SESSION_DURATION_SECONDS = 
"sessionDurationSeconds";
+  public static final String ASYNC_SESSION_UPDATED_ENABLED = 
"asyncSessionUpdateEnabled";
+
   // TODO: this is a starting point, until a better default is figured out
   public static final String DEFAULT_MAX_RECORDS = "20";
   public static final String DEFAULT_SHARD_ITERATOR_TYPE = 
ShardIteratorType.LATEST.toString();
+  public static final String DEFAULT_IAM_ROLE_BASED_ACCESS_ENABLED = "false";
+  public static final String DEFAULT_SESSION_DURATION_SECONDS = "900";
+  public static final String DEFAULT_ASYNC_SESSION_UPDATED_ENABLED = "true";
 
   private final String _streamTopicName;
   private final String _awsRegion;
@@ -48,6 +73,14 @@ public class KinesisConfig {
   private final String _secretKey;
   private final String _endpoint;
 
+  // IAM Role values
+  private boolean _iamRoleBasedAccess;
+  private String _roleArn;
+  private String _roleSessionName;
+  private String _externalId;
+  private int _sessionDurationSeconds;
+  private boolean _asyncSessionUpdateEnabled;
+
   public KinesisConfig(StreamConfig streamConfig) {
 Map props = streamConfig.getStreamConfigsMap();
 _streamTopicName = streamConfig.getTopicName();
@@ -60,23 +93,23 @@ public class KinesisConfig {
 _accessKey = props.get(ACCESS_KEY);
 _secretKey = props.get(SECRET_KEY);
 _endpoint = props.get(ENDPOINT);
-  }
 
-  public KinesisConfig(String streamTopicName, String awsRegion, 
ShardIteratorType shardIteratorType, String accessKey,
-  String secretKey, String endpoint) {
-this(streamTopicName, awsRegion, shardIteratorType, accessKey, secretKey, 
Integer.parseI

[pinot] branch master updated: Fix the NPE for ADLSGen2PinotFS (#9088)

2022-07-22 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 24130673d8 Fix the NPE for ADLSGen2PinotFS (#9088)
24130673d8 is described below

commit 24130673d8f23f297de30fa1600f42954184ba62
Author: Seunghyun Lee 
AuthorDate: Fri Jul 22 08:18:29 2022 -0700

Fix the NPE for ADLSGen2PinotFS (#9088)

When the proxyPort is null, an NPE was thrown.
This PR fixes the issue.
---
 .../pinot/plugin/filesystem/ADLSGen2PinotFS.java   | 18 --
 .../plugin/filesystem/test/ADLSGen2PinotFSTest.java|  2 +-
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git 
a/pinot-plugins/pinot-file-system/pinot-adls/src/main/java/org/apache/pinot/plugin/filesystem/ADLSGen2PinotFS.java
 
b/pinot-plugins/pinot-file-system/pinot-adls/src/main/java/org/apache/pinot/plugin/filesystem/ADLSGen2PinotFS.java
index ff53cb35d5..3a3a684b21 100644
--- 
a/pinot-plugins/pinot-file-system/pinot-adls/src/main/java/org/apache/pinot/plugin/filesystem/ADLSGen2PinotFS.java
+++ 
b/pinot-plugins/pinot-file-system/pinot-adls/src/main/java/org/apache/pinot/plugin/filesystem/ADLSGen2PinotFS.java
@@ -75,10 +75,9 @@ public class ADLSGen2PinotFS extends BasePinotFS {
   private static final Logger LOGGER = 
LoggerFactory.getLogger(ADLSGen2PinotFS.class);
 
   private enum AuthenticationType {
-ACCESS_KEY,
-AZURE_AD,
-AZURE_AD_WITH_PROXY
+ACCESS_KEY, AZURE_AD, AZURE_AD_WITH_PROXY
   }
+
   private static final String AUTHENTICATION_TYPE = "authenticationType";
   private static final String ACCOUNT_NAME = "accountName";
   private static final String ACCESS_KEY = "accessKey";
@@ -139,7 +138,7 @@ public class ADLSGen2PinotFS extends BasePinotFS {
 String proxyHost = config.getProperty(PROXY_HOST);
 String proxyUsername = config.getProperty(PROXY_USERNAME);
 String proxyPassword = config.getProperty(PROXY_PASSWORD);
-Integer proxyPort = Integer.parseInt(config.getProperty(PROXY_PORT));
+String proxyPort = config.getProperty(PROXY_PORT);
 
 String dfsServiceEndpointUrl = HTTPS_URL_PREFIX + accountName + 
AZURE_STORAGE_DNS_SUFFIX;
 String blobServiceEndpointUrl = HTTPS_URL_PREFIX + accountName + 
AZURE_BLOB_DNS_SUFFIX;
@@ -183,9 +182,9 @@ public class ADLSGen2PinotFS extends BasePinotFS {
 Preconditions.checkNotNull(proxyPassword, "Proxy Password cannot be 
null");
 
 NettyAsyncHttpClientBuilder builder = new 
NettyAsyncHttpClientBuilder();
-builder.proxy(
-new ProxyOptions(ProxyOptions.Type.HTTP, new 
InetSocketAddress(proxyHost, proxyPort)).setCredentials(
-proxyUsername, proxyPassword));
+builder.proxy(new ProxyOptions(ProxyOptions.Type.HTTP,
+new InetSocketAddress(proxyHost, 
Integer.parseInt(proxyPort))).setCredentials(proxyUsername,
+proxyPassword));
 ClientSecretCredentialBuilder clientSecretCredentialBuilder =
 new 
ClientSecretCredentialBuilder().clientId(clientId).clientSecret(clientSecret).tenantId(tenantId);
 clientSecretCredentialBuilder.httpClient(builder.build());
@@ -246,9 +245,8 @@ public class ADLSGen2PinotFS extends BasePinotFS {
   // By default, create directory call will overwrite if the path already 
exists. Setting IfNoneMatch = "*" to
   // prevent overwrite. 
https://docs.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/create
   DataLakeRequestConditions requestConditions = new 
DataLakeRequestConditions().setIfNoneMatch("*");
-  _fileSystemClient
-  
.createDirectoryWithResponse(AzurePinotFSUtil.convertUriToUrlEncodedAzureStylePath(uri),
 null, null, null,
-  null, requestConditions, null, null);
+  
_fileSystemClient.createDirectoryWithResponse(AzurePinotFSUtil.convertUriToUrlEncodedAzureStylePath(uri),
 null,
+  null, null, null, requestConditions, null, null);
   return true;
 } catch (DataLakeStorageException e) {
   // If the path already exists, doing nothing and return true
diff --git 
a/pinot-plugins/pinot-file-system/pinot-adls/src/test/java/org/apache/pinot/plugin/filesystem/test/ADLSGen2PinotFSTest.java
 
b/pinot-plugins/pinot-file-system/pinot-adls/src/test/java/org/apache/pinot/plugin/filesystem/test/ADLSGen2PinotFSTest.java
index b56179637c..6a4ec9b9fd 100644
--- 
a/pinot-plugins/pinot-file-system/pinot-adls/src/test/java/org/apache/pinot/plugin/filesystem/test/ADLSGen2PinotFSTest.java
+++ 
b/pinot-plugins/pinot-file-system/pinot-adls/src/test/java/org/apache/pinot/plugin/filesystem/test/ADLSGen2PinotFSTest.java
@@ -102,7 +102,7 @@ public class ADLSGen2PinotFSTest {
 _mockFileClient, _mockBlobContainerClient, _mockBlobClient, 
_mockBlobServiceClien

[pinot] branch master updated: Remove segments using valid doc ids instead of primary key (#8674)

2022-07-16 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 83410f69b4 Remove segments using valid doc ids instead of primary key 
(#8674)
83410f69b4 is described below

commit 83410f69b49988f4a1c1f0f373c8bafcd25404ba
Author: Kartik Khare 
AuthorDate: Sat Jul 16 15:04:19 2022 +0530

Remove segments using valid doc ids instead of primary key (#8674)

* Remove segments using valid doc ids instead of primary key

* Handle concurrent updates

* Refactor

* Reuse primary key

* Hash primary key before checking

* Reuse primary key

* Refactor: move methods for readability and cache values in local variable

Co-authored-by: Kartik Khare 
---
 .../manager/realtime/RealtimeTableDataManager.java |  17 +--
 ...adataAndDictionaryAggregationPlanMakerTest.java |   3 +-
 .../plan/maker/QueryOverrideWithHintsTest.java |   5 +
 .../indexsegment/immutable/EmptyIndexSegment.java  |   6 +
 .../immutable/ImmutableSegmentImpl.java|  14 +++
 .../indexsegment/mutable/IntermediateSegment.java  |  14 +++
 .../indexsegment/mutable/MutableSegmentImpl.java   |  13 ++
 .../segment/readers/PinotSegmentRecordReader.java  |  15 +++
 .../upsert/PartitionUpsertMetadataManager.java |  38 --
 .../local/upsert/TableUpsertMetadataManager.java   |   8 +-
 .../MutableSegmentImplUpsertComparisonColTest.java |   2 +-
 .../mutable/MutableSegmentImplUpsertTest.java  |   8 +-
 .../upsert/PartitionUpsertMetadataManagerTest.java | 132 ++---
 .../org/apache/pinot/segment/spi/IndexSegment.java |  10 ++
 14 files changed, 222 insertions(+), 63 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
index 195e5e023e..4fa98e5720 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
@@ -134,8 +134,9 @@ public class RealtimeTableDataManager extends 
BaseTableDataManager {
 try {
   _statsHistory = RealtimeSegmentStatsHistory.deserialzeFrom(statsFile);
 } catch (IOException | ClassNotFoundException e) {
-  _logger.error("Error reading history object for table {} from {}", 
_tableNameWithType,
-  statsFile.getAbsolutePath(), e);
+  _logger
+  .error("Error reading history object for table {} from {}", 
_tableNameWithType, statsFile.getAbsolutePath(),
+  e);
   File savedFile = new File(_tableDataDir, STATS_FILE_NAME + "." + 
UUID.randomUUID());
   try {
 FileUtils.moveFile(statsFile, savedFile);
@@ -198,6 +199,7 @@ public class RealtimeTableDataManager extends 
BaseTableDataManager {
   _primaryKeyColumns = schema.getPrimaryKeyColumns();
   Preconditions.checkState(!CollectionUtils.isEmpty(_primaryKeyColumns),
   "Primary key columns must be configured for upsert");
+
   String comparisonColumn = upsertConfig.getComparisonColumn();
   _upsertComparisonColumn =
   comparisonColumn != null ? comparisonColumn : 
tableConfig.getValidationConfig().getTimeColumnName();
@@ -211,7 +213,7 @@ public class RealtimeTableDataManager extends 
BaseTableDataManager {
 
   _tableUpsertMetadataManager =
   new TableUpsertMetadataManager(_tableNameWithType, _serverMetrics, 
partialUpsertHandler,
-  upsertConfig.getHashFunction());
+  upsertConfig.getHashFunction(), _primaryKeyColumns);
 }
   }
 
@@ -369,6 +371,7 @@ public class RealtimeTableDataManager extends 
BaseTableDataManager {
   }
 }
   }
+
   segmentDataManager =
   new LLRealtimeSegmentDataManager(segmentZKMetadata, tableConfig, 
this, _indexDir.getAbsolutePath(),
   indexLoadingConfig, schema, llcSegmentName, semaphore, 
_serverMetrics, partitionUpsertMetadataManager,
@@ -428,8 +431,8 @@ public class RealtimeTableDataManager extends 
BaseTableDataManager {
 for (String primaryKeyColumn : _primaryKeyColumns) {
   columnToReaderMap.put(primaryKeyColumn, new 
PinotSegmentColumnReader(immutableSegment, primaryKeyColumn));
 }
-columnToReaderMap.put(_upsertComparisonColumn,
-new PinotSegmentColumnReader(immutableSegment, 
_upsertComparisonColumn));
+columnToReaderMap
+.put(_upsertComparisonColumn, new 
PinotSegmentColumnReader(immutableSegment, _upsertComparisonColumn));
 int numTotalDocs = immutableSegment.getSegmentMetadata().getTotalDocs();
 int numPrimaryKeyColumns = _primaryKeyColumns.size();
 Iterator recordInfoIter

[pinot] branch master updated: Enable key value byte stitching in PulsarMessageBatch (#8897)

2022-07-07 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 1bcc032d0b Enable key value byte stitching in PulsarMessageBatch 
(#8897)
1bcc032d0b is described below

commit 1bcc032d0b03215e08d5200bd1974af3d00d6476
Author: icefury71 
AuthorDate: Thu Jul 7 03:49:10 2022 -0700

Enable key value byte stitching in PulsarMessageBatch (#8897)

* Adding ability to stitch key and value bytes when retrieving single 
message in PulsarMessageBatch

* Adding unit test for key value stitching in PulsarMessageBatch class

* Fixing checkstyle errors

* Bug fix in clearing byte buffer

* Addressing review comments
---
 .../pinot/plugin/stream/pulsar/PulsarConfig.java   |  14 +-
 .../plugin/stream/pulsar/PulsarMessageBatch.java   |  47 -
 .../pulsar/PulsarPartitionLevelConsumer.java   |  14 +-
 .../stream/pulsar/PulsarMessageBatchTest.java  | 199 +
 4 files changed, 265 insertions(+), 9 deletions(-)

diff --git 
a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarConfig.java
 
b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarConfig.java
index 78db1b766d..c0f210cbbe 100644
--- 
a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarConfig.java
+++ 
b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarConfig.java
@@ -36,6 +36,7 @@ public class PulsarConfig {
   public static final String BOOTSTRAP_SERVERS = "bootstrap.servers";
   public static final String AUTHENTICATION_TOKEN = "authenticationToken";
   public static final String TLS_TRUST_CERTS_FILE_PATH = 
"tlsTrustCertsFilePath";
+  public static final String ENABLE_KEY_VALUE_STITCH = "enableKeyValueStitch";
 
   private String _pulsarTopicName;
   private String _subscriberId;
@@ -44,6 +45,7 @@ public class PulsarConfig {
   private SubscriptionInitialPosition _subscriptionInitialPosition;
   private String _authenticationToken;
   private String _tlsTrustCertsFilePath;
+  private boolean _enableKeyValueStitch;
 
   public PulsarConfig(StreamConfig streamConfig, String subscriberId) {
 Map streamConfigMap = streamConfig.getStreamConfigsMap();
@@ -56,9 +58,13 @@ public class PulsarConfig {
 _authenticationToken = streamConfigMap.get(authenticationTokenKey);
 
 String tlsTrustCertsFilePathKey = StreamConfigProperties.
-constructStreamProperty(STREAM_TYPE, TLS_TRUST_CERTS_FILE_PATH);
+constructStreamProperty(STREAM_TYPE, TLS_TRUST_CERTS_FILE_PATH);
 _tlsTrustCertsFilePath = streamConfigMap.get(tlsTrustCertsFilePathKey);
 
+String enableKeyValueStitchKey = StreamConfigProperties.
+constructStreamProperty(STREAM_TYPE, ENABLE_KEY_VALUE_STITCH);
+_enableKeyValueStitch = 
Boolean.parseBoolean(streamConfigMap.get(enableKeyValueStitchKey));
+
 Preconditions.checkNotNull(_bootstrapServers, "No brokers provided in the 
config");
 
 OffsetCriteria offsetCriteria = streamConfig.getOffsetCriteria();
@@ -84,7 +90,7 @@ public class PulsarConfig {
   }
 
   public SubscriptionInitialPosition getInitialSubscriberPosition() {
-   return _subscriptionInitialPosition;
+return _subscriptionInitialPosition;
   }
 
   public String getAuthenticationToken() {
@@ -94,4 +100,8 @@ public class PulsarConfig {
   public String getTlsTrustCertsFilePath() {
 return _tlsTrustCertsFilePath;
   }
+
+  public boolean getEnableKeyValueStitch() {
+return _enableKeyValueStitch;
+  }
 }
diff --git 
a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarMessageBatch.java
 
b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarMessageBatch.java
index 514773719f..9655990721 100644
--- 
a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarMessageBatch.java
+++ 
b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarMessageBatch.java
@@ -21,6 +21,7 @@ package org.apache.pinot.plugin.stream.pulsar;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.List;
+import org.apache.commons.io.output.ByteArrayOutputStream;
 import org.apache.pinot.spi.stream.MessageBatch;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
 import org.apache.pulsar.client.api.Message;
@@ -28,17 +29,26 @@ import org.apache.pulsar.client.api.MessageId;
 import org.apache.pulsar.client.impl.BatchMessageIdImpl;
 import org.apache.pulsar.client.impl.MessageIdImpl;
 import org.a

[pinot] branch master updated: Adding constructor override for KinesisDataProducer (#8975)

2022-07-06 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 04cc62b363 Adding constructor override for KinesisDataProducer (#8975)
04cc62b363 is described below

commit 04cc62b363b3d9360b00d6174af04b7bf0088ba0
Author: Navina Ramesh 
AuthorDate: Wed Jul 6 00:52:58 2022 -0700

Adding constructor override for KinesisDataProducer (#8975)

* Adding constructor overrides for KinesisDataProducer

* Addressing feedback
---
 .../stream/kafka20/server/KafkaDataProducer.java   |  6 +-
 .../pinot/plugin/stream/kinesis/KinesisConfig.java | 17 ++
 .../stream/kinesis/server/KinesisDataProducer.java | 65 +-
 3 files changed, 59 insertions(+), 29 deletions(-)

diff --git 
a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/server/KafkaDataProducer.java
 
b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/server/KafkaDataProducer.java
index ed019d40d2..dd9790ec92 100644
--- 
a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/server/KafkaDataProducer.java
+++ 
b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/server/KafkaDataProducer.java
@@ -58,15 +58,13 @@ public class KafkaDataProducer implements 
StreamDataProducer {
 
   @Override
   public void produce(String topic, byte[] payload) {
-ProducerRecord record = new ProducerRecord(topic, payload);
-_producer.send(record);
+_producer.send(new ProducerRecord<>(topic, payload));
 _producer.flush();
   }
 
   @Override
   public void produce(String topic, byte[] key, byte[] payload) {
-ProducerRecord record = new ProducerRecord(topic, key, 
payload);
-_producer.send(record);
+_producer.send(new ProducerRecord<>(topic, key, payload));
 _producer.flush();
   }
 
diff --git 
a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
 
b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
index fa344aa065..95221ff3bf 100644
--- 
a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
+++ 
b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
@@ -62,6 +62,23 @@ public class KinesisConfig {
 _endpoint = props.get(ENDPOINT);
   }
 
+  public KinesisConfig(String streamTopicName, String awsRegion, 
ShardIteratorType shardIteratorType, String accessKey,
+  String secretKey, String endpoint) {
+this(streamTopicName, awsRegion, shardIteratorType, accessKey, secretKey, 
Integer.parseInt(DEFAULT_MAX_RECORDS),
+endpoint);
+  }
+
+  public KinesisConfig(String streamTopicName, String awsRegion, 
ShardIteratorType shardIteratorType, String accessKey,
+  String secretKey, int maxRecords, String endpoint) {
+_streamTopicName = streamTopicName;
+_awsRegion = awsRegion;
+_shardIteratorType = shardIteratorType;
+_accessKey = accessKey;
+_secretKey = secretKey;
+_numMaxRecordsToFetch = maxRecords;
+_endpoint = endpoint;
+  }
+
   public String getStreamTopicName() {
 return _streamTopicName;
   }
diff --git 
a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/server/KinesisDataProducer.java
 
b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/server/KinesisDataProducer.java
index a8e057ff24..6133a8c4f3 100644
--- 
a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/server/KinesisDataProducer.java
+++ 
b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/server/KinesisDataProducer.java
@@ -64,37 +64,52 @@ public class KinesisDataProducer implements 
StreamDataProducer {
   private KinesisClient _kinesisClient;
   private RetryPolicy _retryPolicy;
 
+  public KinesisDataProducer() { }
+
+  public KinesisDataProducer(KinesisClient kinesisClient) {
+this(kinesisClient, new FixedDelayRetryPolicy(
+Integer.parseInt(DEFAULT_NUM_RETRIES + 1), 
Integer.parseInt(DEFAULT_RETRY_DELAY_MILLIS)));
+  }
+
+  public KinesisDataProducer(KinesisClient kinesisClient, RetryPolicy 
retryPolicy) {
+_kinesisClient = kinesisClient;
+_retryPolicy = retryPolicy;
+  }
+
   @Override
   public void init(Properties props) {
-try {
-  KinesisClientBuilder kinesisClientBuilder;
-  if (props.containsKey(ACCESS) && prop

[pinot] branch master updated: Fix allSegmentLoaded check (#9010)

2022-07-04 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 792ff66eaf Fix allSegmentLoaded check (#9010)
792ff66eaf is described below

commit 792ff66eaf74357d00e98b921c6fbcb242cde807
Author: Saurabh Dubey 
AuthorDate: Mon Jul 4 17:05:43 2022 +0530

Fix allSegmentLoaded check (#9010)

* Fix allSegmentLoaded check

* Comments + dedup ITs

* Review comments

Co-authored-by: Saurabh Dubey 
---
 .../tests/BaseClusterIntegrationTest.java  |  21 +++
 .../integration/tests/DedupIntegrationTest.java| 153 +
 .../src/test/resources/dedupIngestionTestData.avro | Bin 0 -> 294 bytes
 .../test/resources/dedupIngestionTestData.tar.gz   | Bin 0 -> 321 bytes
 .../test/resources/dedupIngestionTestSchema.schema |  23 
 .../local/utils/tablestate/TableStateUtils.java|  69 ++
 6 files changed, 241 insertions(+), 25 deletions(-)

diff --git 
a/pinot-integration-test-base/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTest.java
 
b/pinot-integration-test-base/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTest.java
index f8fe1a42d6..8308435a47 100644
--- 
a/pinot-integration-test-base/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTest.java
+++ 
b/pinot-integration-test-base/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTest.java
@@ -38,7 +38,9 @@ import org.apache.pinot.common.utils.TarGzCompressionUtils;
 import org.apache.pinot.common.utils.config.TagNameUtils;
 import org.apache.pinot.plugin.stream.kafka.KafkaStreamConfigProperties;
 import org.apache.pinot.spi.config.table.ColumnPartitionConfig;
+import org.apache.pinot.spi.config.table.DedupConfig;
 import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.config.table.HashFunction;
 import org.apache.pinot.spi.config.table.QueryConfig;
 import org.apache.pinot.spi.config.table.ReplicaGroupStrategyConfig;
 import org.apache.pinot.spi.config.table.RoutingConfig;
@@ -398,6 +400,25 @@ public abstract class BaseClusterIntegrationTest extends 
ClusterTest {
 .setUpsertConfig(new UpsertConfig(UpsertConfig.Mode.FULL, null, null, 
null, null)).build();
   }
 
+  /**
+   * Creates a new Dedup enabled table config
+   */
+  protected TableConfig createDedupTableConfig(File sampleAvroFile, String 
primaryKeyColumn, int numPartitions) {
+AvroFileSchemaKafkaAvroMessageDecoder._avroFile = sampleAvroFile;
+Map columnPartitionConfigMap = new 
HashMap<>();
+columnPartitionConfigMap.put(primaryKeyColumn, new 
ColumnPartitionConfig("Murmur", numPartitions));
+
+return new 
TableConfigBuilder(TableType.REALTIME).setTableName(getTableName()).setSchemaName(getSchemaName())
+
.setTimeColumnName(getTimeColumnName()).setFieldConfigList(getFieldConfigs()).setNumReplicas(getNumReplicas())
+
.setSegmentVersion(getSegmentVersion()).setLoadMode(getLoadMode()).setTaskConfig(getTaskConfig())
+
.setBrokerTenant(getBrokerTenant()).setServerTenant(getServerTenant()).setIngestionConfig(getIngestionConfig())
+
.setLLC(useLlc()).setStreamConfigs(getStreamConfigs()).setNullHandlingEnabled(getNullHandlingEnabled())
+.setRoutingConfig(new RoutingConfig(null, null, 
RoutingConfig.STRICT_REPLICA_GROUP_INSTANCE_SELECTOR_TYPE))
+.setSegmentPartitionConfig(new 
SegmentPartitionConfig(columnPartitionConfigMap))
+.setReplicaGroupStrategyConfig(new 
ReplicaGroupStrategyConfig(primaryKeyColumn, 1))
+.setDedupConfig(new DedupConfig(true, HashFunction.NONE)).build();
+  }
+
   /**
* Returns the REALTIME table config in the cluster.
*/
diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/DedupIntegrationTest.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/DedupIntegrationTest.java
new file mode 100644
index 00..d133c5c02b
--- /dev/null
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/DedupIntegrationTest.java
@@ -0,0 +1,153 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 

[pinot] branch master updated (81bda1d26a -> ea564f0add)

2022-06-08 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


from 81bda1d26a fix merge conflicts
 add ea564f0add Allow appending UUID at end of segment name (#8789)

No new revisions were added by this update.

Summary of changes:
 .../batch/common/SegmentGenerationTaskRunner.java  | 14 +---
 .../pinot/segment/local/utils/IngestionUtils.java  |  6 +++---
 .../name/InputFileSegmentNameGenerator.java| 25 +++---
 .../name/NormalizedDateSegmentNameGenerator.java   | 16 --
 .../creator/name/SimpleSegmentNameGenerator.java   | 13 +--
 .../pinot/spi/ingestion/batch/BatchConfig.java |  7 ++
 .../spi/ingestion/batch/BatchConfigProperties.java |  2 ++
 7 files changed, 65 insertions(+), 18 deletions(-)


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated: Update ZK config in helm chart (#8817)

2022-06-03 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new cd55015c0a Update ZK config in helm chart (#8817)
cd55015c0a is described below

commit cd55015c0ad0875c47061ce196e2373164771243
Author: Kartik Khare 
AuthorDate: Sat Jun 4 00:14:31 2022 +0530

Update ZK config in helm chart (#8817)

* Revert "Revert "Bug fix: Complex type transformer should not be created 
for empty config (#8600)" (#8646)"

This reverts commit 74ee7252b4e0c765983af799f17b620195b05efd.

* Revert "Revert "Revert "Bug fix: Complex type transformer should not be 
created for empty config (#8600)" (#8646)""

This reverts commit 8b0adc7ac06762198651ccfd0cd027947bd32990.

* Update zk config in pinot helm chart

Co-authored-by: Kartik Khare 
---
 kubernetes/helm/pinot/values.yaml | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/kubernetes/helm/pinot/values.yaml 
b/kubernetes/helm/pinot/values.yaml
index 503aa21747..5e7a0811bd 100644
--- a/kubernetes/helm/pinot/values.yaml
+++ b/kubernetes/helm/pinot/values.yaml
@@ -441,6 +441,16 @@ zookeeper:
   ## Replicas
   replicaCount: 1
 
+  ## Ongoing data directory cleanup configuration
+  autopurge:
+
+## The time interval (in hours) for which the purge task has to be 
triggered
+## Set to a positive integer to enable the auto purging
+purgeInterval: 1
+
+## The most recent snapshots amount (and corresponding transaction logs) 
to retain
+snapRetainCount: 5
+
   ## Environmental variables to set in Zookeeper
   env:
 ## The JVM heap size to allocate to Zookeeper


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



[pinot] branch master updated: Add option to skip star-tree index evaluation during query (#8737)

2022-05-23 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new 15416e5e8a Add option to skip star-tree index evaluation during query 
(#8737)
15416e5e8a is described below

commit 15416e5e8ada7e8ab546017f048a011862d65ec2
Author: Kartik Khare 
AuthorDate: Tue May 24 11:43:34 2022 +0530

Add option to skip star-tree index evaluation during query (#8737)

* Add option to skip star-tree index evaluation during query

* Make backward compatible

* Refactor: use same key for query option

* removed unused methods

Co-authored-by: Kartik Khare 
---
 .../pinot/core/plan/AggregationGroupByOrderByPlanNode.java|  2 +-
 .../java/org/apache/pinot/core/plan/AggregationPlanNode.java  |  2 +-
 .../apache/pinot/core/plan/maker/InstancePlanMakerImplV2.java |  4 
 .../apache/pinot/core/query/request/context/QueryContext.java | 10 ++
 .../java/org/apache/pinot/core/startree/StarTreeUtils.java| 11 ---
 .../java/org/apache/pinot/core/util/QueryOptionsUtils.java|  8 
 .../main/java/org/apache/pinot/spi/utils/CommonConstants.java |  1 +
 7 files changed, 25 insertions(+), 13 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationGroupByOrderByPlanNode.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationGroupByOrderByPlanNode.java
index 6451b43c83..10fd34f343 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationGroupByOrderByPlanNode.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationGroupByOrderByPlanNode.java
@@ -64,7 +64,7 @@ public class AggregationGroupByOrderByPlanNode implements 
PlanNode {
 
 // Use star-tree to solve the query if possible
 List starTrees = _indexSegment.getStarTrees();
-if (starTrees != null && !StarTreeUtils.isStarTreeDisabled(_queryContext)) 
{
+if (starTrees != null && !_queryContext.isSkipStarTree()) {
   AggregationFunctionColumnPair[] aggregationFunctionColumnPairs =
   StarTreeUtils.extractAggregationFunctionPairs(aggregationFunctions);
   if (aggregationFunctionColumnPairs != null) {
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java 
b/pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java
index e0ea896609..2f26535d1e 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java
@@ -198,7 +198,7 @@ public class AggregationPlanNode implements PlanNode {
 
 // Use star-tree to solve the query if possible
 List starTrees = _indexSegment.getStarTrees();
-if (starTrees != null && !StarTreeUtils.isStarTreeDisabled(_queryContext)) 
{
+if (starTrees != null && !_queryContext.isSkipStarTree()) {
   AggregationFunctionColumnPair[] aggregationFunctionColumnPairs =
   StarTreeUtils.extractAggregationFunctionPairs(aggregationFunctions);
   if (aggregationFunctionColumnPairs != null) {
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/plan/maker/InstancePlanMakerImplV2.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/plan/maker/InstancePlanMakerImplV2.java
index 259a7b78f0..4d9558f6f5 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/plan/maker/InstancePlanMakerImplV2.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/plan/maker/InstancePlanMakerImplV2.java
@@ -186,10 +186,14 @@ public class InstancePlanMakerImplV2 implements PlanMaker 
{
 
   private void applyQueryOptions(QueryContext queryContext) {
 Map queryOptions = queryContext.getQueryOptions();
+Map debugOptions = queryContext.getDebugOptions();
 
 // Set skipUpsert
 queryContext.setSkipUpsert(QueryOptionsUtils.isSkipUpsert(queryOptions));
 
+// Set skipStarTree
+
queryContext.setSkipStarTree(QueryOptionsUtils.isSkipStarTree(queryOptions, 
debugOptions));
+
 // Set maxExecutionThreads
 int maxExecutionThreads;
 Integer maxExecutionThreadsFromQuery = 
QueryOptionsUtils.getMaxExecutionThreads(queryOptions);
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java
index fe9311e928..cff843d282 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java
@@ -101,6 +101,8 @@ public class QueryContext {
   private boolean _enablePrefetch;
   // Whether to skip upsert for the query
   private boolean _skipUpsert;
+  // Whether to skip star-tree index for the query
+  private boolean _skipS

[pinot] branch master updated: Add Pulsar plugin back to distribution (#8657)

2022-05-15 Thread kharekartik
This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
 new d3c97d54d2 Add Pulsar plugin back to distribution (#8657)
d3c97d54d2 is described below

commit d3c97d54d24ab42203c760d1ed5cbef981b13f44
Author: Kartik Khare 
AuthorDate: Mon May 16 10:07:59 2022 +0530

Add Pulsar plugin back to distribution (#8657)

* Bug fix: Complex type transformer should not be created for empty configs

* Add pulsar plugin to distribution
---
 pinot-distribution/pinot-assembly.xml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pinot-distribution/pinot-assembly.xml 
b/pinot-distribution/pinot-assembly.xml
index 72f47d15bf..0ccb5c6a24 100644
--- a/pinot-distribution/pinot-assembly.xml
+++ b/pinot-distribution/pinot-assembly.xml
@@ -69,12 +69,10 @@
   
plugins/pinot-stream-ingestion/pinot-kinesis/pinot-kinesis-${project.version}-shaded.jar
 
 
-
 
 
 


-
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org



  1   2   >