DixitThinkbiz opened a new issue, #15295:
URL: https://github.com/apache/pinot/issues/15295
### Description
I am working on a proof-of-concept (POC) where I need to ingest data into
Pinot and use S3 as deep storage. Although ingesting data from S3 into Pinot
succeeds, the generated segments are not being uploaded to S3 (deep storage).
### Table Schema
```json
{
"schemaName": "employee_attendance",
"dimensionFieldSpecs": [
{ "name": "attendance_id", "dataType": "INT" },
{ "name": "employee_id", "dataType": "INT" }
],
"dateTimeFieldSpecs": [
{
"name": "punch_time",
"dataType": "TIMESTAMP",
"format": "1:MILLISECONDS:EPOCH",
"granularity": "1:MILLISECONDS"
}
],
"primaryKeyColumns": ["attendance_id"]
}
```
### Table Configuration
```json
{
"tableName": "employee_attendance",
"tableType": "OFFLINE",
"segmentsConfig": {
"timeColumnName": "punch_time",
"schemaName": "employee_attendance",
"replication": "1"
},
"tableIndexConfig": {
"starTreeIndexConfigs": [
{
"dimensionsSplitOrder": ["employee_id"],
"skipStarNodeCreationForDimensions": [],
"maxLeafRecords": "24",
"aggregationConfigs": [
{ "columnName": "employee_id", "aggregationFunction": "COUNT" }
]
}
]
},
"routing": { "instanceSelectorType": "strictReplicaGroup" },
"tenants": {
"broker": "DefaultTenant",
"server": "DefaultTenant"
},
"ingestionConfig": {
"batchIngestionConfig": {
"segmentIngestionType": "APPEND",
"segmentIngestionFrequency": "DAILY",
"batchConfigMaps": [
{
"inputDirURI": "s3://bucket-name/",
"includeFileNamePattern": "glob:**/*.json",
"excludeFileNamePattern": "glob:**/*.tmp",
"inputFormat": "json",
"input.fs.className":
"org.apache.pinot.plugin.filesystem.S3PinotFS",
"input.fs.prop.region": "ap-northeast-1",
"input.fs.prop.accessKey": "****",
"input.fs.prop.secretKey": "****"
}
],
"segmentNameSpec": {},
"pushSpec": {}
}
},
"task": {
"taskTypeConfigsMap": {
"SegmentGenerationAndPushTask": {
"schedule": "0 */1 * * * ?",
"tableMaxNumTasks": "10"
}
}
},
"metadata": {}
}
```
### Controller Configuration (controller.conf)
```properties
# Pinot Role
pinot.service.role=CONTROLLER
# Pinot Cluster name
pinot.cluster.name=pinot-quickstart
# Pinot Zookeeper Server
pinot.zk.server=localhost:2181
# Use hostname as Pinot Instance ID
pinot.set.instance.id.to.hostname=true
# Pinot Controller Port
controller.port=9000
controller.zk.str=pinot-zookeeper:2181
controller.vip.host=127.0.0.1
controller.vip.port=9000
controller.task.scheduler.enabled=true
controller.local.temp.dir=/var/pinot/controller/data
# Deep storage configuration
pinot.controller.storage.factory.class.s3=org.apache.pinot.plugin.filesystem.S3PinotFS
pinot.controller.storage.factory.s3.disableAcl=false
pinot.controller.storage.factory.s3.region=ap-northeast-1
controller.data.dir=s3://bucket-name/
pinot.controller.segment.fetcher.protocols=file,http,s3
pinot.controller.segment.fetcher.s3.class=org.apache.pinot.common.utils.fetcher.PinotFSSegmentFetcher
pinot.controller.storage.factory.s3.accessKey=****
pinot.controller.storage.factory.s3.secretKey=****
```
### Server Configuration (server.conf)
```properties
# Pinot Role
pinot.service.role=SERVER
# Pinot Cluster name
pinot.cluster.name=pinot-quickstart
# Pinot Zookeeper Server
pinot.zk.server=localhost:2181
pinot.set.instance.id.to.hostname=true
# Pinot Server Ports
pinot.server.netty.port=8098
pinot.server.adminapi.port=8097
# Data directories and deep storage
pinot.server.instance.dataDir=/tmp/pinot/data/server/index
pinot.server.instance.segmentTarDir=/tmp/pinot/data/server/segmentTar
pinot.server.segment.store.uri=s3://bucket-name/
pinot.server.storage.factory.s3.disableAcl=false
pinot.server.storage.factory.class.s3=org.apache.pinot.plugin.filesystem.S3PinotFS
pinot.server.storage.factory.s3.region=ap-northeast-1
pinot.server.segment.fetcher.protocols=file,http,s3
pinot.server.segment.fetcher.s3.class=org.apache.pinot.common.utils.fetcher.PinotFSSegmentFetcher
pinot.server.storage.factory.s3.accessKey=****
pinot.server.storage.factory.s3.secretKey=****
```
### Minion Configuration (minion.conf)
```properties
pinot.set.instance.id.to.hostname=true
pinot.minion.storage.factory.class.s3=org.apache.pinot.plugin.filesystem.S3PinotFS
pinot.minion.storage.factory.s3.region=us-east-1
pinot.minion.segment.fetcher.protocols=file,http,s3
pinot.minion.segment.fetcher.s3.class=org.apache.pinot.common.utils.fetcher.PinotFSSegmentFetcher
```
### Request for Assistance
Any insights or suggestions on why the segments might not be uploading to S3
(deep storage) would be greatly appreciated.
Please let me know if further logs or configuration details are needed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]