Keshava11 opened a new issue, #15940:
URL: https://github.com/apache/druid/issues/15940
OOM error when uploading data from a MySQL table into Apache Druid using a JSON task.
### Affected Version
28.0.1
### Description
A DigitalOcean droplet with **8 GB of RAM** was spun up to run Apache Druid (v28.0.1).
Installed a MySQL database and created a table with 11 columns (all of type varchar, most of them nullable), then imported 4.8 million records into that table.
I wanted to import the data **(all 4.8 million rows)** into Druid by submitting a JSON task, but that returns an out-of-memory error.
However, it works when I use the same JSON task to upload **only 30 thousand rows**. One alternative would be to submit the JSON task 160 times to cover all 4.8 million records, but there should be a better way; one idea is sketched below.
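If I understand the docs correctly, the SQL input source treats each entry in `sqls` as its own split, and MySQL's Connector/J driver buffers the entire result set in memory by default, so a single `SELECT *` over 4.8 million rows may be heavy for one task regardless of Druid tuning. A sketch of what I mean by splitting the query into ranges (the `id` column and the boundary values are made up for illustration):

```json
"inputSource": {
  "type": "sql",
  "sqls": [
    "SELECT * FROM mydatatable WHERE id >= 0 AND id < 1200000",
    "SELECT * FROM mydatatable WHERE id >= 1200000 AND id < 2400000",
    "SELECT * FROM mydatatable WHERE id >= 2400000 AND id < 3600000",
    "SELECT * FROM mydatatable WHERE id >= 3600000"
  ]
}
```

Each query would then be a separate split, which (combined with `maxNumConcurrentSubTasks` greater than 1) could be handled by separate sub-tasks instead of one giant read.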
My assumption is that this issue occurs due to the memory parameters defined in the start-druid script in the bin directory.
Or do I need to change something in the submitted JSON task itself?
What exactly should I change in the configuration?
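In case it helps to frame the question: the single-server docs describe a `-m` flag on `bin/start-druid` that sets the total memory budget the script divides among the services. If the auto-detected sizing is what is overflowing, something like the following might be a first thing to try (the `6g` value is only a guess that leaves headroom for MySQL and the OS on an 8 GB droplet):

```
bin/start-druid -m 6g
```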
Following is the JSON task that I submitted:

```json
{
"type": "index_parallel",
"spec": {
"dataSchema": {
"dataSource": "mydata_sql",
"timestampSpec": {
"column": "__time",
"format": "iso",
"missingValue": "2024-02-18T00:00:00.000Z"
},
"dimensionsSpec": {
"dimensions": [],
"dimensionExclusions": [
"__time"
],
"includeAllDimensions": false,
"useSchemaDiscovery": false
},
"metricsSpec": [],
"granularitySpec": {
"type": "uniform",
"segmentGranularity": "DAY",
"queryGranularity": {
"type": "none"
},
"rollup": false,
"intervals": []
},
"transformSpec": {
"filter": null,
"transforms": []
}
},
"ioConfig": {
"type": "index_parallel",
"inputSource": {
"type": "sql",
"sqls": [
"SELECT * FROM mydatatable"
],
"foldCase": false,
"database": {
"type": "mysql",
"connectorConfig": {
"createTables": true,
"host": "localhost",
"port": 1527,
"connectURI": "jdbc:mysql://localhost:3306/mydb",
"user": "mydbuser",
"password": "mydbpass",
"dbcp": null
},
"driverClassName": null
}
},
"inputFormat": null,
"appendToExisting": false,
"dropExisting": false
},
"tuningConfig": {
"type": "index_parallel",
"maxRowsPerSegment": null,
"appendableIndexSpec": {
"type": "onheap",
"preserveExistingMetrics": false
},
"maxRowsInMemory": 1000000,
"maxBytesInMemory": 0,
"skipBytesInMemoryOverheadCheck": false,
"maxTotalRows": null,
"numShards": null,
"splitHintSpec": null,
"partitionsSpec": null,
"indexSpec": {
"bitmap": {
"type": "roaring"
},
"dimensionCompression": "lz4",
"stringDictionaryEncoding": {
"type": "utf8"
},
"metricCompression": "lz4",
"longEncoding": "longs"
},
"indexSpecForIntermediatePersists": {
"bitmap": {
"type": "roaring"
},
"dimensionCompression": "lz4",
"stringDictionaryEncoding": {
"type": "utf8"
},
"metricCompression": "lz4",
"longEncoding": "longs"
},
"maxPendingPersists": 0,
"forceGuaranteedRollup": false,
"reportParseExceptions": false,
"pushTimeout": 0,
"segmentWriteOutMediumFactory": null,
"maxNumConcurrentSubTasks": 1,
"maxRetry": 3,
"taskStatusCheckPeriodMs": 1000,
"chatHandlerTimeout": "PT10S",
"chatHandlerNumRetries": 5,
"maxNumSegmentsToMerge": 100,
"totalNumMergeTasks": 10,
"logParseExceptions": false,
"maxParseExceptions": 2147483647,
"maxSavedParseExceptions": 0,
"maxColumnsToMerge": -1,
"awaitSegmentAvailabilityTimeoutMillis": 0,
"maxAllowedLockCount": -1,
"partitionDimensions": []
}
},
"context": {
"forceTimeChunkLock": true,
"useLineageBasedSegmentAllocation": true
}
}
```
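For reference on the tuningConfig side: per the Druid docs, `maxBytesInMemory: 0` means one-sixth of the JVM max heap, and `maxRowsInMemory` defaults to 1,000,000. With a small heap it may be worth lowering `maxRowsInMemory` so the task persists to disk more often, and raising `maxNumConcurrentSubTasks` if the query is split into ranges as sketched above. Illustrative, untested values:

```json
"tuningConfig": {
  "type": "index_parallel",
  "maxRowsInMemory": 150000,
  "maxNumConcurrentSubTasks": 2
}
```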