ege-st commented on code in PR #11776:
URL: https://github.com/apache/pinot/pull/11776#discussion_r1367009788
##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java:
##########
@@ -328,12 +332,76 @@ public void indexRow(GenericRow row)
String columnName = entry.getKey();
// If row has null value for given column name, add to null value
vector
if (row.isNullValue(columnName)) {
- _nullValueVectorCreatorMap.get(columnName).setNull(_docIdCounter);
+ _nullValueVectorCreatorMap.get(columnName).setNull(_docPosOnDisk);
}
}
}
- _docIdCounter++;
+ _docPosOnDisk++;
+ }
+
+ @Override
+ public void indexColumn(String columnName, @Nullable int[] sortedDocIds,
IndexSegment segment,
+ boolean skipDefaultNullValues)
+ throws IOException {
+ long startNS = System.nanoTime();
+
+ // Iterate over each value in the column
+ try (PinotSegmentColumnReader colReader = new
PinotSegmentColumnReader(segment, columnName)) {
+ int numDocs = segment.getSegmentMetadata().getTotalDocs();
+ Map<IndexType<?, ?, ?>, IndexCreator> creatorsByIndex =
_creatorsByColAndIndex.get(columnName);
+ NullValueVectorCreator nullVec =
_nullValueVectorCreatorMap.get(columnName);
+ FieldSpec fieldSpec = _schema.getFieldSpecFor(columnName);
+ SegmentDictionaryCreator dictionaryCreator =
_dictionaryCreatorMap.get(columnName);
+ if (sortedDocIds != null) {
+ int onDiskDocId = 0;
+ for (int docId : sortedDocIds) {
+ indexColumnValue(colReader, creatorsByIndex, columnName, fieldSpec,
dictionaryCreator, docId, onDiskDocId,
+ nullVec, skipDefaultNullValues);
+ onDiskDocId += 1;
+ }
+ } else {
+ for (int docId = 0; docId < numDocs; docId++) {
+ indexColumnValue(colReader, creatorsByIndex, columnName, fieldSpec,
dictionaryCreator, docId, docId, nullVec,
+ skipDefaultNullValues);
+ }
+ }
+ }
+
+ _docPosOnDisk++;
+ }
+
+ private void indexColumnValue(PinotSegmentColumnReader colReader,
+ Map<IndexType<?, ?, ?>, IndexCreator> creatorsByIndex, String
columnName, FieldSpec fieldSpec,
+ SegmentDictionaryCreator dictionaryCreator, int sourceDocId, int
onDiskDocPos, NullValueVectorCreator nullVec,
+ boolean skipDefaultNullValues)
+ throws IOException {
+ Object columnValueToIndex = colReader.getValue(sourceDocId);
+ if (columnValueToIndex == null) {
+ throw new RuntimeException("Null value for column:" + columnName);
+ }
+
+ if (fieldSpec.isSingleValueField()) {
+ indexSingleValueRow(dictionaryCreator, columnValueToIndex,
creatorsByIndex);
+ } else {
+ indexMultiValueRow(dictionaryCreator, (Object[]) columnValueToIndex,
creatorsByIndex);
+ }
+
+ if (_nullHandlingEnabled && !skipDefaultNullValues) {
+ //handling null values
+// In row oriented:
+// - this.indexRow iterates over each column and checks if it
isNullValue. If it is then it sets the null
+// value vector for that doc id
+// - This null value comes from the GenericRow that is created by
PinotSegmentRecordReader
+// - PinotSegmentRecordReader:L224 is where we figure out the
null value stuff
+// - PSegRecReader calls PinotSegmentColumnReader.isNull on the
doc id to determine if the value for that
+// column of that docId is null
+// - if it returns true and we are NOT skipping null values we
put the default null value into that field
+// of the GenericRow
Review Comment:
I added this comment block to see whether it would help people better
understand the steps involved in the null value logic. Since it didn't
help, I'll remove it.
##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java:
##########
@@ -303,6 +305,8 @@ public static ChunkCompressionType
getDefaultCompressionType(FieldType fieldType
@Override
public void indexRow(GenericRow row)
throws IOException {
+ long startNS = System.nanoTime();
Review Comment:
Removed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]