taiyang-li commented on code in PR #2321:
URL: https://github.com/apache/orc/pull/2321#discussion_r2220989989


##########
c++/src/ColumnWriter.cc:
##########
@@ -991,61 +977,33 @@ namespace orc {
   // insert a new string into dictionary, return its insertion order
   size_t SortedStringDictionary::insert(const char* str, size_t len) {
     size_t index = flatDict_.size();
-    auto ret = keyToIndex_.emplace(std::string(str, len), index);
-    if (ret.second) {
-      flatDict_.emplace_back(ret.first->first.data(), ret.first->first.size(), index);
+
+    auto it = keyToIndex_.find(std::string_view{str, len});
+    if (it != keyToIndex_.end()) {
+      return it->second;
+    } else {
+      flatDict_.emplace_back(str, len);
       totalLength_ += len;
+
+      const auto& lastEntry = flatDict_.back();
+      keyToIndex_.emplace(std::string_view{lastEntry.data->data(), lastEntry.data->size()}, index);
+      return index;
     }
-    return ret.first->second;
   }
 
   // write dictionary data & length to output buffer
   void SortedStringDictionary::flush(AppendOnlyBufferedStream* dataStream,
                                      RleEncoder* lengthEncoder) const {
-    std::sort(flatDict_.begin(), flatDict_.end(), LessThan());

Review Comment:
   @wgtmac @ffacs I will fix it in another PR.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to