klsince commented on code in PR #12440: URL: https://github.com/apache/pinot/pull/12440#discussion_r1523970058
########## pinot-spi/src/main/java/org/apache/pinot/spi/env/SegmentMetadataPropertyWriter.java: ########## @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.spi.env; + +import java.io.IOException; +import java.io.Writer; +import org.apache.commons.configuration2.PropertiesConfiguration.PropertiesWriter; +import org.apache.commons.configuration2.convert.ListDelimiterHandler; + + +/** + * SegmentMetadataPropertyWriter extends the PropertiesWriter + * <p> + * Purpose: custom property writer for writing the segment metadata faster by skipping the escaping of key. + */ +public class SegmentMetadataPropertyWriter extends PropertiesWriter { + private boolean _skipEscapePropertyName; + private final String _segmentMetadataVersionHeader; + + public SegmentMetadataPropertyWriter(final Writer writer, ListDelimiterHandler handler, + String segmentMetadataVersionHeader) { + super(writer, handler); + _segmentMetadataVersionHeader = segmentMetadataVersionHeader; + } + + @Override + protected String escapeKey(final String key) { + // skip the escapeKey functionality, if segment metadata has a newer version + // if not newer version, follow the escape for backward compatibility. + if (_skipEscapePropertyName) { + return key; + } + return super.escapeKey(key); + } + + @Override + public void writeln(final String s) throws IOException { Review Comment: Thanks for the detailed explanation. I still didn't follow why we have to decide version while parsing or writing the lines, which may be too late as we already parse/write many lines in the expensive ways. Could you provide a example segment metadata file with version header? I feel my understanding of adding comments into segment metadata file to include version info seems different from yours (or how the config lib works). For `SegmentMetadataPropertyReader`, I was thinking that we could read comment ourselves (just 1st line to be simple as we control the writing logic and we can just write a single comment to the config file), and then check if there is version header there. If there is a version, then use the new reader, which does no escaping and no regex; if no version info in the comment, the we continue to use the old reader to read segment metadata. ########## pinot-spi/src/main/java/org/apache/pinot/spi/env/CommonsConfigurationUtils.java: ########## @@ -100,24 +103,39 @@ public static PropertiesConfiguration fromPath(String path, boolean setIOFactory * @return a {@link PropertiesConfiguration} instance. */ public static PropertiesConfiguration fromInputStream(InputStream stream, boolean setIOFactory, - boolean setDefaultDelimiter) + boolean setDefaultDelimiter, PropertyIOFactoryKind ioFactoryKind) throws ConfigurationException { - PropertiesConfiguration config = createPropertiesConfiguration(setIOFactory, setDefaultDelimiter); + PropertiesConfiguration config = createPropertiesConfiguration(setIOFactory, setDefaultDelimiter, ioFactoryKind); FileHandler fileHandler = new FileHandler(config); fileHandler.load(stream); return config; } + public static PropertiesConfiguration segmentMetadataFromFile(File file, boolean setIOFactory, + boolean setDefaultDelimiter, PropertyIOFactoryKind ioFactoryKind, String segmentVersionHeader) + throws ConfigurationException { + return fromFile(file, setIOFactory, setDefaultDelimiter, ioFactoryKind, segmentVersionHeader); + } + + public static PropertiesConfiguration fromFile(File file, boolean setIOFactory, + boolean setDefaultDelimiter, PropertyIOFactoryKind ioFactoryKind) + throws ConfigurationException { + return fromFile(file, setIOFactory, setDefaultDelimiter, ioFactoryKind, ""); Review Comment: I see both null and `""` are used by default, anything special to use either one? I'd assume we pass null by default ########## pinot-spi/src/main/java/org/apache/pinot/spi/env/CommonsConfigurationUtils.java: ########## @@ -287,4 +321,23 @@ private static PropertiesConfiguration createPropertiesConfiguration(boolean set return config; } + + /** + * Creates the IOFactory based on the provided kind. + * + * @param ioFactoryKind IOFactory kind + * @param headerContentToCheck header content to validate. + * @return IOFactory + */ + private static IOFactory createPropertyIOFactory(PropertyIOFactoryKind ioFactoryKind, String headerContentToCheck) { + switch (ioFactoryKind) { + case ConfigFileIOFactory: + return new ConfigFilePropertyIOFactory(); + case SegmentMetadataIOFactory: + return new SegmentMetadataPropertyIOFactory(headerContentToCheck); + case DefaultPropertyConfigurationIOFactory: Review Comment: remove this case as it falls through to default handling ########## pinot-spi/src/main/java/org/apache/pinot/spi/env/CommonsConfigurationUtils.java: ########## @@ -272,12 +290,28 @@ public static String recoverSpecialCharacterInPropertyValue(String value) { } private static PropertiesConfiguration createPropertiesConfiguration(boolean setIOFactory, - boolean setDefaultDelimiter) { + boolean setDefaultDelimiter, PropertyIOFactoryKind ioFactoryKind) { + return createPropertiesConfiguration(setIOFactory, setDefaultDelimiter, ioFactoryKind, null); + } + + /** + * creates the instance of the {@link org.apache.commons.configuration2.PropertiesConfiguration} + * with custom od default {@link org.apache.commons.configuration2.PropertiesConfiguration.IOFactory} + * and legacy list delimiter {@link org.apache.commons.configuration2.convert.LegacyListDelimiterHandler} + * + * @param setIOFactory sets the IOFactory + * @param setDefaultDelimiter sets the default list delimiter. + * @param ioFactoryKind IOFactory kind + * @param headerContentToCheck header content to validate. + * @return PropertiesConfiguration + */ + private static PropertiesConfiguration createPropertiesConfiguration(boolean setIOFactory, + boolean setDefaultDelimiter, PropertyIOFactoryKind ioFactoryKind, String headerContentToCheck) { Review Comment: nit: just `headerToCheck` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org