JackieTien97 commented on code in PR #12447: URL: https://github.com/apache/iotdb/pull/12447#discussion_r1584008968
########## iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/tag/TagLogFile.java: ########## @@ -95,22 +102,85 @@ public synchronized void copyTo(File targetFile) throws IOException { * @return tags map, attributes map * @throws IOException error occurred when reading disk */ - public Pair<Map<String, String>, Map<String, String>> read(int size, long position) - throws IOException { + public Pair<Map<String, String>, Map<String, String>> read(long position) throws IOException { if (position < 0) { return new Pair<>(Collections.emptyMap(), Collections.emptyMap()); } - ByteBuffer byteBuffer = ByteBuffer.allocate(size); + ByteBuffer byteBuffer = parseByteBuffer(fileChannel, position); + return new Pair<>(ReadWriteIOUtils.readMap(byteBuffer), ReadWriteIOUtils.readMap(byteBuffer)); + } + + public Map<String, String> readTag(long position) throws IOException { + ByteBuffer byteBuffer = parseByteBuffer(fileChannel, position); + return ReadWriteIOUtils.readMap(byteBuffer); + } + + public static ByteBuffer parseByteBuffer(FileChannel fileChannel, long position) + throws IOException { + // Read the first block + ByteBuffer byteBuffer = ByteBuffer.allocate(MAX_LENGTH); fileChannel.read(byteBuffer, position); byteBuffer.flip(); - return new Pair<>(ReadWriteIOUtils.readMap(byteBuffer), ReadWriteIOUtils.readMap(byteBuffer)); + if (byteBuffer.limit() > 0) { // This indicates that there is data at this position + int firstInt = ReadWriteIOUtils.readInt(byteBuffer); // first int + byteBuffer.position(0); + if (firstInt < -1) { // This position is blockNum, the original data occupies multiple blocks + int blockNum = -firstInt; + ByteBuffer byteBuffers = ByteBuffer.allocate(blockNum * MAX_LENGTH); + byteBuffers.put(byteBuffer); + byteBuffers.position(4); // Skip blockNum + List<Long> blockOffset = new ArrayList<>(); + blockOffset.add(position); + for (int i = 1; i < blockNum; i++) { + blockOffset.add(ReadWriteIOUtils.readLong(byteBuffers)); + // read one 
offset, then use filechannel's read to read it + byteBuffers.position(MAX_LENGTH * i); + byteBuffers.limit(MAX_LENGTH * (i + 1)); + fileChannel.read(byteBuffers, blockOffset.get(i)); + byteBuffers.position(4 + i * Long.BYTES); + } + byteBuffers.limit(byteBuffers.capacity()); + return byteBuffers; + } + } + return byteBuffer; } - public Map<String, String> readTag(int size, long position) throws IOException { - ByteBuffer byteBuffer = ByteBuffer.allocate(size); + private List<Long> ParseOffsetList(long position) throws IOException { + List<Long> blockOffset = new ArrayList<>(); + blockOffset.add(position); + // Read the first block + ByteBuffer byteBuffer = ByteBuffer.allocate(MAX_LENGTH); fileChannel.read(byteBuffer, position); byteBuffer.flip(); - return ReadWriteIOUtils.readMap(byteBuffer); + if (byteBuffer.limit() > 0) { // This indicates that there is data at this position + int firstInt = ReadWriteIOUtils.readInt(byteBuffer); // first int + byteBuffer.position(0); + if (firstInt < -1) { // This position is blockNum, the original data occupies multiple blocks + int blockNum = -firstInt; + int blockOffsetStoreLen = + (((blockNum - 1) * Long.BYTES + 4) / MAX_LENGTH + 1) + * MAX_LENGTH; // blockOffset storage length + ByteBuffer blockBuffer = ByteBuffer.allocate(blockOffsetStoreLen); + blockBuffer.put(byteBuffer); + blockBuffer.position(4); // Skip blockNum + + for (int i = 1; i < blockNum; i++) { + blockOffset.add(ReadWriteIOUtils.readLong(blockBuffer)); + // Every time you read an offset, use filechannel's read to read it + if (MAX_LENGTH * (i + 1) + <= blockOffsetStoreLen) { // Compared with directly reading bytebuffer, some reading + // operations are reduced, only the content of offset is + // read + blockBuffer.position(MAX_LENGTH * i); + blockBuffer.limit(MAX_LENGTH * (i + 1)); + fileChannel.read(blockBuffer, blockOffset.get(i)); + blockBuffer.position(4 + i * Long.BYTES); + } + } Review Comment: You've already read all the first block content into memory 
in previous `fileChannel.read(byteBuffer, position);` ########## iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/tag/TagLogFile.java: ########## @@ -95,22 +102,85 @@ public synchronized void copyTo(File targetFile) throws IOException { * @return tags map, attributes map * @throws IOException error occurred when reading disk */ - public Pair<Map<String, String>, Map<String, String>> read(int size, long position) - throws IOException { + public Pair<Map<String, String>, Map<String, String>> read(long position) throws IOException { if (position < 0) { return new Pair<>(Collections.emptyMap(), Collections.emptyMap()); } - ByteBuffer byteBuffer = ByteBuffer.allocate(size); + ByteBuffer byteBuffer = parseByteBuffer(fileChannel, position); + return new Pair<>(ReadWriteIOUtils.readMap(byteBuffer), ReadWriteIOUtils.readMap(byteBuffer)); + } + + public Map<String, String> readTag(long position) throws IOException { + ByteBuffer byteBuffer = parseByteBuffer(fileChannel, position); + return ReadWriteIOUtils.readMap(byteBuffer); + } + + public static ByteBuffer parseByteBuffer(FileChannel fileChannel, long position) + throws IOException { + // Read the first block + ByteBuffer byteBuffer = ByteBuffer.allocate(MAX_LENGTH); fileChannel.read(byteBuffer, position); byteBuffer.flip(); - return new Pair<>(ReadWriteIOUtils.readMap(byteBuffer), ReadWriteIOUtils.readMap(byteBuffer)); + if (byteBuffer.limit() > 0) { // This indicates that there is data at this position + int firstInt = ReadWriteIOUtils.readInt(byteBuffer); // first int + byteBuffer.position(0); + if (firstInt < -1) { // This position is blockNum, the original data occupies multiple blocks + int blockNum = -firstInt; + ByteBuffer byteBuffers = ByteBuffer.allocate(blockNum * MAX_LENGTH); + byteBuffers.put(byteBuffer); + byteBuffers.position(4); // Skip blockNum + List<Long> blockOffset = new ArrayList<>(); + blockOffset.add(position); Review Comment: no need to save offset in a list, read 
it, use it then gc it. ########## iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/tag/TagManager.java: ########## @@ -171,10 +206,29 @@ public void addIndex(Map<String, String> tagsMap, IMeasurementMNode<?> measureme } public void removeIndex(String tagKey, String tagValue, IMeasurementMNode<?> measurementMNode) { - tagIndex.get(tagKey).get(tagValue).remove(measurementMNode); + if (tagKey == null || tagValue == null || measurementMNode == null) { + return; + } + // init memory size + int memorySize = 0; + if (tagIndex.get(tagKey).get(tagValue).remove(measurementMNode)) { + memorySize += 8; + } if (tagIndex.get(tagKey).get(tagValue).isEmpty()) { - tagIndex.get(tagKey).remove(tagValue); + if (tagIndex.get(tagKey).remove(tagValue) != null) { + // 4 is the memory occupied by the length of the string, tagValue.length() is the length of + // tagValue, and the last 4 is the memory occupied by the size of IMeasurementMNodeSet + memorySize += 4 + tagValue.length() + 4; + } + } + if (tagIndex.get(tagKey).isEmpty()) { + if (tagIndex.remove(tagKey) != null) { + // 4 is the memory occupied by the length of the string, tagKey.length() is the length of + // tagKey, and the last 4 is the memory occupied by the size of tagValueMap + memorySize += 4 + tagKey.length() + 4; + } Review Comment: same as above. 
########## iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/tag/TagLogFile.java: ########## @@ -137,11 +207,84 @@ public void write(Map<String, String> tagMap, Map<String, String> attributeMap, * @return beginning position of the record in tagFile */ private synchronized long write(ByteBuffer byteBuffer, long position) throws IOException { + // Correct the initial offset of the write if (position < 0) { // append the record to file tail position = fileChannel.size(); } - fileChannel.write(byteBuffer, position); + // Read the original data to get the original space offset + List<Long> blockOffset = ParseOffsetList(position); + // write read data + int blockNumReal = byteBuffer.capacity() / MAX_LENGTH; + if (blockNumReal < 1) { + throw new RuntimeException( + "ByteBuffer capacity is smaller than tagAttributeTotalSize, which is not allowed."); + } + if (blockNumReal == 1 && blockOffset.size() == 1) { + // If the original data occupies only one block and the new data occupies only one block, the + // original space is used + fileChannel.write(byteBuffer, blockOffset.get(0)); + } else { + if (blockOffset.size() + > blockNumReal) { // if the original space is larger than the new space, the original Review Comment: `>=` or `>`? ########## iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/conf/CommonConfig.java: ########## @@ -251,6 +251,8 @@ public class CommonConfig { // Max size for tag and attribute of one time series private int tagAttributeTotalSize = 700; + private int tagAttributeEachMaxNum = 20; + private int tagAttributeEachMaxSize = 100; Review Comment: Add both of these configs to iotdb-common.properties in `iotdb-core/node-commons/src/assembly/resources/conf/iotdb-common.properties` and load them in `CommonDescriptor.loadCommonProps`.
########## iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/tag/TagManager.java: ########## @@ -156,10 +164,37 @@ public void addIndex(String tagKey, String tagValue, IMeasurementMNode<?> measur if (tagKey == null || tagValue == null || measurementMNode == null) { return; } - tagIndex - .computeIfAbsent(tagKey, k -> new ConcurrentHashMap<>()) - .computeIfAbsent(tagValue, v -> Collections.synchronizedSet(new HashSet<>())) - .add(measurementMNode); + + int tagIndexOldSize = tagIndex.size(); + Map<String, Set<IMeasurementMNode<?>>> tagValueMap = + tagIndex.computeIfAbsent(tagKey, k -> new ConcurrentHashMap<>()); + int tagIndexNewSize = tagIndex.size(); + + int tagValueMapOldSize = tagValueMap.size(); + Set<IMeasurementMNode<?>> measurementsSet = + tagValueMap.computeIfAbsent(tagValue, v -> Collections.synchronizedSet(new HashSet<>())); + int tagValueMapNewSize = tagValueMap.size(); + + int measurementsSetOldSize = measurementsSet.size(); + measurementsSet.add(measurementMNode); + int measurementsSetNewSize = measurementsSet.size(); + + int memorySize = 0; + if (tagIndexNewSize - tagIndexOldSize == 1) { + // 4 is the memory occupied by the length of the string, tagKey.length() is the length of + // tagKey, and the last 4 is the memory occupied by the size of tagvaluemap + memorySize += 4 + tagKey.length() + 4; + } + if (tagValueMapNewSize - tagValueMapOldSize == 1) { + // 4 is the memory occupied by the length of the string, tagValue.length() is the length of + // tagValue, and the last 4 is the memory occupied by the size of measurementsSet + memorySize += 4 + tagValue.length() + 4; Review Comment: same as above. 
########## iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/tag/TagManager.java: ########## @@ -156,10 +164,37 @@ public void addIndex(String tagKey, String tagValue, IMeasurementMNode<?> measur if (tagKey == null || tagValue == null || measurementMNode == null) { return; } - tagIndex - .computeIfAbsent(tagKey, k -> new ConcurrentHashMap<>()) - .computeIfAbsent(tagValue, v -> Collections.synchronizedSet(new HashSet<>())) - .add(measurementMNode); + + int tagIndexOldSize = tagIndex.size(); + Map<String, Set<IMeasurementMNode<?>>> tagValueMap = + tagIndex.computeIfAbsent(tagKey, k -> new ConcurrentHashMap<>()); + int tagIndexNewSize = tagIndex.size(); + + int tagValueMapOldSize = tagValueMap.size(); + Set<IMeasurementMNode<?>> measurementsSet = + tagValueMap.computeIfAbsent(tagValue, v -> Collections.synchronizedSet(new HashSet<>())); + int tagValueMapNewSize = tagValueMap.size(); + + int measurementsSetOldSize = measurementsSet.size(); + measurementsSet.add(measurementMNode); + int measurementsSetNewSize = measurementsSet.size(); + + int memorySize = 0; + if (tagIndexNewSize - tagIndexOldSize == 1) { + // 4 is the memory occupied by the length of the string, tagKey.length() is the length of + // tagKey, and the last 4 is the memory occupied by the size of tagvaluemap + memorySize += 4 + tagKey.length() + 4; Review Comment: call `RamUsageEstimator.sizeOf(tagKey)` to directly get memory usage of String object. 
########## iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/tag/TagLogFile.java: ########## @@ -95,22 +102,85 @@ public synchronized void copyTo(File targetFile) throws IOException { * @return tags map, attributes map * @throws IOException error occurred when reading disk */ - public Pair<Map<String, String>, Map<String, String>> read(int size, long position) - throws IOException { + public Pair<Map<String, String>, Map<String, String>> read(long position) throws IOException { if (position < 0) { return new Pair<>(Collections.emptyMap(), Collections.emptyMap()); } - ByteBuffer byteBuffer = ByteBuffer.allocate(size); + ByteBuffer byteBuffer = parseByteBuffer(fileChannel, position); + return new Pair<>(ReadWriteIOUtils.readMap(byteBuffer), ReadWriteIOUtils.readMap(byteBuffer)); + } + + public Map<String, String> readTag(long position) throws IOException { + ByteBuffer byteBuffer = parseByteBuffer(fileChannel, position); + return ReadWriteIOUtils.readMap(byteBuffer); + } + + public static ByteBuffer parseByteBuffer(FileChannel fileChannel, long position) + throws IOException { + // Read the first block + ByteBuffer byteBuffer = ByteBuffer.allocate(MAX_LENGTH); fileChannel.read(byteBuffer, position); byteBuffer.flip(); - return new Pair<>(ReadWriteIOUtils.readMap(byteBuffer), ReadWriteIOUtils.readMap(byteBuffer)); + if (byteBuffer.limit() > 0) { // This indicates that there is data at this position + int firstInt = ReadWriteIOUtils.readInt(byteBuffer); // first int + byteBuffer.position(0); + if (firstInt < -1) { // This position is blockNum, the original data occupies multiple blocks + int blockNum = -firstInt; + ByteBuffer byteBuffers = ByteBuffer.allocate(blockNum * MAX_LENGTH); + byteBuffers.put(byteBuffer); + byteBuffers.position(4); // Skip blockNum + List<Long> blockOffset = new ArrayList<>(); + blockOffset.add(position); + for (int i = 1; i < blockNum; i++) { + blockOffset.add(ReadWriteIOUtils.readLong(byteBuffers)); + // read one 
offset, then use filechannel's read to read it + byteBuffers.position(MAX_LENGTH * i); + byteBuffers.limit(MAX_LENGTH * (i + 1)); + fileChannel.read(byteBuffers, blockOffset.get(i)); + byteBuffers.position(4 + i * Long.BYTES); + } + byteBuffers.limit(byteBuffers.capacity()); + return byteBuffers; + } + } + return byteBuffer; } - public Map<String, String> readTag(int size, long position) throws IOException { - ByteBuffer byteBuffer = ByteBuffer.allocate(size); + private List<Long> ParseOffsetList(long position) throws IOException { Review Comment: ```suggestion private List<Long> parseOffsetList(long position) throws IOException { ``` ########## iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/tag/TagManager.java: ########## @@ -711,9 +746,24 @@ public Map<String, String> readTags(IMeasurementMNode<?> node) { public void clear() throws IOException { this.tagIndex.clear(); + releaseMemory((int) memoryUsage.get()); Review Comment: before `TagManager.clear()` is called, the `this.regionStatistics.clear();` is already called in `SchemaRegionXXXXImpl` ########## iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/tag/TagManager.java: ########## @@ -156,10 +164,37 @@ public void addIndex(String tagKey, String tagValue, IMeasurementMNode<?> measur if (tagKey == null || tagValue == null || measurementMNode == null) { return; } - tagIndex - .computeIfAbsent(tagKey, k -> new ConcurrentHashMap<>()) - .computeIfAbsent(tagValue, v -> Collections.synchronizedSet(new HashSet<>())) - .add(measurementMNode); + + int tagIndexOldSize = tagIndex.size(); + Map<String, Set<IMeasurementMNode<?>>> tagValueMap = + tagIndex.computeIfAbsent(tagKey, k -> new ConcurrentHashMap<>()); + int tagIndexNewSize = tagIndex.size(); + + int tagValueMapOldSize = tagValueMap.size(); + Set<IMeasurementMNode<?>> measurementsSet = + tagValueMap.computeIfAbsent(tagValue, v -> Collections.synchronizedSet(new HashSet<>())); + int tagValueMapNewSize = 
tagValueMap.size(); + + int measurementsSetOldSize = measurementsSet.size(); + measurementsSet.add(measurementMNode); + int measurementsSetNewSize = measurementsSet.size(); + + int memorySize = 0; + if (tagIndexNewSize - tagIndexOldSize == 1) { + // 4 is the memory occupied by the length of the string, tagKey.length() is the length of + // tagKey, and the last 4 is the memory occupied by the size of tagvaluemap + memorySize += 4 + tagKey.length() + 4; + } + if (tagValueMapNewSize - tagValueMapOldSize == 1) { + // 4 is the memory occupied by the length of the string, tagValue.length() is the length of + // tagValue, and the last 4 is the memory occupied by the size of measurementsSet + memorySize += 4 + tagValue.length() + 4; + } + if (measurementsSetNewSize - measurementsSetOldSize == 1) { + // 8 is the memory occupied by the length of the IMeasurementMNode + memorySize += 8; Review Comment: use `RamUsageEstimator.NUM_BYTES_OBJECT_REF` to represent object reference memory usage. ########## iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/conf/CommonConfig.java: ########## @@ -251,6 +251,8 @@ public class CommonConfig { // Max size for tag and attribute of one time series private int tagAttributeTotalSize = 700; + private int tagAttributeEachMaxNum = 20; + private int tagAttributeEachMaxSize = 100; Review Comment: also remember to change the comments about `tag_attribute_total_size` in `iotdb-core/node-commons/src/assembly/resources/conf/iotdb-common.properties` ########## iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/tag/TagLogFile.java: ########## @@ -95,22 +102,85 @@ public synchronized void copyTo(File targetFile) throws IOException { * @return tags map, attributes map * @throws IOException error occurred when reading disk */ - public Pair<Map<String, String>, Map<String, String>> read(int size, long position) - throws IOException { + public Pair<Map<String, String>, Map<String, String>> read(long position) throws
IOException { if (position < 0) { return new Pair<>(Collections.emptyMap(), Collections.emptyMap()); } - ByteBuffer byteBuffer = ByteBuffer.allocate(size); + ByteBuffer byteBuffer = parseByteBuffer(fileChannel, position); + return new Pair<>(ReadWriteIOUtils.readMap(byteBuffer), ReadWriteIOUtils.readMap(byteBuffer)); + } + + public Map<String, String> readTag(long position) throws IOException { + ByteBuffer byteBuffer = parseByteBuffer(fileChannel, position); + return ReadWriteIOUtils.readMap(byteBuffer); + } + + public static ByteBuffer parseByteBuffer(FileChannel fileChannel, long position) + throws IOException { + // Read the first block + ByteBuffer byteBuffer = ByteBuffer.allocate(MAX_LENGTH); fileChannel.read(byteBuffer, position); byteBuffer.flip(); - return new Pair<>(ReadWriteIOUtils.readMap(byteBuffer), ReadWriteIOUtils.readMap(byteBuffer)); + if (byteBuffer.limit() > 0) { // This indicates that there is data at this position + int firstInt = ReadWriteIOUtils.readInt(byteBuffer); // first int + byteBuffer.position(0); + if (firstInt < -1) { // This position is blockNum, the original data occupies multiple blocks + int blockNum = -firstInt; + ByteBuffer byteBuffers = ByteBuffer.allocate(blockNum * MAX_LENGTH); + byteBuffers.put(byteBuffer); + byteBuffers.position(4); // Skip blockNum + List<Long> blockOffset = new ArrayList<>(); + blockOffset.add(position); + for (int i = 1; i < blockNum; i++) { + blockOffset.add(ReadWriteIOUtils.readLong(byteBuffers)); + // read one offset, then use filechannel's read to read it + byteBuffers.position(MAX_LENGTH * i); + byteBuffers.limit(MAX_LENGTH * (i + 1)); + fileChannel.read(byteBuffers, blockOffset.get(i)); + byteBuffers.position(4 + i * Long.BYTES); + } + byteBuffers.limit(byteBuffers.capacity()); + return byteBuffers; Review Comment: it seems that you need to call `byteBuffer.flip();` ########## iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/tag/TagLogFile.java: ########## @@ 
-152,15 +295,69 @@ private synchronized long write(ByteBuffer byteBuffer, long position) throws IOE private ByteBuffer convertMapToByteBuffer( Map<String, String> tagMap, Map<String, String> attributeMap) throws MetadataException { - ByteBuffer byteBuffer = ByteBuffer.allocate(MAX_LENGTH); + int TotalMapSize = calculateMapSize(tagMap) + calculateMapSize(attributeMap); Review Comment: ```suggestion int totalMapSize = calculateMapSize(tagMap) + calculateMapSize(attributeMap); ``` ########## iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/tag/TagLogFile.java: ########## @@ -152,15 +295,69 @@ private synchronized long write(ByteBuffer byteBuffer, long position) throws IOE private ByteBuffer convertMapToByteBuffer( Map<String, String> tagMap, Map<String, String> attributeMap) throws MetadataException { - ByteBuffer byteBuffer = ByteBuffer.allocate(MAX_LENGTH); + int TotalMapSize = calculateMapSize(tagMap) + calculateMapSize(attributeMap); + ByteBuffer byteBuffer; + if (TotalMapSize <= MAX_LENGTH) { + byteBuffer = ByteBuffer.allocate(MAX_LENGTH); + } else { + // get from Num*MAX_LENGTH < TotalMapSize + 4 + Long.BYTES*Num <= MAX_LENGTH*(Num + 1) Review Comment: why not `Num*MAX_LENGTH <= TotalMapSize + 4 + Long.BYTES*Num`? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@iotdb.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org