qiaojialin commented on a change in pull request #1400: URL: https://github.com/apache/incubator-iotdb/pull/1400#discussion_r448842280
########## File path: server/src/main/java/org/apache/iotdb/db/engine/modification/io/LocalTextModificationAccessor.java ########## @@ -124,28 +124,42 @@ private static Modification decodeModification(String src) throws IOException { private static String encodeDeletion(Deletion del) { return del.getType().toString() + SEPARATOR + del.getPathString() + SEPARATOR + del.getVersionNum() + SEPARATOR - + del.getTimestamp(); + + del.getStartTime() + SEPARATOR + del.getEndTime(); } private static Deletion decodeDeletion(String[] fields) throws IOException { - if (fields.length != 4) { + if (fields.length != 5 && fields.length != 4) { throw new IOException("Incorrect deletion fields number: " + fields.length); } String path = fields[1]; long versionNum; - long timestamp; + long startTimestamp = Long.MIN_VALUE; + long endTimestamp; try { versionNum = Long.parseLong(fields[2]); } catch (NumberFormatException e) { throw new IOException("Invalid version number: " + fields[2]); } - try { - timestamp = Long.parseLong(fields[3]); - } catch (NumberFormatException e) { - throw new IOException("Invalid timestamp: " + fields[3]); + if (fields.length == 4) { + try { + endTimestamp = Long.parseLong(fields[3]); + } catch (NumberFormatException e) { + throw new IOException("Invalid timestamp: " + fields[3]); + } + } else { + try { + startTimestamp = Long.parseLong(fields[3]); + } catch (NumberFormatException e) { + throw new IOException("Invalid timestamp: " + fields[3]); + } + try { + endTimestamp = Long.parseLong(fields[4]); + } catch (NumberFormatException e) { + throw new IOException("Invalid timestamp: " + fields[4]); + } Review comment: You can combine the two parsing and put the whole deletion line into IOException ########## File path: server/src/main/java/org/apache/iotdb/db/qp/physical/crud/DeletePlan.java ########## @@ -45,7 +46,22 @@ public DeletePlan() { */ public DeletePlan(long deleteTime, Path path) { super(false, Operator.OperatorType.DELETE); - this.deleteTime = deleteTime; + this.deleteStartTime = Long.MIN_VALUE; + this.deleteEndTime = deleteTime; + this.paths.add(path); + } Review comment: It's better to remove this constructor, it may misleading others that this means delete time = deleteTime. ########## File path: docs/UserGuide/Operation Manual/DML Data Manipulation Language.md ########## @@ -783,6 +783,12 @@ The wf02 plant's wt02 device has many segments of errors in its power supply sta delete from root.ln.wf02.wt02.status where time<=2017-11-01T16:26:00; ``` +In case we hope to merely delete the data before 2017-11-01 16:26:00 in the year of 2017, The SQL statement is: +``` +delete from root.ln.wf02.wt02.status where time>=2017-01-01T00:00:00 and time<=2017-11-01T16:26:00; Review comment: give more examples put the sqls in pr here ########## File path: server/src/main/java/org/apache/iotdb/db/utils/QueryUtils.java ########## @@ -45,46 +49,52 @@ private QueryUtils() { */ public static void modifyChunkMetaData(List<ChunkMetadata> chunkMetaData, List<Modification> modifications) { - int modIndex = 0; + List<Modification> sortedModifications = sortModifications(modifications); for (int metaIndex = 0; metaIndex < chunkMetaData.size(); metaIndex++) { ChunkMetadata metaData = chunkMetaData.get(metaIndex); - for (int j = modIndex; j < modifications.size(); j++) { - // iterate each modification to find the max deletion time - Modification modification = modifications.get(j); + for (Modification modification : sortedModifications) { if (modification.getVersionNum() > metaData.getVersion()) { - // this modification is after the Chunk, try modifying the chunk - // if this modification succeeds, update modIndex so in the next loop the previous - // modifications will not be examined - modIndex = doModifyChunkMetaData(modification, metaData)? j : modIndex; - } else { - // skip old modifications for next metadata - modIndex++; + doModifyChunkMetaData(modification, metaData); } } } // remove chunks that are completely deleted chunkMetaData.removeIf(metaData -> { - if (metaData.getDeletedAt() >= metaData.getEndTime()) { - return true; - } else { - if (metaData.getDeletedAt() >= metaData.getStartTime()) { - metaData.setModified(true); + long lower = metaData.getStartTime(); + long upper = metaData.getEndTime(); + if (metaData.getDeleteIntervalList() != null) { + for (Pair<Long, Long> range : metaData.getDeleteIntervalList()) { + if (upper < range.left) { + break; + } + if (range.left <= lower && lower <= range.right) { + metaData.setModified(true); + if (upper <= range.right) { + return true; + } + lower = range.right; + } else if (lower < range.left) { + metaData.setModified(true); + break; + } } - return false; } + return false; }); } - private static boolean doModifyChunkMetaData(Modification modification, ChunkMetadata metaData) { + private static LinkedList<Modification> sortModifications(List<Modification> modifications) { Review comment: This should be sortAndMerge Suppose we have [1,10], [5,12], [15,20], [16,21] It's better to merge them to [1,12] and [15,21] ########## File path: server/src/main/java/org/apache/iotdb/db/utils/datastructure/TVList.java ########## @@ -519,7 +518,7 @@ public boolean hasNextTimeValuePair() { while (cur < size) { long time = getTime(cur); - if (time < getTimeOffset() || (cur + 1 < size() && (time == getTime(cur + 1)))) { + if (isPointDeleted(time) || (cur + 1 < size() && (time == getTime(cur + 1)))) { Review comment: you record the deletion pair in the TVList, when iterating the TVList, it's better to sortAndMerge all deletions. When calling the Ite, the TVList should be sorted already, you can store the deletions inside the Ite and record and index of current checked deletion. Then you could use a more efficient way to check which point is deleted. ########## File path: tsfile/src/main/java/org/apache/iotdb/tsfile/read/reader/chunk/ChunkReaderByTimestamp.java ########## @@ -35,7 +35,10 @@ public ChunkReaderByTimestamp(Chunk chunk) throws IOException { public boolean pageSatisfied(PageHeader pageHeader) { long maxTimestamp = pageHeader.getEndTime(); // if maxTimestamp > currentTimestamp, this page should NOT be skipped - return maxTimestamp >= currentTimestamp && maxTimestamp > deletedAt; + // return maxTimestamp >= currentTimestamp && super.pageSatisfied(pageHeader); Review comment: remove this ########## File path: server/src/main/java/org/apache/iotdb/db/qp/strategy/LogicalGenerator.java ########## @@ -1550,18 +1552,52 @@ long parseTimeFormat(String timestampStr) throws SQLParserException { * * @param operator delete logical plan */ - private long parseDeleteTimeFilter(DeleteDataOperator operator) { + private Pair<Long, Long> parseDeleteTimeRange(DeleteDataOperator operator) { FilterOperator filterOperator = operator.getFilterOperator(); - if (filterOperator.getTokenIntType() != SQLConstant.LESSTHAN - && filterOperator.getTokenIntType() != SQLConstant.LESSTHANOREQUALTO) { + if (!filterOperator.isLeaf() && filterOperator.getTokenIntType() != SQLConstant.KW_AND) { throw new SQLParserException( - "For delete command, where clause must be like : time < XXX or time <= XXX"); + "For delete command, where clause can only contain atomic expressions like : " + + "time > XXX, time <= XXX, or And with two atomic expressions"); } + + if (filterOperator.isLeaf()) { + return calcOperatorRange(filterOperator); + } + + List<FilterOperator> children = filterOperator.getChildren(); + FilterOperator lOperator = children.get(0); Review comment: add more check for BinaryOperator, it must be a AndOperator and the lower bound must <= the upper bound ########## File path: server/src/main/java/org/apache/iotdb/db/engine/modification/Deletion.java ########## @@ -28,21 +28,36 @@ public class Deletion extends Modification { /** - * data whose timestamp <= this field are to be deleted. + * data within the interval [startTime, endTime] are to be deleted. */ - private long timestamp; + private long startTime; + private long endTime; - public Deletion(Path path, long versionNum, long timestamp) { + public Deletion(Path path, long versionNum, long endTime) { super(Type.DELETION, path, versionNum); - this.timestamp = timestamp; + this.endTime = endTime; Review comment: this.startTime = Long.MIN_VALUE; ########## File path: server/src/main/java/org/apache/iotdb/db/engine/memtable/AbstractMemTable.java ########## @@ -203,38 +204,40 @@ public ReadOnlyMemChunk query(String deviceId, String measurement, TSDataType da if (!checkPath(deviceId, measurement)) { return null; } - long undeletedTime = findUndeletedTime(deviceId, measurement, timeLowerBound); + List<Pair<Long, Long>> deletionList = constructDeletionList(deviceId, measurement, timeLowerBound); IWritableMemChunk memChunk = memTableMap.get(deviceId).get(measurement); TVList chunkCopy = memChunk.getTVList().clone(); - chunkCopy.setTimeOffset(undeletedTime); + chunkCopy.setDeletionList(deletionList); return new ReadOnlyMemChunk(measurement, dataType, encoding, chunkCopy, props, getVersion()); } - private long findUndeletedTime(String deviceId, String measurement, long timeLowerBound) { - long undeletedTime = Long.MIN_VALUE; + private List<Pair<Long, Long>> constructDeletionList(String deviceId, String measurement, + long timeLowerBound) { + List<Pair<Long, Long>> deletionList = new ArrayList<>(); for (Modification modification : modifications) { if (modification instanceof Deletion) { Deletion deletion = (Deletion) modification; if (deletion.getDevice().equals(deviceId) && deletion.getMeasurement().equals(measurement) - && deletion.getTimestamp() > undeletedTime) { - undeletedTime = deletion.getTimestamp(); + && deletion.getEndTime() > timeLowerBound) { + long lowerBound = Math.max(deletion.getStartTime(), timeLowerBound); + deletionList.add(new Pair<>(lowerBound, deletion.getEndTime())); Review comment: Don't we need to add a [-infinity, lowerBound] into deletionList? ########## File path: server/src/main/java/org/apache/iotdb/db/qp/physical/crud/DeletePlan.java ########## @@ -57,16 +73,39 @@ public DeletePlan(long deleteTime, Path path) { */ public DeletePlan(long deleteTime, List<Path> paths) { super(false, Operator.OperatorType.DELETE); - this.deleteTime = deleteTime; + this.deleteStartTime = Long.MIN_VALUE; + this.deleteEndTime = deleteTime; + this.paths = paths; + } Review comment: also delete this ########## File path: tsfile/src/main/java/org/apache/iotdb/tsfile/read/reader/chunk/ChunkReader.java ########## @@ -131,10 +132,25 @@ private void skipBytesInStreamByLength(long length) { } public boolean pageSatisfied(PageHeader pageHeader) { - if (pageHeader.getEndTime() <= deletedAt) { - return false; - } else if (pageHeader.getStartTime() <= deletedAt) { - pageHeader.setModified(true); + long lower = pageHeader.getStartTime(); + long upper = pageHeader.getEndTime(); + // deleteIntervalList is sorted in terms of startTime + if (deleteIntervalList != null) { + for (Pair<Long, Long> range : deleteIntervalList) { + if (upper < range.left) { + break; + } + if (range.left <= lower && lower <= range.right) { + pageHeader.setModified(true); + if (upper <= range.right) { + return true; Review comment: if the page is totally deleted, this page is not satisfied, return false. ########## File path: tsfile/src/main/java/org/apache/iotdb/tsfile/read/reader/page/PageReader.java ########## @@ -159,12 +161,23 @@ public void setFilter(Filter filter) { this.filter = filter; } - public void setDeletedAt(long deletedAt) { - this.deletedAt = deletedAt; + public void setDeleteIntervalList(List<Pair<Long, Long>> list) { + this.deleteIntervalList = list; } @Override public boolean isModified() { return pageHeader.isModified(); } + + private boolean isDeleted(long timestamp) { Review comment: also, the Deletions should be sortAndMerge, then optimize this check ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org