Github user zd-project commented on a diff in the pull request: https://github.com/apache/storm/pull/2754#discussion_r209023115 --- Diff: storm-webapp/src/main/java/org/apache/storm/daemon/logviewer/handler/LogviewerLogSearchHandler.java --- @@ -388,63 +414,83 @@ private Integer tryParseIntParam(String paramName, String value) throws InvalidR } } + /** + * Find the first N matches of target string in files. + * @param logs all candidate log files to search + * @param numMatches number of matches expected + * @param fileOffset Unclear metrics + * @param startByteOffset number of byte to be ignored in each log file + * @param targetStr searched string + * @return all matched results + */ @VisibleForTesting - Matched findNMatches(List<File> logs, int numMatches, int fileOffset, int offset, String search) { + Matched findNMatches(List<File> logs, int numMatches, int fileOffset, int startByteOffset, String targetStr) { logs = drop(logs, fileOffset); + LOG.debug("{} files to scan", logs.size()); List<Map<String, Object>> matches = new ArrayList<>(); int matchCount = 0; + int scannedFiles = 0; + //TODO: Unnecessarily convoluted loop that should be optimized while (true) { if (logs.isEmpty()) { break; } File firstLog = logs.get(0); - Map<String, Object> theseMatches; + Map<String, Object> matchInLog; try { LOG.debug("Looking through {}", firstLog); - theseMatches = substringSearch(firstLog, search, numMatches - matchCount, offset); + matchInLog = substringSearch(firstLog, targetStr, numMatches - matchCount, startByteOffset); + scannedFiles++; } catch (InvalidRequestException e) { LOG.error("Can't search past end of file.", e); - theseMatches = new HashMap<>(); + matchInLog = new HashMap<>(); } String fileName = WorkerLogs.getTopologyPortWorkerLog(firstLog); + //This section simply put the formatted log filename and corresponding port in the matching. final List<Map<String, Object>> newMatches = new ArrayList<>(matches); - Map<String, Object> currentFileMatch = new HashMap<>(theseMatches); + Map<String, Object> currentFileMatch = new HashMap<>(matchInLog); currentFileMatch.put("fileName", fileName); Path firstLogAbsPath; try { firstLogAbsPath = firstLog.getCanonicalFile().toPath(); } catch (IOException e) { throw new RuntimeException(e); } + //Why do we need to start from scratch to retrieve just the port here? currentFileMatch.put("port", truncatePathToLastElements(firstLogAbsPath, 2).getName(0).toString()); newMatches.add(currentFileMatch); - int newCount = matchCount + ((List<?>)theseMatches.get("matches")).size(); + int newCount = matchCount + ((List<?>)matchInLog.get("matches")).size(); - //theseMatches is never empty! As guaranteed by the #get().size() method above + //matchInLog is never empty! As guaranteed by the #get().size() method above if (newCount == matchCount) { // matches and matchCount is not changed logs = rest(logs); - offset = 0; + startByteOffset = 0; fileOffset = fileOffset + 1; } else if (newCount >= numMatches) { matches = newMatches; break; } else { matches = newMatches; logs = rest(logs); - offset = 0; + startByteOffset = 0; fileOffset = fileOffset + 1; matchCount = newCount; } } - return new Matched(fileOffset, search, matches); + LOG.info("scanned {} files", scannedFiles); + //fileOffset is not being used and it behaves inconsistently (showing + // (index of files search ends on - 1) if [enough matches] else (index of files search ends on)) + // I don't think we should expose the data to public if it's not used. + // can I dropped this field or change its behavior so it's used for metrics [numScannedFiles]? --- End diff -- See https://issues.apache.org/jira/browse/STORM-3189
---