sergey-chugunov-1985 commented on a change in pull request #8926:
URL: https://github.com/apache/ignite/pull/8926#discussion_r612479885
##########
File path:
modules/core/src/main/java/org/apache/ignite/internal/processors/diagnostic/DiagnosticProcessor.java
##########
@@ -53,164 +56,127 @@
DFLT_DUMP_PAGE_LOCK_ON_FAILURE);
/** Time formatter for dump file name. */
- private static final DateTimeFormatter TIME_FORMATTER =
DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH-mm-ss_SSS");
+ private static final DateTimeFormatter TIME_FORMATTER =
DateTimeFormatter.ofPattern("yyyy-MM-dd'_'HH-mm-ss_SSS");
/** Folder name for store diagnostic info. **/
public static final String DEFAULT_TARGET_FOLDER = "diagnostic";
- /** File format. */
- static final String FILE_FORMAT = ".txt";
-
- /** Raw file format. */
- static final String RAW_FILE_FORMAT = ".raw";
-
/** Full path for store dubug info. */
private final Path diagnosticPath;
- /** */
- private final PageHistoryDiagnoster pageHistoryDiagnoster;
-
/**
+ * Constructor.
+ *
* @param ctx Kernal context.
*/
public DiagnosticProcessor(GridKernalContext ctx) throws
IgniteCheckedException {
super(ctx);
- diagnosticPath =
U.resolveWorkDirectory(ctx.config().getWorkDirectory(), DEFAULT_TARGET_FOLDER,
false).toPath();
-
- pageHistoryDiagnoster = new PageHistoryDiagnoster(ctx,
this::diagnosticFile);
-
- }
-
- /** {@inheritDoc} */
- @Override public void onKernalStart(boolean active) throws
IgniteCheckedException {
- super.onKernalStart(active);
-
- pageHistoryDiagnoster.onStart();
- }
-
- /**
- * Dump all history caches of given page.
- *
- * @param builder Parameters of dumping.
- * @throws IgniteCheckedException If scanning was failed.
- */
- public void dumpPageHistory(
- @NotNull PageHistoryDiagnoster.DiagnosticPageBuilder builder
- ) throws IgniteCheckedException {
- logTime(log, "DiagnosticPageHistory", () ->
pageHistoryDiagnoster.dumpPageHistory(builder));
+ diagnosticPath =
U.resolveWorkDirectory(ctx.config().getWorkDirectory(), DEFAULT_TARGET_FOLDER,
false)
+ .toPath();
}
/**
* Print diagnostic info about failure occurred on {@code ignite} instance.
* Failure details is contained in {@code failureCtx}.
*
- * @param ignite Ignite instance.
* @param failureCtx Failure context.
*/
- public void onFailure(Ignite ignite, FailureContext failureCtx) {
+ public void onFailure(FailureContext failureCtx) {
// Dump data structures page locks.
if (IGNITE_DUMP_PAGE_LOCK_ON_FAILURE)
ctx.cache().context().diagnostic().pageLockTracker().dumpLocksToLog();
- // If we have some corruption in data structure,
- // we should scan WAL and print to log and save to file all pages
related to corruption for
- // future investigation.
- if (X.hasCause(failureCtx.error(), CorruptedTreeException.class)) {
- CorruptedTreeException corruptedTreeException =
X.cause(failureCtx.error(), CorruptedTreeException.class);
-
- T2<Integer, Long>[] pageIds = corruptedTreeException.pages();
-
- try {
- dumpPageHistory(
- new PageHistoryDiagnoster.DiagnosticPageBuilder()
- .pageIds(pageIds)
- .addAction(PRINT_TO_LOG)
- .addAction(PRINT_TO_FILE)
- .addAction(PRINT_TO_RAW_FILE)
- );
+ CorruptedTreeException corruptedTreeE = X.cause(failureCtx.error(),
CorruptedTreeException.class);
+
+ if (corruptedTreeE != null && !F.isEmpty(corruptedTreeE.pages())) {
+ File[] walDirs = walDirs(ctx);
+
+ if (F.isEmpty(walDirs)) {
+ if (log.isInfoEnabled())
+ log.info("Skipping dump diagnostic info due to WAL not
configured");
}
- catch (IgniteCheckedException e) {
- SB sb = new SB();
- sb.a("[");
+ else {
+ try {
+ File corruptedPagesFile =
corruptedPagesFile(diagnosticPath, corruptedTreeE.pages());
+
+ String walDirsStr =
Arrays.stream(walDirs).map(File::getAbsolutePath)
+ .collect(joining(", ", "[", "]"));
+
+ String args = "walDir=" + walDirs[0].getAbsolutePath() +
(walDirs.length == 1 ? "" :
+ " walArchiveDir=" + walDirs[1].getAbsolutePath());
- for (int i = 0; i < pageIds.length; i++)
-
sb.a("(").a(pageIds[i].get1()).a(",").a(pageIds[i].get2()).a(")");
+ if
(ctx.config().getDataStorageConfiguration().getPageSize() != DFLT_PAGE_SIZE)
+ args += " pageSize=" +
ctx.config().getDataStorageConfiguration().getPageSize();
- sb.a("]");
+ args += " pages=" + corruptedPagesFile.getAbsolutePath();
- ignite.log().error(
- "Failed to dump diagnostic info on tree corruption.
PageIds=" + sb, e);
+ log.warning(corruptedTreeE.getClass().getSimpleName() + "
has occurred. " +
+ "To diagnose it, make a backup of the following
directories: " + walDirsStr + ". " +
+ "Then, run the following command: java -cp <classpath>
" +
+
"org.apache.ignite.development.utils.IgniteWalConverter " + args);
+ }
+ catch (Throwable t) {
+ String pages = Arrays.stream(corruptedTreeE.pages())
+ .map(t2 -> "(" + t2.get1() + ',' + t2.get2() +
')').collect(joining("", "[", "]"));
+
+ log.error("Failed to dump diagnostic info on tree
corruption. PageIds=" + pages, t);
+ }
}
}
}
/**
- * Resolve file to store diagnostic info.
+ * Creation and filling of a file with pages that can be corrupted.
+ * Pages are written on each line in format "grpId:pageId".
+ * File name format "corruptedPages_TIMESTAMP.txt".
*
- * @param customFile Custom file if customized.
- * @param writeMode Diagnostic file write mode.
- * @return File to store diagnostic info.
+ * @param dirPath Path to the directory where the file will be created.
+ * @param pages Pages that could be corrupted. Mapping: cache group id ->
page id.
+ * @return Created and filled file.
+ * @throws IOException If an I/O error occurs.
*/
- private File diagnosticFile(File customFile, DiagnosticFileWriteMode
writeMode) {
- if (customFile == null)
- return finalizeFile(diagnosticPath, writeMode);
+ public static File corruptedPagesFile(Path dirPath, T2<Integer, Long>...
pages) throws IOException {
+ dirPath.toFile().mkdirs();
- if (customFile.isAbsolute())
- return finalizeFile(customFile.toPath(), writeMode);
+ File f = dirPath.resolve("corruptedPages_" +
LocalDateTime.now().format(TIME_FORMATTER) + ".txt").toFile();
- return finalizeFile(diagnosticPath.resolve(customFile.toPath()),
writeMode);
- }
+ assert !f.exists();
- /**
- * @param diagnosticPath Path to diagnostic file.
- * @param writeMode Diagnostic file write mode.
- * @return File to store diagnostic info.
- */
- private static File finalizeFile(Path diagnosticPath,
DiagnosticFileWriteMode writeMode) {
- diagnosticPath.toFile().mkdirs();
+ try (BufferedWriter bw = new BufferedWriter(new FileWriter(f))) {
Review comment:
I would suggest using our FileIO interfaces to get more control over
forcing an fsync of the diagnostics file.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]