This is an automated email from the ASF dual-hosted git repository.

afs pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git


The following commit(s) were added to refs/heads/main by this push:
     new 2ceda72e0f GH-3877: Reset writePosition in BinaryDataFileRandomAccess.truncate()
2ceda72e0f is described below

commit 2ceda72e0f4627a7e13053e187d9d042538b4aa3
Author: Lance Paine <[email protected]>
AuthorDate: Wed Apr 22 15:08:40 2026 +0100

    GH-3877: Reset writePosition in BinaryDataFileRandomAccess.truncate()
    
    truncate() called file.setLength(length) but left the class's own
    writePosition field unchanged. A later switchToWriteMode() triggered by
    a read->write transition (e.g. a write following a read in the same
    transaction, or the first write in a new transaction after the previous
    one aborted) seeks the underlying RandomAccessFile to the stale
    writePosition past the truncated EOF. The next file.write() extends the
    file at that position; the filesystem zero-fills the range between the
    truncated length and the stale seek, leaving a contiguous zero gap
    inside an otherwise append-only binary data file.
    
    Under TDB2 this produces zero regions inside nodes-data.obj after an
    aborted write transaction followed by a read-then-write sequence in the
    next transaction. NodeTable reads that resolve NodeIds into one of those
    regions fail with:
    
        TDBException: NodeTableTRDF/Read
          caused by: TProtocolException: Unrecognized type 0
    
    The fix is to reset writePosition inside truncate(). Adds a regression
    test TestBinaryDataFileRandomAccessTruncate exercising both the plain
    truncate-then-write case and the truncate-then-read-then-write case
    that actually surfaces the bug.
---
 .../dboe/base/file/BinaryDataFileRandomAccess.java |   5 +-
 .../TestBinaryDataFileRandomAccessTruncate.java    | 155 +++++++++++++++++++++
 2 files changed, 159 insertions(+), 1 deletion(-)

diff --git a/jena-db/jena-dboe-base/src/main/java/org/apache/jena/dboe/base/file/BinaryDataFileRandomAccess.java b/jena-db/jena-dboe-base/src/main/java/org/apache/jena/dboe/base/file/BinaryDataFileRandomAccess.java
index 206555a6c7..b3e2473a4e 100644
--- a/jena-db/jena-dboe-base/src/main/java/org/apache/jena/dboe/base/file/BinaryDataFileRandomAccess.java
+++ b/jena-db/jena-dboe-base/src/main/java/org/apache/jena/dboe/base/file/BinaryDataFileRandomAccess.java
@@ -103,7 +103,10 @@ public class BinaryDataFileRandomAccess implements BinaryDataFile {
     public void truncate(long length) {
         checkOpen();
         switchToWriteMode();
-        try { file.setLength(length); }
+        try {
+            file.setLength(length);
+            writePosition = length;
+        }
         catch (IOException ex) { IO.exception(ex); }
     }
 
diff --git a/jena-db/jena-dboe-base/src/test/java/org/apache/jena/dboe/base/file/TestBinaryDataFileRandomAccessTruncate.java b/jena-db/jena-dboe-base/src/test/java/org/apache/jena/dboe/base/file/TestBinaryDataFileRandomAccessTruncate.java
new file mode 100644
index 0000000000..233325ef1a
--- /dev/null
+++ b/jena-db/jena-dboe-base/src/test/java/org/apache/jena/dboe/base/file/TestBinaryDataFileRandomAccessTruncate.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *   SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.apache.jena.dboe.base.file;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.util.Arrays;
+
+import org.apache.jena.atlas.lib.FileOps;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Regression test: {@link BinaryDataFileRandomAccess#truncate(long)} must reset
+ * the internal {@code writePosition} field.
+ *
+ * <p>Without the reset, a subsequent {@code switchToWriteMode()} triggered by a
+ * read-then-write sequence seeks the underlying {@link RandomAccessFile} to the
+ * stale {@code writePosition} (past the just-truncated EOF). The next
+ * {@code file.write(...)} extends the file at that position; the filesystem
+ * zero-fills the range between the truncated length and the stale seek
+ * position, leaving a contiguous zero gap inside an otherwise append-only
+ * binary data file.
+ *
+ * <p>Under TDB2 this surfaces as {@code NodeTableTRDF/Read ->
+ * TProtocolException: Unrecognized type 0} on any {@code nodes-data.obj}
+ * lookup whose NodeId resolves into the zero gap.
+ */
+public class TestBinaryDataFileRandomAccessTruncate {
+
+    private static final String FILE = "target/test-bdfra-truncate-writepos";
+
+    private BinaryDataFile file;
+
+    @BeforeEach public void before() {
+        FileOps.delete(FILE);
+        file = new BinaryDataFileRandomAccess(FILE);
+        file.open();
+    }
+
+    @AfterEach public void after() {
+        file.close();
+        FileOps.delete(FILE);
+    }
+
+    /**
+     * truncate() then write() (no intervening read) — file must contain only
+     * the written bytes up to the new length.
+     */
+    @Test public void truncate_then_write_no_gap() {
+        byte[] block = filled(300 * 1024, (byte) 0xAB);
+        file.write(block);
+        assertEquals(300 * 1024L, file.length());
+
+        file.truncate(100 * 1024);
+        assertEquals(100 * 1024L, file.length());
+
+        byte[] tail = filled(100, (byte) 0xCD);
+        long writtenAt = file.write(tail);
+        assertEquals(100 * 1024L, writtenAt,
+                "write() after truncate() must start at the truncated length, not at a stale writePosition");
+        assertEquals(100 * 1024L + 100, file.length());
+
+        assertNoZeroRun(FILE);
+    }
+
+    /**
+     * truncate() then read() then write() — the read flips the internal
+     * readMode flag, so the subsequent write goes through switchToWriteMode
+     * and seeks to the stored writePosition. Without the fix, that position
+     * is stale and writes past the truncated EOF, zero-filling the gap.
+     */
+    @Test public void truncate_then_read_then_write_no_gap() {
+        byte[] block = filled(300 * 1024, (byte) 0xAB);
+        file.write(block);
+
+        file.truncate(100 * 1024);
+        assertEquals(100 * 1024L, file.length());
+
+        byte[] sample = new byte[16];
+        file.read(0, sample);
+
+        byte[] tail = filled(100, (byte) 0xCD);
+        long writtenAt = file.write(tail);
+        assertEquals(100 * 1024L, writtenAt,
+                "write() after truncate()+read() must start at the truncated length, not at a stale writePosition");
+        assertEquals(100 * 1024L + 100, file.length());
+
+        assertNoZeroRun(FILE);
+    }
+
+    private static byte[] filled(int size, byte v) {
+        byte[] b = new byte[size];
+        Arrays.fill(b, v);
+        return b;
+    }
+
+    /**
+     * Fail the enclosing test if the on-disk file contains a contiguous run
+     * of zero bytes of length >= 16. The source blocks are all 0xAB/0xCD so
+     * any zero byte is evidence of filesystem zero-fill past a stale seek.
+     */
+    private static void assertNoZeroRun(String path) {
+        try (RandomAccessFile raf = new RandomAccessFile(path, "r")) {
+            long total = raf.length();
+            byte[] buf = new byte[(int) total];
+            raf.readFully(buf);
+            int runStart = -1;
+            int runLen = 0;
+            for (int i = 0; i < buf.length; i++) {
+                if (buf[i] == 0) {
+                    if (runStart < 0) { runStart = i; runLen = 1; }
+                    else runLen++;
+                } else {
+                    if (runLen >= 16) {
+                        assertTrue(false,
+                                "Zero-byte run of " + runLen + " bytes starting at offset " + runStart
+                                        + " indicates a filesystem zero-fill (stale writePosition after truncate).");
+                    }
+                    runStart = -1;
+                    runLen = 0;
+                }
+            }
+            if (runLen >= 16) {
+                assertTrue(false,
+                        "Zero-byte run of " + runLen + " bytes at end of file starting at offset " + runStart);
+            }
+        } catch (IOException ex) {
+            throw new RuntimeException(ex);
+        }
+    }
+}

Reply via email to