I've been running jena-4.7.0 for quite a while, nightly loading a big
(>1b triples) database using tdb2.xloader. The load process has been
extremely reliable.
Two days ago the loader failed with a "Broken pipe" I/O exception,
followed by dozens of "Stream closed" exceptions, as shown in the traces below.
I ruled out data errors--all the input passes riot --validate with only
a few warnings.
The process runs on Red Hat Enterprise Linux release 8.10, with plenty
of memory and CPU. The Java runtime is OpenJDK Corretto 11.
I increased the tdb2.xloader threads to 3 (there are 4 cores available)
and the Java max heap to 16g, but the process still fails. It does not always
fail at the same point--it fails anywhere between 850m and 1.1b triples.
I don't believe there were any changes to the server configuration 3
days ago. The data source and shape are the same.
Do these stack traces point to anything that could be resolved in the
environment?
Thanks for any help.
java.io.IOException: Broken pipe
at java.base/java.io.FileOutputStream.writeBytes(Native Method)
at java.base/java.io.FileOutputStream.write(FileOutputStream.java:354)
at
java.base/java.io.BufferedOutputStream.flushBuffer(BufferedOutputStream.java:81)
at
java.base/java.io.BufferedOutputStream.write(BufferedOutputStream.java:95)
at
org.apache.jena.tdb2.xloader.ProcBuildNodeTableX.hexWrite(ProcBuildNodeTableX.java:366)
at
org.apache.jena.tdb2.xloader.ProcBuildNodeTableX$NodeHashTmpStream.write(ProcBuildNodeTableX.java:442)
at
org.apache.jena.tdb2.xloader.ProcBuildNodeTableX$NodeHashTmpStream.node(ProcBuildNodeTableX.java:431)
at
org.apache.jena.tdb2.xloader.ProcBuildNodeTableX$NodeHashTmpStream.triple(ProcBuildNodeTableX.java:393)
at
org.apache.jena.riot.system.StreamRDFWrapper.triple(StreamRDFWrapper.java:40)
at
org.apache.jena.system.progress.ProgressStreamRDF.triple(ProgressStreamRDF.java:41)
at org.apache.jena.riot.lang.LangTurtle.emit(LangTurtle.java:57)
at
org.apache.jena.riot.lang.LangTurtleBase.emitTriple(LangTurtleBase.java:565)
at
org.apache.jena.riot.lang.LangTurtleBase.objectList(LangTurtleBase.java:420)
at
org.apache.jena.riot.lang.LangTurtleBase.predicateObjectItem(LangTurtleBase.java:352)
at
org.apache.jena.riot.lang.LangTurtleBase.predicateObjectList(LangTurtleBase.java:333)
at org.apache.jena.riot.lang.LangTurtleBase.triples(LangTurtleBase.java:314)
at
org.apache.jena.riot.lang.LangTurtleBase.triplesSameSubject(LangTurtleBase.java:178)
at
org.apache.jena.riot.lang.LangTurtle.oneTopLevelElement(LangTurtle.java:46)
at
org.apache.jena.riot.lang.LangTurtleBase.runParser(LangTurtleBase.java:79)
at org.apache.jena.riot.lang.LangBase.parse(LangBase.java:43)
at
org.apache.jena.riot.RDFParserRegistry$ReaderRIOTLang.read(RDFParserRegistry.java:203)
at org.apache.jena.riot.RDFParser.read(RDFParser.java:416)
at org.apache.jena.riot.RDFParser.parseURI(RDFParser.java:385)
at org.apache.jena.riot.RDFParser.parse(RDFParser.java:360)
at org.apache.jena.riot.RDFParserBuilder.parse(RDFParserBuilder.java:570)
at
org.apache.jena.tdb2.xloader.ProcBuildNodeTableX.lambda$exec2$0(ProcBuildNodeTableX.java:188)
at java.base/java.util.ArrayList.forEach(ArrayList.java:1541)
at
org.apache.jena.tdb2.xloader.ProcBuildNodeTableX.lambda$exec2$1(ProcBuildNodeTableX.java:184)
at java.base/java.lang.Thread.run(Thread.java:829)
java.io.IOException: Stream closed
at
java.base/java.lang.ProcessBuilder$NullOutputStream.write(ProcessBuilder.java:442)
at java.base/java.io.OutputStream.write(OutputStream.java:157)
at
java.base/java.io.BufferedOutputStream.flushBuffer(BufferedOutputStream.java:81)
at
java.base/java.io.BufferedOutputStream.write(BufferedOutputStream.java:95)
at
org.apache.jena.tdb2.xloader.ProcBuildNodeTableX.hexWrite(ProcBuildNodeTableX.java:365)
at
org.apache.jena.tdb2.xloader.ProcBuildNodeTableX$NodeHashTmpStream.write(ProcBuildNodeTableX.java:442)
at
org.apache.jena.tdb2.xloader.ProcBuildNodeTableX$NodeHashTmpStream.node(ProcBuildNodeTableX.java:429)
at
org.apache.jena.tdb2.xloader.ProcBuildNodeTableX$NodeHashTmpStream.triple(ProcBuildNodeTableX.java:395)
at
org.apache.jena.riot.system.StreamRDFWrapper.triple(StreamRDFWrapper.java:40)
at
org.apache.jena.system.progress.ProgressStreamRDF.triple(ProgressStreamRDF.java:41)
at org.apache.jena.riot.lang.LangTurtle.emit(LangTurtle.java:57)
at
org.apache.jena.riot.lang.LangTurtleBase.emitTriple(LangTurtleBase.java:565)
at
org.apache.jena.riot.lang.LangTurtleBase.objectList(LangTurtleBase.java:420)
at
org.apache.jena.riot.lang.LangTurtleBase.predicateObjectItem(LangTurtleBase.java:352)
at
org.apache.jena.riot.lang.LangTurtleBase.predicateObjectList(LangTurtleBase.java:345)
at org.apache.jena.riot.lang.LangTurtleBase.triples(LangTurtleBase.java:314)
at
org.apache.jena.riot.lang.LangTurtleBase.triplesSameSubject(LangTurtleBase.java:178)
at
org.apache.jena.riot.lang.LangTurtle.oneTopLevelElement(LangTurtle.java:46)
at
org.apache.jena.riot.lang.LangTurtleBase.runParser(LangTurtleBase.java:79)
at org.apache.jena.riot.lang.LangBase.parse(LangBase.java:43)
at
org.apache.jena.riot.RDFParserRegistry$ReaderRIOTLang.read(RDFParserRegistry.java:203)
at org.apache.jena.riot.RDFParser.read(RDFParser.java:416)
at org.apache.jena.riot.RDFParser.parseURI(RDFParser.java:385)
at org.apache.jena.riot.RDFParser.parse(RDFParser.java:360)
at org.apache.jena.riot.RDFParserBuilder.parse(RDFParserBuilder.java:570)
at
org.apache.jena.tdb2.xloader.ProcBuildNodeTableX.lambda$exec2$0(ProcBuildNodeTableX.java:188)
at java.base/java.util.ArrayList.forEach(ArrayList.java:1541)
at
org.apache.jena.tdb2.xloader.ProcBuildNodeTableX.lambda$exec2$1(ProcBuildNodeTableX.java:184)
at java.base/java.lang.Thread.run(Thread.java:829)