This is an automated email from the ASF dual-hosted git repository. schor pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git
The following commit(s) were added to refs/heads/master by this push: new 18cdb21 [UIMA-6162] synchronize some CAS serialization 18cdb21 is described below commit 18cdb210a0fc436bdb56830853949fd85618b9ff Author: Marshall Schor <m...@schor.com> AuthorDate: Wed Dec 18 11:47:17 2019 -0500 [UIMA-6162] synchronize some CAS serialization for operations that make use of data structures for building serialized forms that are part of the CAS's svd. --- .../org/apache/uima/cas/impl/BinaryCasSerDes4.java | 283 +++++----- .../org/apache/uima/cas/impl/BinaryCasSerDes6.java | 277 ++++----- .../org/apache/uima/cas/impl/CASSerializer.java | 628 +++++++++++---------- 3 files changed, 599 insertions(+), 589 deletions(-) diff --git a/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java b/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java index fb67c72..91ffbea 100644 --- a/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java +++ b/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java @@ -445,151 +445,154 @@ public class BinaryCasSerDes4 implements SlotKindsConstants { * @throws IOException */ private void serialize() throws IOException { - - // if (doMeasurement) { -// System.out.println(printCasInfo(baseCas)); -// sm.origAuxBytes = baseCas.getByteHeap().getSize(); -// sm.origAuxShorts = baseCas.getShortHeap().getSize() * 2; -// sm.origAuxLongs = baseCas.getLongHeap().getSize() * 8; -// sm.totalTime = System.currentTimeMillis(); -// } - - - /************************ - * Write standard header - ************************/ - CommonSerDes.createHeader() - .v3() - .seqVer(2) // 0 - original, 1 - UIMA-4743, 2 - v3 - .form4() - .delta(isDelta) - .typeSystemIndexDefIncluded(isTsi) - .write(serializedOut); - - if (isTsi) { - CasIOUtils.writeTypeSystem(baseCas, serializedOut, true); - } - if (TRACE_SER) System.out.println("Form4Ser start, delta: " + (isDelta ? "true" : "false")); - /******************************************************************************* - * Setup tables that map to v2 "addresses" - needed for backwards compatibility - * fs2addr - feature structure to address - * addr2fs - address to feature structure - * sortedFSs - sorted by addr (sorted by id) - *******************************************************************************/ - final int origHeapEnd = csds.getHeapEnd(); // csds guaranteed non-null by constructor - if (isDelta) { - csds.setup(mark, origHeapEnd); // add additional above the line items to csds - } // otherwise was initialized when initially set up - - /** - * prepare fs < -- > seq maps - * done for entire cas (in the case of a mark) - */ - fs2seq.clear(); -// seq2fs.clear(); - int seq = 1; // origin 1 - - final List<TOP> localSortedFSs = csds.getSortedFSs(); - for (TOP fs : localSortedFSs) { - fs2seq.put(fs, seq++); -// seq2fs.put(seq++, fs); - if (fs instanceof UimaSerializable) { - ((UimaSerializable)fs)._save_to_cas_data(); + synchronized(baseCas.svd) { + + // if (doMeasurement) { + // System.out.println(printCasInfo(baseCas)); + // sm.origAuxBytes = baseCas.getByteHeap().getSize(); + // sm.origAuxShorts = baseCas.getShortHeap().getSize() * 2; + // sm.origAuxLongs = baseCas.getLongHeap().getSize() * 8; + // sm.totalTime = System.currentTimeMillis(); + // } + + + /************************ + * Write standard header + ************************/ + CommonSerDes.createHeader() + .v3() + .seqVer(2) // 0 - original, 1 - UIMA-4743, 2 - v3 + .form4() + .delta(isDelta) + .typeSystemIndexDefIncluded(isTsi) + .write(serializedOut); + + if (isTsi) { + CasIOUtils.writeTypeSystem(baseCas, serializedOut, true); } - } - - // the sort order is on the id (e.g. creation order) - List<TOP> newSortedFSs = CASImpl.filterAboveMark(csds.getSortedFSs(), mark); // returns all if mark not set - - /************************** - * Strings - * For delta, to determine "new" strings that should be serialized, - * use the same method as used in Binary (plain) serialization. - **************************/ - for (TOP fs : newSortedFSs) { - extractStrings(fs); - } - - if (isDelta) { - FsChange[] fssModified = baseCas.getModifiedFSList(); - - // also add in all modified strings - for (FsChange fsChange : fssModified) { - if (fsChange.fs instanceof UimaSerializable) { - ((UimaSerializable)fsChange.fs)._save_to_cas_data(); + + if (TRACE_SER) System.out.println("Form4Ser start, delta: " + (isDelta ? "true" : "false")); + /******************************************************************************* + * Setup tables that map to v2 "addresses" - needed for backwards compatibility + * fs2addr - feature structure to address + * addr2fs - address to feature structure + * sortedFSs - sorted by addr (sorted by id) + *******************************************************************************/ + final int origHeapEnd = csds.getHeapEnd(); // csds guaranteed non-null by constructor + if (isDelta) { + csds.setup(mark, origHeapEnd); // add additional above the line items to csds + } // otherwise was initialized when initially set up + + /** + * prepare fs < -- > seq maps + * done for entire cas (in the case of a mark) + */ + fs2seq.clear(); + // seq2fs.clear(); + int seq = 1; // origin 1 + + final List<TOP> localSortedFSs = csds.getSortedFSs(); + for (TOP fs : localSortedFSs) { + fs2seq.put(fs, seq++); + // seq2fs.put(seq++, fs); + if (fs instanceof UimaSerializable) { + ((UimaSerializable)fs)._save_to_cas_data(); } - extractStringsFromModifications(fsChange); } - } - - os.optimize(); - - writeStringInfo(); - - /*************************** - * Prepare to walk main heap - ***************************/ - heapEnd = csds.getHeapEnd(); - - heapStart = isDelta ? origHeapEnd : 0; -// -// -// if (isDelta) { -// // edge case - delta serializing with no new fs -// heapStart = (null == firstFS) ? heapEnd : csds.fs2addr.get(firstFS); -// } else { -// heapStart = 0; // not 1, in order to match v2 semantics -// // is switched to 1 later -// } - -// if (isDelta) { -// // debug -// for (TOP fs : csds.sortedFSs) { -// System.out.format("debug heapAddr: %,d type: %s%n", csds.fs2addr.get(fs), fs._getTypeImpl().getShortName()); -// if (csds.fs2addr.get(fs) == 439) { -// System.out.println("debug"); -// } -// } -// System.out.format("debug End of debug scan, heapStart: %,d heapEnd: %,d%n%n", heapStart, heapEnd); -// } - - if (TRACE_SER) System.out.println("Form4Ser heapstart: " + heapStart + " heapEnd: " + heapEnd); - - writeVnumber(control_dos, heapEnd - heapStart); // used for delta heap size to grow the CAS and ending condition on deser loop - if (TRACE_SER) System.out.println("Form4Ser heapstart: " + heapStart + " heapEnd: " + heapEnd); - Arrays.fill(prevFsByType, null); - -// if (heapStart == 0) { -// heapStart = 1; // slot 0 not serialized, it's null / 0 -// } - - - // scan thru all fs and save their offsets in the heap - // to allow conversion from addr to sequential fs numbers -// initFsStartIndexes(fsStartIndexes, heap, heapStart, heapEnd, typeCodeHisto); - - /*************************** - * walk all fs's - * For delta, just those above the line - ***************************/ - for (TOP fs : newSortedFSs) { - writeFs(fs); - } - - if (TRACE_SER) System.out.println("Form4Ser writing index info"); - serializeIndexedFeatureStructures(csds); - - if (isDelta) { - if (TRACE_SER) System.out.println("Form4Ser writing modified FSs"); - (new SerializeModifiedFSs(csds)).serializeModifiedFSs(); - } - - collectAndZip(); - - if (doMeasurement) { - sm.totalTime = System.currentTimeMillis() - sm.totalTime; + // the sort order is on the id (e.g. creation order) + List<TOP> newSortedFSs = CASImpl.filterAboveMark(csds.getSortedFSs(), mark); // returns all if mark not set + + /************************** + * Strings + * For delta, to determine "new" strings that should be serialized, + * use the same method as used in Binary (plain) serialization. + **************************/ + for (TOP fs : newSortedFSs) { + extractStrings(fs); + } + + if (isDelta) { + FsChange[] fssModified = baseCas.getModifiedFSList(); + + // also add in all modified strings + for (FsChange fsChange : fssModified) { + if (fsChange.fs instanceof UimaSerializable) { + ((UimaSerializable)fsChange.fs)._save_to_cas_data(); + } + extractStringsFromModifications(fsChange); + } + } + + os.optimize(); + + writeStringInfo(); + + /*************************** + * Prepare to walk main heap + ***************************/ + heapEnd = csds.getHeapEnd(); + + heapStart = isDelta ? origHeapEnd : 0; + // + // + // if (isDelta) { + // // edge case - delta serializing with no new fs + // heapStart = (null == firstFS) ? heapEnd : csds.fs2addr.get(firstFS); + // } else { + // heapStart = 0; // not 1, in order to match v2 semantics + // // is switched to 1 later + // } + + // if (isDelta) { + // // debug + // for (TOP fs : csds.sortedFSs) { + // System.out.format("debug heapAddr: %,d type: %s%n", csds.fs2addr.get(fs), fs._getTypeImpl().getShortName()); + // if (csds.fs2addr.get(fs) == 439) { + // System.out.println("debug"); + // } + // } + // System.out.format("debug End of debug scan, heapStart: %,d heapEnd: %,d%n%n", heapStart, heapEnd); + // } + + if (TRACE_SER) System.out.println("Form4Ser heapstart: " + heapStart + " heapEnd: " + heapEnd); + + writeVnumber(control_dos, heapEnd - heapStart); // used for delta heap size to grow the CAS and ending condition on deser loop + if (TRACE_SER) System.out.println("Form4Ser heapstart: " + heapStart + " heapEnd: " + heapEnd); + Arrays.fill(prevFsByType, null); + + // if (heapStart == 0) { + // heapStart = 1; // slot 0 not serialized, it's null / 0 + // } + + + // scan thru all fs and save their offsets in the heap + // to allow conversion from addr to sequential fs numbers + // initFsStartIndexes(fsStartIndexes, heap, heapStart, heapEnd, typeCodeHisto); + + + /*************************** + * walk all fs's + * For delta, just those above the line + ***************************/ + for (TOP fs : newSortedFSs) { + writeFs(fs); + } + + if (TRACE_SER) System.out.println("Form4Ser writing index info"); + serializeIndexedFeatureStructures(csds); + + if (isDelta) { + if (TRACE_SER) System.out.println("Form4Ser writing modified FSs"); + (new SerializeModifiedFSs(csds)).serializeModifiedFSs(); + } + + collectAndZip(); + + if (doMeasurement) { + sm.totalTime = System.currentTimeMillis() - sm.totalTime; + } } } diff --git a/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java b/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java index 22ba245..3d73fdd 100644 --- a/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java +++ b/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java @@ -708,153 +708,156 @@ public class BinaryCasSerDes6 implements SlotKindsConstants { throw new UnsupportedOperationException("Can't do Delta Serialization with different target TS"); } - setupOutputStreams(out); + synchronized (cas.svd) { - if (doMeasurements) { -// System.out.println(printCasInfo(cas)); -// sm.origAuxBytes = cas.getByteHeap().getSize(); -// sm.origAuxShorts = cas.getShortHeap().getSize() * 2; -// sm.origAuxLongs = cas.getLongHeap().getSize() * 8; - sm.totalTime = System.currentTimeMillis(); - } - - CommonSerDes.createHeader() - .form6() - .delta(isSerializingDelta) - .seqVer(2) // 2 == version 3 (or later) - .v3() - .typeSystemIncluded(isTsIncluded) - .typeSystemIndexDefIncluded(isTsiIncluded) - .write(serializedOut); - - if (isTsIncluded || isTsiIncluded) { - CasIOUtils.writeTypeSystem(cas, serializedOut, isTsiIncluded); - } - - os = new OptimizeStrings(doMeasurements); - - /****************************************************************** - * Find all FSs to be serialized via the indexes - * including those FSs referenced - * For Delta Serialization - excludes those FSs below the line - ******************************************************************/ - - /** - * Skip this, and use the reuse-info, only for the case of: - * - reuse info is provided, and its not a delta serialization. - * - This should only happen if the same identical CAS is being - * serialized multiple times (being sent to multiple remote services, for instance) - */ - - if (!reuseInfoProvided || isSerializingDelta) { -// long start = System.currentTimeMillis(); - processIndexedFeatureStructures(cas, false /* compute ref'd FSs, no write */); -// System.out.format("Time to enqueue reachable FSs: %,.3f seconds%n", (System.currentTimeMillis() - start)/ 1000f); - } - - - - /*************************** - * Prepare to walk main heap - * We prescan the main heap and - * 1) identify any types that should be skipped - * building a source and target fsStartIndexes table - * 2) add all strings to the string table, - * for strings above the mark - ***************************/ - - // scan thru all fs (above the line if delta) and create a map from - // the src id to the tgt id (some types may be missing, so is not identity map). - // Add all strings to string optimizer. - // Note: for delta cas, this only picks up strings - // referenced by FSs above the line - - // Note: does the UimaSerializable _save_to_cas_data call for above the line items - initSrcTgtIdMapsAndStrings(); - - // add remaining strings for this case: - // deltaCas, FS below the line modified, modification is new string. - // use the deltaCasMod scanning - final SerializeModifiedFSs smfs = isSerializingDelta ? new SerializeModifiedFSs() : null; - if (isSerializingDelta) { - // Note: does the UimaSerializable _save_to_cas_data call for modified below the line items - smfs.addModifiedStrings(); - } - - - /************************** - * Strings - **************************/ - - os.optimize(); - writeStringInfo(); - - /*************************** - * Prepare to walk main heap - ***************************/ - writeVnumber(control_dos, fssToSerialize.size()); // was totalMappedHeapSize - -// Arrays.fill(prevFsByType, null); - Arrays.fill(prevHeapInstanceWithIntValues, null); - prevFsWithLongValues.clear(); - - /*************************** - * walk main heap - ***************************/ + setupOutputStreams(out); + + if (doMeasurements) { + // System.out.println(printCasInfo(cas)); + // sm.origAuxBytes = cas.getByteHeap().getSize(); + // sm.origAuxShorts = cas.getShortHeap().getSize() * 2; + // sm.origAuxLongs = cas.getLongHeap().getSize() * 8; + sm.totalTime = System.currentTimeMillis(); + } + + CommonSerDes.createHeader() + .form6() + .delta(isSerializingDelta) + .seqVer(2) // 2 == version 3 (or later) + .v3() + .typeSystemIncluded(isTsIncluded) + .typeSystemIndexDefIncluded(isTsiIncluded) + .write(serializedOut); + + if (isTsIncluded || isTsiIncluded) { + CasIOUtils.writeTypeSystem(cas, serializedOut, isTsiIncluded); + } + + os = new OptimizeStrings(doMeasurements); - for (TOP fs : fssToSerialize) { - - final TypeImpl srcType = fs._getTypeImpl(); - final int tCode = srcType.getCode(); - final TypeImpl tgtType = isTypeMapping ? typeMapper.mapTypeSrc2Tgt(srcType) : srcType; - assert(null != tgtType); // because those are not put on queue for serialization + /****************************************************************** + * Find all FSs to be serialized via the indexes + * including those FSs referenced + * For Delta Serialization - excludes those FSs below the line + ******************************************************************/ + + /** + * Skip this, and use the reuse-info, only for the case of: + * - reuse info is provided, and its not a delta serialization. + * - This should only happen if the same identical CAS is being + * serialized multiple times (being sent to multiple remote services, for instance) + */ + + if (!reuseInfoProvided || isSerializingDelta) { + // long start = System.currentTimeMillis(); + processIndexedFeatureStructures(cas, false /* compute ref'd FSs, no write */); + // System.out.format("Time to enqueue reachable FSs: %,.3f seconds%n", (System.currentTimeMillis() - start)/ 1000f); + } + + + + /*************************** + * Prepare to walk main heap + * We prescan the main heap and + * 1) identify any types that should be skipped + * building a source and target fsStartIndexes table + * 2) add all strings to the string table, + * for strings above the mark + ***************************/ -// prevFs = prevFsByType[tCode]; + // scan thru all fs (above the line if delta) and create a map from + // the src id to the tgt id (some types may be missing, so is not identity map). + // Add all strings to string optimizer. + // Note: for delta cas, this only picks up strings + // referenced by FSs above the line - if (TRACE_SER) { - System.out.format("Ser: %,d adr: %,8d tCode: %,3d %13s tgtTypeCode: %,3d %n", - fs._id, fs._id, srcType.getCode(), srcType.getShortName(), tgtType.getCode()); + // Note: does the UimaSerializable _save_to_cas_data call for above the line items + initSrcTgtIdMapsAndStrings(); + + // add remaining strings for this case: + // deltaCas, FS below the line modified, modification is new string. + // use the deltaCasMod scanning + final SerializeModifiedFSs smfs = isSerializingDelta ? new SerializeModifiedFSs() : null; + if (isSerializingDelta) { + // Note: does the UimaSerializable _save_to_cas_data call for modified below the line items + smfs.addModifiedStrings(); } - - writeVnumber(typeCode_dos, tgtType.getCode()); - - if (fs instanceof CommonArrayFS) { - serializeArray(fs); - } else { - if (isTypeMapping) { - // Serialize out in the order the features are in the target - for (FeatureImpl tgtFeat : tgtType.getFeatureImpls()) { - FeatureImpl srcFeat = typeMapper.getSrcFeature(tgtType, tgtFeat); - assert(srcFeat != null); //for serialization, target is never a superset of features of src - serializeByKind(fs, srcFeat); - } - } else { // not type mapping - for (FeatureImpl srcFeat : srcType.getFeatureImpls()) { - serializeByKind(fs, srcFeat); + + + /************************** + * Strings + **************************/ + + os.optimize(); + writeStringInfo(); + + /*************************** + * Prepare to walk main heap + ***************************/ + writeVnumber(control_dos, fssToSerialize.size()); // was totalMappedHeapSize + + // Arrays.fill(prevFsByType, null); + Arrays.fill(prevHeapInstanceWithIntValues, null); + prevFsWithLongValues.clear(); + + /*************************** + * walk main heap + ***************************/ + + for (TOP fs : fssToSerialize) { + + final TypeImpl srcType = fs._getTypeImpl(); + final int tCode = srcType.getCode(); + final TypeImpl tgtType = isTypeMapping ? typeMapper.mapTypeSrc2Tgt(srcType) : srcType; + assert(null != tgtType); // because those are not put on queue for serialization + + // prevFs = prevFsByType[tCode]; + + if (TRACE_SER) { + System.out.format("Ser: %,d adr: %,8d tCode: %,3d %13s tgtTypeCode: %,3d %n", + fs._id, fs._id, srcType.getCode(), srcType.getShortName(), tgtType.getCode()); + } + + writeVnumber(typeCode_dos, tgtType.getCode()); + + if (fs instanceof CommonArrayFS) { + serializeArray(fs); + } else { + if (isTypeMapping) { + // Serialize out in the order the features are in the target + for (FeatureImpl tgtFeat : tgtType.getFeatureImpls()) { + FeatureImpl srcFeat = typeMapper.getSrcFeature(tgtType, tgtFeat); + assert(srcFeat != null); //for serialization, target is never a superset of features of src + serializeByKind(fs, srcFeat); + } + } else { // not type mapping + for (FeatureImpl srcFeat : srcType.getFeatureImpls()) { + serializeByKind(fs, srcFeat); + } } } + + // prevFsByType[tCode] = fs; + if (doMeasurements) { + sm.statDetails[typeCode_i].incr(DataIO.lengthVnumber(tCode)); + sm.mainHeapFSs ++; + } + } // end of FSs above the line walk + + // write out views, sofas, indexes + processIndexedFeatureStructures(cas, true /* pass 2 */); + + if (isSerializingDelta) { + smfs.serializeModifiedFSs(); } - -// prevFsByType[tCode] = fs; + + collectAndZip(); + if (doMeasurements) { - sm.statDetails[typeCode_i].incr(DataIO.lengthVnumber(tCode)); - sm.mainHeapFSs ++; - } - } // end of FSs above the line walk - - // write out views, sofas, indexes - processIndexedFeatureStructures(cas, true /* pass 2 */); - - if (isSerializingDelta) { - smfs.serializeModifiedFSs(); - } - - collectAndZip(); - - if (doMeasurements) { - sm.totalTime = System.currentTimeMillis() - sm.totalTime; + sm.totalTime = System.currentTimeMillis() - sm.totalTime; + } + return sm; } - return sm; } private void serializeArray(TOP fs) throws IOException { diff --git a/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java b/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java index 63bf035..5d3372e 100644 --- a/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java +++ b/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java @@ -151,29 +151,31 @@ public class CASSerializer implements Serializable { * @param addMetaData - true to include metadata */ public void addCAS(CASImpl cas, boolean addMetaData) { - BinaryCasSerDes bcsd = cas.getBinaryCasSerDes(); - // next call runs the setup, which scans all the reachables - final CommonSerDesSequential csds = BinaryCasSerDes4.getCsds(cas.getBaseCAS(), false); // saves the csds in the cas - bcsd.scanAllFSsForBinarySerialization(null, csds); // no mark - this.fsIndex = bcsd.getIndexedFSs(csds.fs2addr); // must follow scanAll... - - if (addMetaData) { - // some details about current main-heap specifications - // not required to deserialize - // not sent for C++ - // is 7 words long - // not serialized by custom serializers, only by Java object serialization - int heapsz = bcsd.heap.getCellsUsed(); - this.heapMetaData = new int[] { - Heap.getRoundedSize(heapsz), // a bit more than the size of the used heap - heapsz, // the position of the next (unused) slot in the heap - heapsz, - 0, - 0, - 1024, // initial size - 0}; + synchronized (cas.svd) { + BinaryCasSerDes bcsd = cas.getBinaryCasSerDes(); + // next call runs the setup, which scans all the reachables + final CommonSerDesSequential csds = BinaryCasSerDes4.getCsds(cas.getBaseCAS(), false); // saves the csds in the cas + bcsd.scanAllFSsForBinarySerialization(null, csds); // no mark + this.fsIndex = bcsd.getIndexedFSs(csds.fs2addr); // must follow scanAll... + + if (addMetaData) { + // some details about current main-heap specifications + // not required to deserialize + // not sent for C++ + // is 7 words long + // not serialized by custom serializers, only by Java object serialization + int heapsz = bcsd.heap.getCellsUsed(); + this.heapMetaData = new int[] { + Heap.getRoundedSize(heapsz), // a bit more than the size of the used heap + heapsz, // the position of the next (unused) slot in the heap + heapsz, + 0, + 0, + 1024, // initial size + 0}; + } + copyHeapsToArrays(bcsd); } - copyHeapsToArrays(bcsd); } private void outputStringHeap( @@ -272,139 +274,141 @@ public class CASSerializer implements Serializable { } public void addCAS(CASImpl cas, OutputStream ostream, boolean includeTsi) { - final BinaryCasSerDes bcsd = cas.getBinaryCasSerDes(); - - // next call runs the setup, which scans all the reachables - // these may have changed since this was previously computed due to updates in the CAS - final CommonSerDesSequential csds = BinaryCasSerDes4.getCsds(cas.getBaseCAS(), false); // saves the csds in the cas, used for possible future delta deser - bcsd.scanAllFSsForBinarySerialization(null, csds); // no mark - - try { - - DataOutputStream dos = new DataOutputStream(ostream); - - // get the indexed FSs for all views - this.fsIndex = bcsd.getIndexedFSs(csds.fs2addr); - - // output the key and version number - CommonSerDes.createHeader() - .seqVer(2) // 0 original, 1 UIMA-4743 2 for uima v3 - .typeSystemIndexDefIncluded(includeTsi) - .v3() - .write(dos); - - if (includeTsi) { - CasIOUtils.writeTypeSystem(cas, ostream, true); - } - - // output the FS heap - final int heapSize = bcsd.heap.getCellsUsed(); - dos.writeInt(heapSize); - // writing the 0th (null) element, because that's what V2 did - final int[] vs = bcsd.heap.heap; - for (int i = 0; i < heapSize; i++) { - dos.writeInt(vs[i]); - } - - // output the strings - StringHeapDeserializationHelper shdh = bcsd.stringHeap.serialize(); - - outputStringHeap(dos, cas, shdh, bcsd); -// // compute the number of total size of data in stringHeap -// // total size = char buffer length + length of strings in the string list; -// int stringHeapLength = shdh.charHeapPos; -// int stringListLength = 0; -// for (int i = 0; i < shdh.refHeap.length; i += 3) { -// int ref = shdh.refHeap[i + StringHeapDeserializationHelper.STRING_LIST_ADDR_OFFSET]; -// // this is a string in the string list -// // get length and add to total string heap length -// if (ref != 0) { -// // terminate each string with a null -// stringListLength += 1 + cas.getStringHeap().getStringForCode(ref).length(); -// } -// } -// -// int stringTotalLength = stringHeapLength + stringListLength; -// if (stringHeapLength == 0 && stringListLength > 0) { -// // nothing from stringHeap -// // add 1 for the null at the beginning -// stringTotalLength += 1; -// } -// dos.writeInt(stringTotalLength); -// -// // write the data in the stringheap, if there is any -// if (stringTotalLength > 0) { -// if (shdh.charHeapPos > 0) { -// dos.writeChars(String.valueOf(shdh.charHeap, 0, shdh.charHeapPos)); -// } else { -// // no stringheap data -// // if there is data in the string lists, write a leading 0 -// if (stringListLength > 0) { -// dos.writeChar(0); -// } -// } -// -// // word alignment -// if (stringTotalLength % 2 != 0) { -// dos.writeChar(0); -// } -// } -// -// // write out the string ref heap -// // each reference consist of a offset into stringheap and a length -// int refheapsz = ((shdh.refHeap.length - StringHeapDeserializationHelper.FIRST_CELL_REF) / StringHeapDeserializationHelper.REF_HEAP_CELL_SIZE) * 2; -// refheapsz++; -// dos.writeInt(refheapsz); -// dos.writeInt(0); -// for (int i = StringHeapDeserializationHelper.FIRST_CELL_REF; i < shdh.refHeap.length; i += 3) { -// dos.writeInt(shdh.refHeap[i + StringHeapDeserializationHelper.CHAR_HEAP_POINTER_OFFSET]); -// dos.writeInt(shdh.refHeap[i + StringHeapDeserializationHelper.CHAR_HEAP_STRLEN_OFFSET]); -// } - - // output the index FSs - dos.writeInt(this.fsIndex.length); - for (int i = 0; i < this.fsIndex.length; i++) { - dos.writeInt(this.fsIndex[i]); - } - - // 8bit heap - final int byteheapsz = bcsd.byteHeap.getSize(); - dos.writeInt(byteheapsz); - dos.write(bcsd.byteHeap.heap, 0, byteheapsz); - - // word alignment - int align = (4 - (byteheapsz % 4)) % 4; - for (int i = 0; i < align; i++) { - dos.writeByte(0); - } - - // 16bit heap - final int shortheapsz = bcsd.shortHeap.getSize(); - dos.writeInt(shortheapsz); - final short[] sh = bcsd.shortHeap.heap; - for (int i = 0; i < shortheapsz; i++) { - dos.writeShort(sh[i]); - } - - // word alignment - if (shortheapsz % 2 != 0) { - dos.writeShort(0); - } - - // 64bit heap - int longheapsz = bcsd.longHeap.getSize(); - dos.writeInt(longheapsz); - final long[] lh = bcsd.longHeap.heap; - for (int i = 0; i < longheapsz; i++) { - dos.writeLong(lh[i]); + synchronized (cas.svd) { + final BinaryCasSerDes bcsd = cas.getBinaryCasSerDes(); + + // next call runs the setup, which scans all the reachables + // these may have changed since this was previously computed due to updates in the CAS + final CommonSerDesSequential csds = BinaryCasSerDes4.getCsds(cas.getBaseCAS(), false); // saves the csds in the cas, used for possible future delta deser + bcsd.scanAllFSsForBinarySerialization(null, csds); // no mark + + try { + + DataOutputStream dos = new DataOutputStream(ostream); + + // get the indexed FSs for all views + this.fsIndex = bcsd.getIndexedFSs(csds.fs2addr); + + // output the key and version number + CommonSerDes.createHeader() + .seqVer(2) // 0 original, 1 UIMA-4743 2 for uima v3 + .typeSystemIndexDefIncluded(includeTsi) + .v3() + .write(dos); + + if (includeTsi) { + CasIOUtils.writeTypeSystem(cas, ostream, true); + } + + // output the FS heap + final int heapSize = bcsd.heap.getCellsUsed(); + dos.writeInt(heapSize); + // writing the 0th (null) element, because that's what V2 did + final int[] vs = bcsd.heap.heap; + for (int i = 0; i < heapSize; i++) { + dos.writeInt(vs[i]); + } + + // output the strings + StringHeapDeserializationHelper shdh = bcsd.stringHeap.serialize(); + + outputStringHeap(dos, cas, shdh, bcsd); + // // compute the number of total size of data in stringHeap + // // total size = char buffer length + length of strings in the string list; + // int stringHeapLength = shdh.charHeapPos; + // int stringListLength = 0; + // for (int i = 0; i < shdh.refHeap.length; i += 3) { + // int ref = shdh.refHeap[i + StringHeapDeserializationHelper.STRING_LIST_ADDR_OFFSET]; + // // this is a string in the string list + // // get length and add to total string heap length + // if (ref != 0) { + // // terminate each string with a null + // stringListLength += 1 + cas.getStringHeap().getStringForCode(ref).length(); + // } + // } + // + // int stringTotalLength = stringHeapLength + stringListLength; + // if (stringHeapLength == 0 && stringListLength > 0) { + // // nothing from stringHeap + // // add 1 for the null at the beginning + // stringTotalLength += 1; + // } + // dos.writeInt(stringTotalLength); + // + // // write the data in the stringheap, if there is any + // if (stringTotalLength > 0) { + // if (shdh.charHeapPos > 0) { + // dos.writeChars(String.valueOf(shdh.charHeap, 0, shdh.charHeapPos)); + // } else { + // // no stringheap data + // // if there is data in the string lists, write a leading 0 + // if (stringListLength > 0) { + // dos.writeChar(0); + // } + // } + // + // // word alignment + // if (stringTotalLength % 2 != 0) { + // dos.writeChar(0); + // } + // } + // + // // write out the string ref heap + // // each reference consist of a offset into stringheap and a length + // int refheapsz = ((shdh.refHeap.length - StringHeapDeserializationHelper.FIRST_CELL_REF) / StringHeapDeserializationHelper.REF_HEAP_CELL_SIZE) * 2; + // refheapsz++; + // dos.writeInt(refheapsz); + // dos.writeInt(0); + // for (int i = StringHeapDeserializationHelper.FIRST_CELL_REF; i < shdh.refHeap.length; i += 3) { + // dos.writeInt(shdh.refHeap[i + StringHeapDeserializationHelper.CHAR_HEAP_POINTER_OFFSET]); + // dos.writeInt(shdh.refHeap[i + StringHeapDeserializationHelper.CHAR_HEAP_STRLEN_OFFSET]); + // } + + // output the index FSs + dos.writeInt(this.fsIndex.length); + for (int i = 0; i < this.fsIndex.length; i++) { + dos.writeInt(this.fsIndex[i]); + } + + // 8bit heap + final int byteheapsz = bcsd.byteHeap.getSize(); + dos.writeInt(byteheapsz); + dos.write(bcsd.byteHeap.heap, 0, byteheapsz); + + // word alignment + int align = (4 - (byteheapsz % 4)) % 4; + for (int i = 0; i < align; i++) { + dos.writeByte(0); + } + + // 16bit heap + final int shortheapsz = bcsd.shortHeap.getSize(); + dos.writeInt(shortheapsz); + final short[] sh = bcsd.shortHeap.heap; + for (int i = 0; i < shortheapsz; i++) { + dos.writeShort(sh[i]); + } + + // word alignment + if (shortheapsz % 2 != 0) { + dos.writeShort(0); + } + + // 64bit heap + int longheapsz = bcsd.longHeap.getSize(); + dos.writeInt(longheapsz); + final long[] lh = bcsd.longHeap.heap; + for (int i = 0; i < longheapsz; i++) { + dos.writeLong(lh[i]); + } + } catch (IOException e) { + throw new CASRuntimeException(CASRuntimeException.BLOB_SERIALIZATION, e.getMessage()); } - } catch (IOException e) { - throw new CASRuntimeException(CASRuntimeException.BLOB_SERIALIZATION, e.getMessage()); + + bcsd.setHeapExtents(); + // non delta serialization + csds.setHeapEnd(bcsd.nextHeapAddrAfterMark); } - - bcsd.setHeapExtents(); - // non delta serialization - csds.setHeapEnd(bcsd.nextHeapAddrAfterMark); } @@ -453,169 +457,169 @@ public class CASSerializer implements Serializable { throw new CASRuntimeException(CASRuntimeException.INVALID_MARKER, "Invalid Marker."); } MarkerImpl mark = (MarkerImpl) trackingMark; - - final BinaryCasSerDes bcsd = cas.getBinaryCasSerDes(); - // next call runs the setup, which scans all the reachables - final CommonSerDesSequential csds = BinaryCasSerDes4.getCsds(cas.getBaseCAS(), true); // saves the csds in the cas - // because the output is only the new elements, this populates the arrays with only the new elements - // Note: all heaps reserve slot 0 for null, real data starts at position 1 - List<TOP> all = bcsd.scanAllFSsForBinarySerialization(mark, csds); - - // if (csds.getHeapEnd() == 0) { - // System.out.println("debug"); - //} - final Obj2IntIdentityHashMap<TOP> fs2auxOffset = new Obj2IntIdentityHashMap<>(TOP.class, TOP._singleton); - - int byteOffset = 1; - int shortOffset = 1; - int longOffset = 1; - - // scan all below mark and set up maps from aux array FSs to the offset to where the array starts in the modelled aux heap - for (TOP fs : all) { - if (trackingMark.isNew(fs)) { - break; - } - if (fs instanceof CommonArrayFS) { - CommonArrayFS ca = (CommonArrayFS) fs; - SlotKind kind = fs._getTypeImpl().getComponentSlotKind(); - switch (kind) { - case Slot_BooleanRef: - case Slot_ByteRef : - fs2auxOffset.put(fs, byteOffset); - byteOffset += ca.size(); - break; - case Slot_ShortRef: - fs2auxOffset.put(fs, shortOffset); - shortOffset += ca.size(); - break; - case Slot_LongRef: - case Slot_DoubleRef: - fs2auxOffset.put(fs, longOffset); - longOffset += ca.size(); - break; - default: - } // end of switch - } // end of if commonarray - else { // fs has feature slots - // model long and double refs which use up the long aux heap for 1 cell - TypeImpl ti = fs._getTypeImpl(); - longOffset += ti.getNbrOfLongOrDoubleFeatures(); - } - } // end of for - - try { - DataOutputStream dos = new DataOutputStream(ostream); - - // get the indexed FSs - this.fsIndex = bcsd.getDeltaIndexedFSs(mark, csds.fs2addr); - - CommonSerDes.createHeader() - .delta() - .seqVer(2) // 1 for UIMA-4743 2 for uima v3 - .v3() - .write(dos); - - // output the new FS heap cells - - final int heapSize = bcsd.heap.getCellsUsed() - 1; // first entry (null) is not written - - // Write heap - either the entire heap, or for delta, just the new part - - dos.writeInt(heapSize); - final int[] vs = bcsd.heap.heap; - for (int i = 1; i <= heapSize; i++) { // <= because heapsize is 1 less than cells used, but we start at index 1 - dos.writeInt(vs[i]); - } - - // convert v3 change-logging to v2 form, setting the chgXX-addr and chgXX-values lists. - // we do this before the strings or aux arrays are written out, because this - // could make additions to those. - - // addresses are in terms of modeled v2 arrays, as absolute addr in the aux arrays, and values - List<AddrPlusValue> chgMainAvs = new ArrayList<>(); - List<AddrPlusValue> chgByteAvs = new ArrayList<>(); - List<AddrPlusValue> chgShortAvs = new ArrayList<>(); - List<AddrPlusValue> chgLongAvs = new ArrayList<>(); - - scanModifications(bcsd, csds, cas.getModifiedFSList(), fs2auxOffset, - chgMainAvs, chgByteAvs, chgShortAvs, chgLongAvs); - - // output the new strings - StringHeapDeserializationHelper shdh = bcsd.stringHeap.serialize(1); - - outputStringHeap(dos, cas, shdh, bcsd); - - //output modified FS Heap cells - // this is output in a way that is the total number of slots changed == - // the sum over all fsChanges of - // for each fsChange, the number of slots (heap-sited-array or feature) modified - final int modHeapSize = chgMainAvs.size(); - dos.writeInt(modHeapSize); //num modified - for (AddrPlusValue av : chgMainAvs) { - dos.writeInt(av.addr); - dos.writeInt((int)av.value); - } - - // output the index FSs - dos.writeInt(this.fsIndex.length); - for (int i = 0; i < this.fsIndex.length; i++) { - dos.writeInt(this.fsIndex[i]); - } - - // 8bit heap new - int byteheapsz = bcsd.byteHeap.getSize() - 1; - dos.writeInt(byteheapsz); - dos.write(bcsd.byteHeap.heap, 1, byteheapsz); // byte 0 not used + synchronized (cas.svd) { + final BinaryCasSerDes bcsd = cas.getBinaryCasSerDes(); + // next call runs the setup, which scans all the reachables + final CommonSerDesSequential csds = BinaryCasSerDes4.getCsds(cas.getBaseCAS(), true); // saves the csds in the cas + // because the output is only the new elements, this populates the arrays with only the new elements + // Note: all heaps reserve slot 0 for null, real data starts at position 1 + List<TOP> all = bcsd.scanAllFSsForBinarySerialization(mark, csds); + + // if (csds.getHeapEnd() == 0) { + // System.out.println("debug"); + //} + final Obj2IntIdentityHashMap<TOP> fs2auxOffset = new Obj2IntIdentityHashMap<>(TOP.class, TOP._singleton); + + int byteOffset = 1; + int shortOffset = 1; + int longOffset = 1; - // word alignment - int align = (4 - (byteheapsz % 4)) % 4; - for (int i = 0; i < align; i++) { - dos.writeByte(0); - } - - // 16bit heap new - int shortheapsz = bcsd.shortHeap.getSize() - 1; - dos.writeInt(shortheapsz); - for (int i = 1; i <= shortheapsz; i++) { // <= in test because we're starting at 1 - dos.writeShort(bcsd.shortHeap.heap[i]); - } - - // word alignment - if (shortheapsz % 2 != 0) { - dos.writeShort(0); - } - - // 64bit heap new - int longheapsz = bcsd.longHeap.getSize() - 1; - dos.writeInt(longheapsz); - for (int i = 1; i <= longheapsz; i++) { - dos.writeLong(bcsd.longHeap.heap[i]); - } + // scan all below mark and set up maps from aux array FSs to the offset to where the array starts in the modelled aux heap + for (TOP fs : all) { + if (trackingMark.isNew(fs)) { + break; + } + if (fs instanceof CommonArrayFS) { + CommonArrayFS ca = (CommonArrayFS) fs; + SlotKind kind = fs._getTypeImpl().getComponentSlotKind(); + switch (kind) { + case Slot_BooleanRef: + case Slot_ByteRef : + fs2auxOffset.put(fs, byteOffset); + byteOffset += ca.size(); + break; + case Slot_ShortRef: + fs2auxOffset.put(fs, shortOffset); + shortOffset += ca.size(); + break; + case Slot_LongRef: + case Slot_DoubleRef: + fs2auxOffset.put(fs, longOffset); + longOffset += ca.size(); + break; + default: + } // end of switch + } // end of if commonarray + else { // fs has feature slots + // model long and double refs which use up the long aux heap for 1 cell + TypeImpl ti = fs._getTypeImpl(); + longOffset += ti.getNbrOfLongOrDoubleFeatures(); + } + } // end of for - // 8bit heap modified cells - writeMods(chgByteAvs, dos, av -> dos.writeByte((byte)av.value)); - - // word alignment - align = (4 - (chgByteAvs.size() % 4)) % 4; - for (int i = 0; i < align; i++) { - dos.writeByte(0); - } - - // 16bit heap modified cells - writeMods(chgShortAvs, dos, av -> dos.writeShort((short)av.value)); - - // word alignment - if (chgShortAvs.size() % 2 != 0) { - dos.writeShort(0); + try { + DataOutputStream dos = new DataOutputStream(ostream); + + // get the indexed FSs + this.fsIndex = bcsd.getDeltaIndexedFSs(mark, csds.fs2addr); + + CommonSerDes.createHeader() + .delta() + .seqVer(2) // 1 for UIMA-4743 2 for uima v3 + .v3() + .write(dos); + + // output the new FS heap cells + + final int heapSize = bcsd.heap.getCellsUsed() - 1; // first entry (null) is not written + + // Write heap - either the entire heap, or for delta, just the new part + + dos.writeInt(heapSize); + final int[] vs = bcsd.heap.heap; + for (int i = 1; i <= heapSize; i++) { // <= because heapsize is 1 less than cells used, but we start at index 1 + dos.writeInt(vs[i]); + } + + // convert v3 change-logging to v2 form, setting the chgXX-addr and chgXX-values lists. + // we do this before the strings or aux arrays are written out, because this + // could make additions to those. + + // addresses are in terms of modeled v2 arrays, as absolute addr in the aux arrays, and values + List<AddrPlusValue> chgMainAvs = new ArrayList<>(); + List<AddrPlusValue> chgByteAvs = new ArrayList<>(); + List<AddrPlusValue> chgShortAvs = new ArrayList<>(); + List<AddrPlusValue> chgLongAvs = new ArrayList<>(); + + scanModifications(bcsd, csds, cas.getModifiedFSList(), fs2auxOffset, + chgMainAvs, chgByteAvs, chgShortAvs, chgLongAvs); + + // output the new strings + StringHeapDeserializationHelper shdh = bcsd.stringHeap.serialize(1); + + outputStringHeap(dos, cas, shdh, bcsd); + + //output modified FS Heap cells + // this is output in a way that is the total number of slots changed == + // the sum over all fsChanges of + // for each fsChange, the number of slots (heap-sited-array or feature) modified + final int modHeapSize = chgMainAvs.size(); + dos.writeInt(modHeapSize); //num modified + for (AddrPlusValue av : chgMainAvs) { + dos.writeInt(av.addr); + dos.writeInt((int)av.value); + } + + // output the index FSs + dos.writeInt(this.fsIndex.length); + for (int i = 0; i < this.fsIndex.length; i++) { + dos.writeInt(this.fsIndex[i]); + } + + // 8bit heap new + int byteheapsz = bcsd.byteHeap.getSize() - 1; + dos.writeInt(byteheapsz); + dos.write(bcsd.byteHeap.heap, 1, byteheapsz); // byte 0 not used + + // word alignment + int align = (4 - (byteheapsz % 4)) % 4; + for (int i = 0; i < align; i++) { + dos.writeByte(0); + } + + // 16bit heap new + int shortheapsz = bcsd.shortHeap.getSize() - 1; + dos.writeInt(shortheapsz); + for (int i = 1; i <= shortheapsz; i++) { // <= in test because we're starting at 1 + dos.writeShort(bcsd.shortHeap.heap[i]); + } + + // word alignment + if (shortheapsz % 2 != 0) { + dos.writeShort(0); + } + + // 64bit heap new + int longheapsz = bcsd.longHeap.getSize() - 1; + dos.writeInt(longheapsz); + for (int i = 1; i <= longheapsz; i++) { + dos.writeLong(bcsd.longHeap.heap[i]); + } + + // 8bit heap modified cells + writeMods(chgByteAvs, dos, av -> dos.writeByte((byte)av.value)); + + // word alignment + align = (4 - (chgByteAvs.size() % 4)) % 4; + for (int i = 0; i < align; i++) { + dos.writeByte(0); + } + + // 16bit heap modified cells + writeMods(chgShortAvs, dos, av -> dos.writeShort((short)av.value)); + + // word alignment + if (chgShortAvs.size() % 2 != 0) { + dos.writeShort(0); + } + + // 64bit heap modified cells + writeMods(chgLongAvs, dos, av -> dos.writeLong(av.value)); + + } catch (IOException e) { + throw new CASRuntimeException(CASRuntimeException.BLOB_SERIALIZATION, e.getMessage()); } - - // 64bit heap modified cells - writeMods(chgLongAvs, dos, av -> dos.writeLong(av.value)); - - } catch (IOException e) { - throw new CASRuntimeException(CASRuntimeException.BLOB_SERIALIZATION, e.getMessage()); } - } private void writeMods(