Author: schor Date: Tue Jan 22 22:19:34 2013 New Revision: 1437217 URL: http://svn.apache.org/viewvc?rev=1437217&view=rev Log: [UIMA-2498] prior to adding mapping, fix up the design of the APIs for compression. Add missing Mapper class.
Added: uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasTypeSystemMapper.java (with props) Modified: uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java Modified: uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java URL: http://svn.apache.org/viewvc/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java?rev=1437217&r1=1437216&r2=1437217&view=diff ============================================================================== --- uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java (original) +++ uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java Tue Jan 22 22:19:34 2013 @@ -132,12 +132,14 @@ import org.apache.uima.util.impl.Seriali * in identity-map cache (size limit = 10?) - key is target typesystemimpl. * Defaulting: * flags: doMeasurement, compressLevel, CompressStrategy + * Defaulting set in call to create instance of this class * Per serialize call: cas, output, [target ts], [mark for delta] * Per deserialize call: cas, input, [target ts] * - * This class has static methods with all args - * CASImpl has instance method with defaulting args - * + * CASImpl has instance method with defaulting args for serialization. + * CASImpl has reinit which works with compressed binary serialization objects + * if no type mapping + * If type mapping, (new BinaryCasSerDes4(sourceTypeSystem)).deserialize(in-steam, targetTypeSystem) */ public class BinaryCasSerDes4 { @@ -315,28 +317,35 @@ public class BinaryCasSerDes4 { */ final private TypeSystemImpl ts; final private boolean doMeasurements; - + final private CompressLevel compressLevel; + final private CompressStrat compressStrategy; /** * - * @param ts - * @param doMeasurements - normally set this to false. + * @param ts Type System (the source type system) + * @param doMeasurements true if measurements should be collected + * @param compressLevel + * @param compressStrategy */ - public BinaryCasSerDes4(TypeSystemImpl ts, boolean doMeasurements) { + public BinaryCasSerDes4(TypeSystemImpl ts, boolean doMeasurements, + CompressLevel compressLevel, CompressStrat compressStrategy) { this.ts = ts; this.doMeasurements = doMeasurements; - + this.compressLevel = compressLevel; + this.compressStrategy = compressStrategy; + } + + public BinaryCasSerDes4(TypeSystemImpl ts) { + this(ts, false, CompressLevel.Default, CompressStrat.Default); } /** - * * @param cas * @param out * @param trackingMark * @return null or serialization measurements (depending on setting of doMeasurements) * @throws IOException */ - public SerializationMeasures serialize(AbstractCas cas, Object out, Marker trackingMark, - CompressLevel compressLevel, CompressStrat compressStrategy) throws IOException { + public SerializationMeasures serialize(AbstractCas cas, Object out, Marker trackingMark) throws IOException { SerializationMeasures sm = (doMeasurements) ? new SerializationMeasures() : null; CASImpl casImpl = (CASImpl) ((cas instanceof JCas) ? ((JCas)cas).getCas(): cas); if (null != trackingMark && !trackingMark.isValid() ) { @@ -345,26 +354,44 @@ public class BinaryCasSerDes4 { } Serializer serializer = new Serializer( - casImpl, makeDataOutputStream(out), (MarkerImpl) trackingMark, sm, - compressLevel, compressStrategy); + casImpl, makeDataOutputStream(out), (MarkerImpl) trackingMark, sm); serializer.serialize(); return sm; } - - public SerializationMeasures serialize(AbstractCas cas, Object out, Marker trackingMark, - CompressLevel compressLevel) throws IOException { - return serialize(cas, out,trackingMark, compressLevel, CompressStrat.Default); - } - - public SerializationMeasures serialize(AbstractCas cas, Object out, Marker trackingMark) throws IOException { - return serialize(cas, out,trackingMark, CompressLevel.Default, CompressStrat.Default); - } public SerializationMeasures serialize(AbstractCas cas, Object out) throws IOException { return serialize(cas, out, null); } + public void deserialize(CASImpl cas, InputStream istream) throws IOException { + final DataInputStream dis = (istream instanceof DataInputStream) ? + (DataInputStream) istream : new DataInputStream(istream); + + // key + // determine if byte swap if needed based on key + byte[] bytebuf = new byte[4]; + bytebuf[0] = dis.readByte(); // U + bytebuf[1] = dis.readByte(); // I + bytebuf[2] = dis.readByte(); // M + bytebuf[3] = dis.readByte(); // A + + // version + // version bit in 2's place indicates this is in delta format. + final int version = dis.readInt(); + final boolean delta = ((version & 2) == 2); + + cas = cas.getBaseCAS(); + if (!delta) { + cas.resetNoQuestions(); + } + + if (0 == (version & 4)) { + throw new RuntimeException("non-compressed invalid object passed to BinaryCasSerDes4 deserialize"); + } + deserialize(cas, istream, delta); + } + public void deserialize(CASImpl cas, InputStream deserIn, boolean isDelta) throws IOException { DataInput in; if (deserIn instanceof DataInputStream) { @@ -406,8 +433,6 @@ public class BinaryCasSerDes4 { final private Integer[] serializedTypeCode2Code = new Integer[ts.getTypeArraySize()]; // needs to be Integer to get comparator choice final private int[] estimatedZipSize = new int[NBR_SLOT_KIND_ZIP_STREAMS]; // one entry for each output stream kind final private OptimizeStrings os; - final private CompressLevel compressLevel; - final private CompressStrat compressStrategy; // typeInfo is local to this serialization instance to permit multiple threads private TypeInfo typeInfo; // type info for the current type being serialized @@ -440,15 +465,11 @@ public class BinaryCasSerDes4 { final private DataOutputStream strSeg_dos; private Serializer(CASImpl cas, DataOutputStream serializedOut, MarkerImpl mark, - SerializationMeasures sm, - CompressLevel compressLevel, - CompressStrat compressStrategy) { + SerializationMeasures sm) { this.cas = cas; this.serializedOut = serializedOut; this.mark = mark; this.sm = sm; - this.compressLevel = compressLevel; - this.compressStrategy = compressStrategy; isDelta = (mark != null); doMeasurement = (sm != null); Modified: uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java URL: http://svn.apache.org/viewvc/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java?rev=1437217&r1=1437216&r2=1437217&view=diff ============================================================================== --- uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java (original) +++ uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java Tue Jan 22 22:19:34 2013 @@ -244,10 +244,6 @@ public class CASImpl extends AbstractCas */ private List<Marker> trackingMarkList; - // must be in svd part because has a field that is updated - // while serializing - private BinaryCasSerDes4 binaryCompressor; - private SharedViewData(boolean useFSCache) { this.useFSCache = useFSCache; } @@ -1193,10 +1189,7 @@ public class CASImpl extends AbstractCas } if (0 != (version & 4)) { - if (svd.binaryCompressor == null) { - svd.binaryCompressor = new BinaryCasSerDes4(this.getTypeSystemImpl(), false); - } - svd.binaryCompressor.deserialize(this, dis, delta); + (new BinaryCasSerDes4(this.getTypeSystemImpl())).deserialize(this, dis, delta); return; } @@ -4294,10 +4287,7 @@ public class CASImpl extends AbstractCas * @throws IOException */ public void serializeWithCompression(Object out) throws IOException { - if (svd.binaryCompressor == null) { - svd.binaryCompressor = new BinaryCasSerDes4(this.getTypeSystemImpl(), false); - } - svd.binaryCompressor.serialize(this, out); + (new BinaryCasSerDes4(this.getTypeSystemImpl())).serialize(this, out); } } Added: uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasTypeSystemMapper.java URL: http://svn.apache.org/viewvc/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasTypeSystemMapper.java?rev=1437217&view=auto ============================================================================== --- uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasTypeSystemMapper.java (added) +++ uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasTypeSystemMapper.java Tue Jan 22 22:19:34 2013 @@ -0,0 +1,247 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.uima.cas.impl; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.uima.cas.Type; + +/** + * This class gets initialized with two type systems, and then provides + * resources to map type and feature codes between them. + * + * It is used by some Binary serialization/ deserialization + * code to allow non-exact matched type systems to send and + * receive CASes in a binary-like format. + * + * Use cases: + * + * Serializing: Source ts -> generate serialized form in Target ts + * Deserializing: Target ts -> generate deserialized form in Source ts + * - either from remote or + * - from disk-stored-form + * + * LifeCycle: + * Instance of this are created for a CAS when needed, and then + * kept in the (source) TypeSystemImpl, in a map indexed by + * the target type system (identity map) + */ + +public class CasTypeSystemMapper { + private final static int[] INT0 = new int[0]; + + public final TypeSystemImpl tsSrc; // source type system + public final TypeSystemImpl tsTgt; // target type system + + /** + * Map from source type codes to target type codes. + * Source type code used as index, + * value is target type code + */ + final private int[] tSrc2Tgt; + + /** + * First index is src type code, 2nd index is src feature offset, 0 is 1st feature. + * Value is -1 if tgt doesn't have feature, else it is the feature offset in target. + * Only for type codes that are not arrays. + */ + final private int[][] fSrc2Tgt; + + /** + * First index is src type code, 2nd index is tgt feature offset, 0 is 1st feature. + * Value is -1 if src doesn't have feature, else it is the feature offset in source. + * Only used for type codes that are not arrays. + * Use: When serializing a source type that exists in the target, have to output + * the slots in the target feature order + * Also, when comparing the slots in the target with a given source + */ + final private int[][] tgtFoffsets2Src; + + /** + * Same as tSrc2Tgt, but reversed + * used when deserializing a target back into a source + */ + final private int[] tTgt2Src; + + public CasTypeSystemMapper(TypeSystemImpl tsSrc, TypeSystemImpl tsTgt) { + if (!tsSrc.isCommitted() || !tsTgt.isCommitted()) { + throw new RuntimeException("Type Systems must be committed before calling this method"); + } + this.tsSrc = tsSrc; + this.tsTgt = tsTgt; + + this.tSrc2Tgt = addTypes(tsSrc, tsTgt); + this.tTgt2Src = addTypes(tsTgt, tsSrc); + this.fSrc2Tgt = new int[tsSrc.getTypeArraySize()] []; + this.tgtFoffsets2Src = new int[tsSrc.getTypeArraySize()] []; + addFeatures(tsSrc, tsTgt); +// this.fTgt2Src = addFeatures(tsTgt, tsSrc); + } + + // returns 0 if type doesn't have corresponding code in other type system + public int mapTypeCodeSrc2Tgt(int c) { + return tSrc2Tgt[c]; + } + + // returns 0 if type doesn't have corresponding code in other type system + public int mapTypeCodeTgt2Src(int c) { + return tTgt2Src[c]; + } + + public int[] getTgtFeatOffsets2Src(int tCode) { + return tgtFoffsets2Src[tCode]; + } + + // returns -1 if feature doesn't have corresponding code in other type system + public int mapFeatureOffsetSrc2Tgt(int tCode, int offset) { + return fSrc2Tgt[tCode][offset]; + } + + // returns 0 if feature doesn't have corresponding code in other type system +// public int mapFeatureCodeTgt2Src(int c) { +// return fTgt2Src[c]; +// } + + private int[] addTypes(TypeSystemImpl tsSrc, TypeSystemImpl tsTgt) { + Map<TypeImpl, TypeImpl> mSrc2Tgt = new LinkedHashMap<TypeImpl, TypeImpl>(); + for (Iterator<Type> it = tsSrc.getTypeIterator(); it.hasNext();) { + TypeImpl tSrc = (TypeImpl) it.next(); + TypeImpl tTgt = (TypeImpl) tsTgt.getType(tSrc.getName()); + if (tTgt != null) { + mSrc2Tgt.put(tSrc, tTgt); + } + } + int[] r = new int[tsSrc.getNumberOfTypes() + 1]; // type codes are numbered starting with 1 + for (Entry<TypeImpl, TypeImpl> e : mSrc2Tgt.entrySet()) { + r[e.getKey().getCode()] = e.getValue().getCode(); + } + return r; + } + + private void addFeatures(TypeSystemImpl tsSrc, TypeSystemImpl tsTgt) { + for (int tCodeSrc = 0; tCodeSrc < tsSrc.getTypeArraySize(); tCodeSrc++) { + final int tCodeTgt = mapTypeCodeSrc2Tgt(tCodeSrc); + if (tCodeTgt == 0) { // this type not in target + fSrc2Tgt[tCodeSrc] = INT0; + tgtFoffsets2Src[tCodeSrc] = null; // should never be referenced + continue; + } + + // type is part of target ts + final int[] fcSrc = tsSrc.ll_getAppropriateFeatures(tCodeSrc); + final int[] fcTgt = tsTgt.ll_getAppropriateFeatures(tCodeTgt); + + if (fcSrc.length == 0) { + // source has no features + fSrc2Tgt[tCodeSrc] = INT0; + tgtFoffsets2Src[tCodeSrc] = new int[fcTgt.length]; + Arrays.fill(tgtFoffsets2Src[tCodeSrc], -1); + continue; // source type has no features + } + + final int[] src2tgtOffsets = new int[fcSrc.length]; + fSrc2Tgt[tCodeSrc] = src2tgtOffsets; + + if (fcTgt.length == 0) { + Arrays.fill(src2tgtOffsets, -1); + tgtFoffsets2Src[tCodeSrc] = INT0; + continue; // target type has no features + } + final int[] tgt2srcOffsets = new int[fcTgt.length]; + tgtFoffsets2Src[tCodeSrc] = tgt2srcOffsets; + +// // debug +// if (tCodeTgt == 228) { +// String ss[] = new String[fcTgt.length]; +// for (int i = 0; i < fcTgt.length; i++) { +// ss[i] = tsTgt.ll_getFeatureForCode(fcTgt[i]).getName(); +// } +// System.out.print(""); +// } +// // debug - verify features are in alpha order +// String ss[] = new String[fcSrc.length]; +// String prev = " "; +// boolean fault = false; +// for (int i = 0; i < fcSrc.length; i++) { +// String s = tsSrc.ll_getFeatureForCode(fcSrc[i]).getName(); +// ss[i] = s; +// if (prev.compareTo(s) >= 0) { +// fault = true; +// System.out.format("Source feature names not sorted, prev = %s, this = %s%n", prev, s); +// } +// prev = s; +// } +// if (fault) { +// System.out.print(""); +// } +// prev = " "; +// if (tCodeTgt == 228) { +// +// for (int i = 0; i < fcTgt.length; i++) { +// String s = tsTgt.ll_getFeatureForCode(fcTgt[i]).getName(); +// if (prev.compareTo(s) >= 0) { +// fault = true; +// System.out.format("Target feature names not sorted, prev = %s, this = %s%n", prev, s); +// } +// prev = s; +// } +// } + + // get List of names of appropriate features in the target for this type + List<String> namesTgt = new ArrayList<String>(fcTgt.length); + for (int i = 0; i < fcTgt.length; i++) { + namesTgt.add(tsTgt.ll_getFeatureForCode(fcTgt[i]).getName()); + } + + // get List of names of appropriate features in the source for this type + List<String> namesSrc = new ArrayList<String>(fcSrc.length); + for (int i = 0; i < fcSrc.length; i++) { + namesSrc.add(tsSrc.ll_getFeatureForCode(fcSrc[i]).getName()); + } + + + // for each feature in the source, find the corresponding target feature by name match (if any) + for (int fciSrc = 0; fciSrc < fcSrc.length; fciSrc++) { + final String nameSrc = namesSrc.get(fciSrc); + // feature names are semi sorted, not completely sorted due to inheritence + final int iTgt = namesTgt.indexOf(nameSrc); + src2tgtOffsets[fciSrc] = iTgt; // -1 if not there + } // end of for loop over all source features of a type code + + // for each feature in the target, find the corresponding source feature by name match (if any) + for (int fciTgt = 0; fciTgt < fcTgt.length; fciTgt++) { + final String nameTgt = namesTgt.get(fciTgt); + // feature names are semi sorted, not completely sorted due to inheritence + final int iSrc = namesSrc.indexOf(nameTgt); + tgt2srcOffsets[fciTgt] = iSrc; // -1 if not there + } // end of for loop over all target features of a type code + + + } // end of for loop over all typecodes + } + + +} Propchange: uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasTypeSystemMapper.java ------------------------------------------------------------------------------ svn:eol-style = native Modified: uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java URL: http://svn.apache.org/viewvc/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java?rev=1437217&r1=1437216&r2=1437217&view=diff ============================================================================== --- uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java (original) +++ uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java Tue Jan 22 22:19:34 2013 @@ -1558,6 +1558,9 @@ public class TypeSystemImpl implements T Map<TypeSystemImpl, CasTypeSystemMapper> typeSystemMappers = new HashMap<TypeSystemImpl, CasTypeSystemMapper>(); synchronized CasTypeSystemMapper getTypeSystemMapper(TypeSystemImpl tgtTs) { + if ((null == tgtTs) || (this == tgtTs)) { + return null; // conventions for no type mapping + } CasTypeSystemMapper m = typeSystemMappers.get(tgtTs); if (null == m) { m = new CasTypeSystemMapper(this, tgtTs);