On Fri, Apr 22, 2011 at 12:03 PM, Alex vB <m...@avomberg.de> wrote: > During indexing I use StandardAnalyzer (StandardFilter, LowerCaseFilter, > StopFilter). > Can I get somewhere more information for Codec creation or is there just > "grubbing" through the code?
Try the following patch to switch PFOR1 and PFOR2 over to Sep, so that they create separate .doc and .frq files. Then you can compare the compression of the freqs against your implementation (again, the .skp/.tib/.tiv files will be larger due to using the Sep codec and due to having pos pointers, but try to ignore that). Index: lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PatchedFrameOfRefCodec.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PatchedFrameOfRefCodec.java (revision 1095422) +++ lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PatchedFrameOfRefCodec.java (working copy) @@ -30,6 +30,8 @@ import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.index.codecs.fixed.FixedPostingsReaderImpl; import org.apache.lucene.index.codecs.fixed.FixedPostingsWriterImpl; +import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl; +import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl; import org.apache.lucene.index.codecs.standard.StandardCodec; import org.apache.lucene.index.codecs.BlockTermsWriter; import org.apache.lucene.index.codecs.BlockTermsReader; @@ -48,7 +50,7 @@ @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - PostingsWriterBase postingsWriter = new FixedPostingsWriterImpl(state, new PForDeltaFactory(128)); + PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new PForDeltaFactory(128)); boolean success = false; TermsIndexWriterBase indexWriter; @@ -79,7 +81,7 @@ @Override public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { - PostingsReaderBase postingsReader = new FixedPostingsReaderImpl(state.dir, + PostingsReaderBase postingsReader = new SepPostingsReaderImpl(state.dir, state.segmentInfo, state.readBufferSize, new PForDeltaFactory(128), @@ -125,14 +127,14 @@ @Override public void files(Directory dir, SegmentInfo segmentInfo, String id, 
Set<String> files) { - FixedPostingsReaderImpl.files(segmentInfo, id, files); + SepPostingsReaderImpl.files(segmentInfo, id, files); BlockTermsReader.files(dir, segmentInfo, id, files); VariableGapTermsIndexReader.files(dir, segmentInfo, id, files); } @Override public void getExtensions(Set<String> extensions) { - FixedPostingsWriterImpl.getExtensions(extensions); + SepPostingsWriterImpl.getExtensions(extensions); BlockTermsReader.getExtensions(extensions); VariableGapTermsIndexReader.getIndexExtensions(extensions); } Index: lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java (revision 1095422) +++ lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java (working copy) @@ -41,6 +41,8 @@ import org.apache.lucene.index.codecs.VariableGapTermsIndexReader; import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter; import org.apache.lucene.index.codecs.sep.IntStreamFactory; +import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl; +import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl; import org.apache.lucene.index.codecs.standard.StandardCodec; import org.apache.lucene.store.*; import org.apache.lucene.util.BytesRef; @@ -168,7 +170,7 @@ @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - PostingsWriterBase postingsWriter = new FixedPostingsWriterImpl(state, new PForDeltaIntFactory()); + PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new PForDeltaIntFactory()); boolean success = false; TermsIndexWriterBase indexWriter; @@ -199,7 +201,7 @@ @Override public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { - PostingsReaderBase postingsReader = new FixedPostingsReaderImpl(state.dir, + PostingsReaderBase postingsReader = new 
SepPostingsReaderImpl(state.dir, state.segmentInfo, state.readBufferSize, new PForDeltaIntFactory(), state.codecId); @@ -244,14 +246,14 @@ @Override public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files) { - FixedPostingsReaderImpl.files(segmentInfo, codecId, files); + SepPostingsReaderImpl.files(segmentInfo, codecId, files); BlockTermsReader.files(dir, segmentInfo, codecId, files); VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files); } @Override public void getExtensions(Set<String> extensions) { - FixedPostingsWriterImpl.getExtensions(extensions); + SepPostingsWriterImpl.getExtensions(extensions); BlockTermsReader.getExtensions(extensions); VariableGapTermsIndexReader.getIndexExtensions(extensions); } --------------------------------------------------------------------- To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org For additional commands, e-mail: java-user-h...@lucene.apache.org