Revision: 17249
          http://sourceforge.net/p/gate/code/17249
Author:   valyt
Date:     2014-01-27 19:13:19 +0000 (Mon, 27 Jan 2014)
Log Message:
-----------
Refactored the writing functionality into its own class.

Modified Paths:
--------------
    
mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/zipcollection/DocumentCollection.java

Modified: mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/zipcollection/DocumentCollection.java
===================================================================
--- mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/zipcollection/DocumentCollection.java       2014-01-27 17:17:30 UTC (rev 17248)
+++ mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/zipcollection/DocumentCollection.java       2014-01-27 19:13:19 UTC (rev 17249)
@@ -205,13 +205,100 @@
     }
   }
 
+  /**
+   * Class that handles the creation of collection files.
+   */
+  protected static class CollectionFileWriter {
+
+    /**
+     * The output stream used to write entries to the current zip file.
+     */
+    protected ZipOutputStream zipOuputStream;
+    
+    /**
+     * The zip file to which we are currently writing.
+     */
+    protected File zipFile;
+    
+    /**
+     * The number of entries written so far to the current zip file.
+     */
+    protected int currentEntries;
+    
+    /**
+     * The amount of bytes written so far to the current zip file.
+     */
+    protected long currentLength;
+    
+    /**
+     * A {@link ByteArrayOutputStream} used to temporarily store serialised 
+     * document data objects.
+     */
+    protected ByteArrayOutputStream byteArrayOS;
+    
+    public CollectionFileWriter(File file) throws IndexException {
+      this.zipFile = file;
+      if(zipFile.exists()) throw new IndexException("Collection zip file (" + 
+          file.getAbsolutePath() + ") already exists!");
+      byteArrayOS = new ByteArrayOutputStream();
+      
+      try {
+        zipOuputStream = new ZipOutputStream(new BufferedOutputStream(
+                new  FileOutputStream(zipFile)));
+      } catch(FileNotFoundException e) {
+        throw new IndexException("Cannot write to collection zip file (" + 
+                zipFile.getAbsolutePath() + ")", e);
+      }
+      currentEntries = 0;
+      currentLength = 0;
+    }
+    
+    /**
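+     * Serialises a document and writes it as a new entry in the zip file.
+     * @param entryName the name of the zip entry to create for the document.
+     * @param document the document data to serialise and store.
+     * @return true if the document was written successfully, false if this 
+     * collection file is full and cannot take the extra content.
+     * 
+     * @throws IOException if serialising or writing the document fails.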
+     */
+    public boolean writeDocumentData(String entryName, DocumentData document) throws IOException {
+      //write the new document to the byte array
+      ObjectOutputStream objectOutStream = new ObjectOutputStream(byteArrayOS);
+      objectOutStream.writeObject(document);
+      objectOutStream.close();
+
+      // check if this will take us over size
+      if(currentLength + byteArrayOS.size() > ZIP_FILE_MAX_SIZE ||
+         currentEntries >= ZIP_FILE_MAX_ENTRIES) return false;
+      
+      // create a new entry in the current zip file
+      ZipEntry entry = new ZipEntry(entryName);
+      zipOuputStream.putNextEntry(entry);
+      //write the data
+      byteArrayOS.writeTo(zipOuputStream);
+      zipOuputStream.closeEntry();
+      currentLength += entry.getCompressedSize();
+      
+      //clean up the byte array for next time
+      byteArrayOS.reset();
+      currentEntries++;
+      return true;
+    }
+
+    public void close() throws IOException {
+      if(zipOuputStream != null) zipOuputStream.close();
+    }
+  }
   
-  
   /**
    * The zip files containing the document collection.
    */
   protected List<CollectionFile> collectionFiles = null;
   
+  protected CollectionFileWriter collectionFileWriter;
+  
+  
   private static Logger logger = Logger.getLogger(DocumentCollection.class);
   
   /**
@@ -252,43 +339,15 @@
    */
   public static final int ZIP_FILE_MAX_ENTRIES = 65530;
   
-  /**
-   * The zip file managed by this collection.
-   */
-  protected ZipOutputStream zipOuputStream;
+
   
   /**
-   * The zip file to which we are currently writing.
+   * The ID for the next document to be written in this collection. This value 
+   * is initialised to 0 and then is automatically incremented whenever a new 
+   * document is written.
    */
-  protected File zipFile;
-  
-  /**
-   * The number of entries written so far to the current zip file.
-   */
-  protected int currentEntries;
-  
-  /**
-   * The amount of bytes written so far to the current zip file.
-   */
-  protected long currentLength;
-  
-  /**
-   * A {@link ByteArrayOutputStream} used to temporarily store serialised 
-   * document data objects.
-   */
-  protected ByteArrayOutputStream byteArrayOS;
-  
-  /**
-   * The ID for the next document to be written. This value is initialised to 0
-   * and then is automatically incremented whenever a new document is written.
-   */
   protected long nextDocumentId;
   
-
-  /**
-   * The unique ID of the current zip file (the file open for writing).
-   */
-  protected int zipFileId;
   
   /**
    * Opens a zip file and creates a DocumentCollection object for accessing the 
@@ -322,10 +381,8 @@
     documentCache = new Long2ObjectLinkedOpenHashMap<DocumentData>();
     
     // prepare for writing
-    byteArrayOS = new ByteArrayOutputStream();
     nextDocumentId = collectionFiles.isEmpty() ? 0 : 
         (collectionFiles.get(collectionFiles.size() - 1).lastEntry + 1);
-    zipFileId = collectionFiles.size();
     inputBuffer = new Long2ObjectLinkedOpenHashMap<DocumentData>();
   }
   
@@ -378,41 +435,31 @@
    * collection file(s).
    */
   public void writeDocument(DocumentData document) throws IndexException{
-    if(zipFile == null) openZipFile();
+    if(collectionFileWriter == null) openCollectionWriter();
+    
     try{
-      //write the new document to the byte array
-      ObjectOutputStream objectOutStream = new ObjectOutputStream(byteArrayOS);
-      objectOutStream.writeObject(document);
-      objectOutStream.close();
-
-      //see if we're about to go over the limits
-      if(currentEntries >= ZIP_FILE_MAX_ENTRIES || 
-         currentLength + byteArrayOS.size()  >= ZIP_FILE_MAX_SIZE ||
-         inputBuffer.size() >= INPUT_BUFFER_SIZE) {
-        //move to the next zip file
-        closeZipFile();
-        // open the newly-closed zip file in read mode
-        collectionFiles.add(new CollectionFile(zipFile));
-        zipFileId++;
-        openZipFile();
+      boolean success = false;
+      while(!success) {
+        success = collectionFileWriter.writeDocumentData(
+            Long.toString(nextDocumentId), document);
+        if(!success) {
+          // the current collection file is full: close it
+          collectionFileWriter.close();
+          synchronized(collectionFiles) {
+            // open the newly saved zip file
+            collectionFiles.add(new CollectionFile(collectionFileWriter.zipFile));
+            inputBuffer.clear();
+          }
+          // open a new one and try again
+          openCollectionWriter();
+        }   
       }
-
-      // create a new entry in the current zip file
-      ZipEntry entry = new ZipEntry(Long.toString(nextDocumentId++));
-      zipOuputStream.putNextEntry(entry);
-      //write the data
-      byteArrayOS.writeTo(zipOuputStream);
-      zipOuputStream.closeEntry();
-      currentLength += entry.getCompressedSize();
-      
-      //clean up the byte array for next time
-      byteArrayOS.reset();
-      currentEntries++;
     } catch(IOException e){
       throw new IndexException("Problem while accessing the collection file", e);
     } finally {
       // save the document data to the input buffer
       inputBuffer.put(nextDocumentId, document);
+      nextDocumentId++;
     }
   }
   
@@ -422,33 +469,19 @@
    * @throws IndexException if the collection zip file already exists, or cannot
    * be opened for writing.
    */
-  protected void openZipFile() throws IndexException{
-    zipFile = new File(indexDirectory,
-        CollectionFile.getCollectionFileName(Integer.toString(zipFileId)));
-    if(zipFile.exists()) throw new IndexException("Collection zip file (" + 
-            zipFile.getAbsolutePath() + ") already exists!");
-    
-    try {
-      zipOuputStream = new ZipOutputStream(new BufferedOutputStream(
-              new  FileOutputStream(zipFile)));
-    } catch(FileNotFoundException e) {
-      throw new IndexException("Cannot write to collection zip file (" + 
-              zipFile.getAbsolutePath() + ")", e);
+  protected void openCollectionWriter() throws IndexException{
+    int zipFileNumber = 0;
+    synchronized(collectionFiles) {
+      zipFileNumber = collectionFiles.isEmpty() ? 0 :
+        collectionFiles.get(collectionFiles.size() - 1).collectionFileNumber + 1;
     }
-    currentEntries = 0;
-    currentLength = 0;
-    inputBuffer.clear();
+    collectionFileWriter = new CollectionFileWriter(
+        new File(indexDirectory,
+            CollectionFile.getCollectionFileName(
+                Integer.toString(zipFileNumber))));
   }
   
   /**
-   * Closes the current zip file.
-   * @throws IOException 
-   */
-  protected void closeZipFile() throws IOException{
-    if(zipOuputStream != null) zipOuputStream.close();
-  }
-  
-  /**
    * Close this document collection and release all allocated resources (such 
    * as open file handles). 
    * @throws IOException 
@@ -456,7 +489,7 @@
    */
   public void close() throws IOException {
     // close the writer
-    closeZipFile();
+    collectionFileWriter.close();
     // close the reader
     closed = true;
     if(collectionFiles != null){

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
CenturyLink Cloud: The Leader in Enterprise Cloud Services.
Learn Why More Businesses Are Choosing CenturyLink Cloud For
Critical Workloads, Development Environments & Everything In Between.
Get a Quote or Start a Free Trial Today. 
http://pubads.g.doubleclick.net/gampad/clk?id=119420431&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to