mimir

valyt Fri, 24 Jan 2014 08:46:12 -0800

Revision: 17246
          http://sourceforge.net/p/gate/code/17246
Author:   valyt
Date:     2014-01-24 16:43:33 +0000 (Fri, 24 Jan 2014)
Log Message:
-----------
We now have a re-openable [zip-] Document Collection.


The compacting functionality is not yet fully implemented.  

Modified Paths:
--------------
    mimir/branches/5.0/mimir-core/src/gate/mimir/MimirIndex.java
    mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicIndex.java
    mimir/branches/5.0/mimir-core/src/gate/mimir/index/Indexer.java
    mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/MG4JIndexer.java
    mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/TokenIndexBuilder.java
    mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/zipcollection/DocumentCollection.java

Modified: mimir/branches/5.0/mimir-core/src/gate/mimir/MimirIndex.java
===================================================================
--- mimir/branches/5.0/mimir-core/src/gate/mimir/MimirIndex.java        
2014-01-23 16:52:34 UTC (rev 17245)
+++ mimir/branches/5.0/mimir-core/src/gate/mimir/MimirIndex.java        
2014-01-24 16:43:33 UTC (rev 17246)
@@ -469,12 +469,11 @@
       new WriteDeletedDocsTask().run();
     }
 
+    // wait for indexing to end
+    documentsCollectorThread.join();
+    
     // close the document collection
     documentCollection.close();
-
-
-    // wait for indexing to end
-    documentsCollectorThread.join();
     // write the config file
     try {
       IndexConfig.writeConfigToFile(indexConfig, new File(indexDirectory,

Modified: mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicIndex.java
===================================================================
--- mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicIndex.java 
2014-01-23 16:52:34 UTC (rev 17245)
+++ mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicIndex.java 
2014-01-24 16:43:33 UTC (rev 17246)
@@ -580,6 +580,13 @@
   protected Index indexCluster;
   
   /**
+   * The direct index for this atomic index. If 
+   * <code>{@link #hasDirectIndex()}</code> is false, then this index will be 
+   * <code>null</code>.
+   */
+  protected Index directIndex;
+  
+  /**
    * A set of properties added to the ones obtained from the index writer when
    * writing out batches.
    */
@@ -1487,7 +1494,26 @@
     return indexCluster;
   }
   
+  
   /**
+   * Gets the direct index for this atomic index. The returned value is 
+   * <code>non-null</code> only if the atomic index was configured to have a 
+   * direct index upon its construction (see 
+   * {@link #AtomicIndex(MimirIndex, String, File, boolean, TermProcessor, 
BlockingQueue, BlockingQueue)}.).
+   * You can check if a direct index has been configured by calling 
+   * {@link #hasDirectIndex()}.
+   * @return an Index in which terms and documents are reversed. When querying 
+   * the returned index, the &quot;terms&quot; provided should be String 
+   * representations of document IDs (as produced by {@link 
#longToTerm(long)}).
+   * The search results is a set of &quot;document IDs&quot;, which are 
actually
+   * term IDs. The actual term string corresponding to the returned term IDs 
can
+   * be obtained by calling {@link #getTerm(long)}.   
+   */
+  public Index getDirectIndex() {
+    return directIndex;
+  }
+  
+  /**
    * Creates and returns an {@link IndexReader} for this index.
    * @return
    * @throws IOException
@@ -1497,7 +1523,8 @@
   }
  
   /**
-   * Gets the term string for a given term ID.
+   * Gets the term string for a given term ID. The term ID must have been 
+   * obtained from this index's direct index.
    * @param termId the ID for the term being sought.
    * @return the string for the given term.
    */

Modified: mimir/branches/5.0/mimir-core/src/gate/mimir/index/Indexer.java
===================================================================
--- mimir/branches/5.0/mimir-core/src/gate/mimir/index/Indexer.java     
2014-01-23 16:52:34 UTC (rev 17245)
+++ mimir/branches/5.0/mimir-core/src/gate/mimir/index/Indexer.java     
2014-01-24 16:43:33 UTC (rev 17246)
@@ -88,8 +88,9 @@
    * 
    * @param config
    *          the indexer configuration.
+   * @throws IOException 
    */
-  public Indexer(IndexConfig config) throws IndexException {
+  public Indexer(IndexConfig config) throws IndexException, IOException {
     this.config = config;
     indexDir = config.getIndexDirectory();
     if(indexDir.exists()) { throw new IndexException(
@@ -119,7 +120,7 @@
     annHelpersClosingProgress = 0;
   }
 
-  protected void initMG4J() throws IndexException {
+  protected void initMG4J() throws IndexException, IOException {
     // make sure the index directory exists
     mg4jIndexDir =
       new File(config.getIndexDirectory(), Indexer.MG4J_INDEX_DIRNAME);

Modified: mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/MG4JIndexer.java
===================================================================
--- mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/MG4JIndexer.java    
2014-01-23 16:52:34 UTC (rev 17245)
+++ mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/MG4JIndexer.java    
2014-01-24 16:43:33 UTC (rev 17246)
@@ -139,8 +139,9 @@
    * Initialises the MG4JConnector, based on the index config provided to the 
    * constructor, and all other options set after construction.
    * @throws IndexException 
+   * @throws IOException 
    */
-  public void init() throws IndexException{
+  public void init() throws IndexException, IOException{
     gateDocFactory = new GATEDocumentFactory(indexConfig);
     inputQueue =  new LinkedBlockingQueue<GATEDocument>(documentQueueSize);
     //start the sub-indexers for the token features

Modified: mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/TokenIndexBuilder.java
===================================================================
--- 
mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/TokenIndexBuilder.java  
    2014-01-23 16:52:34 UTC (rev 17245)
+++ 
mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/TokenIndexBuilder.java  
    2014-01-24 16:43:33 UTC (rev 17246)
@@ -101,7 +101,7 @@
           BlockingQueue<GATEDocument> outputQueue, Indexer indexer,
           GATEDocumentFactory factory, boolean zipCollection,
           String baseName,
-          TokenIndexerConfig config) throws IndexException {
+          TokenIndexerConfig config) throws IndexException, IOException {
     super(inputQueue, outputQueue, indexer, baseName, 
         config.isDirectIndexEnabled());
     this.termProcessor = config.getTermProcessor();

Modified: mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/zipcollection/DocumentCollection.java
===================================================================
--- 
mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/zipcollection/DocumentCollection.java
       2014-01-23 16:52:34 UTC (rev 17245)
+++ 
mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/zipcollection/DocumentCollection.java
       2014-01-24 16:43:33 UTC (rev 17246)
@@ -41,6 +41,14 @@
   protected static final int DOCUMENT_DATA_CACHE_SIZE = 100;
   
   /**
+   * The number of documents kept in memory until a new zip file is written. 
As 
+   * new documents are submitted, they get written to the currently open zip 
+   * file but they cannot be read from the file. To account for this, we keep 
+   * them in memory, in the {@link #inputBuffer} structure.
+   */
+  protected static final int INPUT_BUFFER_SIZE = 100;
+  
+  /**
    * A simple {@link FilenameFilter} that only accepts the zip files that are
    * part of a collection.
    * 
@@ -54,29 +62,6 @@
   }
   
   /**
-   * Given the name of a zip file, this method returns its ID (the numeric 
part 
-   * of the name), or -1 if the name is not that of a valid collection file.
-   * @param fileName the file name to be parsed.
-   * @return the ID of the file, or -1.
-   */
-  protected int getZipFileId(String fileName){
-    if(fileName.startsWith(Indexer.MIMIR_COLLECTION_BASENAME + "-") &&
-            fileName.endsWith(Indexer.MIMIR_COLLECTION_EXTENSION)){
-      String numberPart = fileName.substring(
-             Indexer.MIMIR_COLLECTION_BASENAME.length() + 1,
-             fileName.length() - Indexer.MIMIR_COLLECTION_EXTENSION.length());
-      
-      try {
-        return Integer.parseInt(numberPart);
-      } catch(NumberFormatException e) {
-        //non-parseable
-        return -1;
-      }
-    }
-    return -1; 
-  }
-  
-  /**
    * The zip files containing the document collection.
    */
   protected List<ZipFile> zipFiles = null;
@@ -86,7 +71,7 @@
   /**
    * The top level directory for the index.
    */
-  protected File indexDir;
+  protected File indexDirectory;
   
   /**
    * The maximum entry number in each zip file. This array is aligned with 
@@ -116,8 +101,6 @@
    */
   private volatile boolean closed = false; 
   
-
-  
   /**
    * The maximum number of bytes to write to a single zip file.
    */
@@ -160,20 +143,176 @@
   
   /**
    * The ID for the next document to be written. This value is initialised to 0
-   * and then is automatically incremented whenever anew document is written.
+   * and then is automatically incremented whenever a new document is written.
    */
-  protected int documentId;
+  protected long documentId;
   
 
   /**
-   * The unique ID of the current zip file.
+   * The unique ID of the current zip file (the file open for writing).
    */
   protected int zipFileId;
   
-
-
+  /**
+   * Given the name of a zip file, this method returns its ID (the numeric 
part 
+   * of the name), or -1 if the name is not that of a valid collection file.
+   * @param fileName the file name to be parsed.
+   * @return the ID of the file, or -1.
+   */
+  protected static int getZipFileId(String fileName){
+    if(fileName.startsWith(Indexer.MIMIR_COLLECTION_BASENAME + "-") &&
+            fileName.endsWith(Indexer.MIMIR_COLLECTION_EXTENSION)){
+      String numberPart = fileName.substring(
+             Indexer.MIMIR_COLLECTION_BASENAME.length() + 1,
+             fileName.length() - Indexer.MIMIR_COLLECTION_EXTENSION.length());
+      
+      try {
+        return Integer.parseInt(numberPart);
+      } catch(NumberFormatException e) {
+        //non-parseable
+        return -1;
+      }
+    }
+    return -1; 
+  }
+  /**
+   * Opens a zip file and creates a DocumentCollection object for accessing 
the 
+   * document data.
+   * @param indexDirectory
+   * @throws IndexException if the document collection files cannot be 
accessed. 
+   * @throws IOException 
+   */
+  public DocumentCollection(File indexDirectory) throws IOException {
+    this.indexDirectory = indexDirectory;
+    
+    zipFiles = new ArrayList<ZipFile>();
+    maxEntries = new IntArrayList();
+    // prepare for reading
+    for(File aCollectionFile : enumerateCollectionFiles()) {
+      openCollectionFile(aCollectionFile);
+    }
+    documentCache = new Long2ObjectLinkedOpenHashMap<DocumentData>();
+    
+    // prepare for writing
+    byteArrayOS = new ByteArrayOutputStream();
+    documentId = maxEntries.isEmpty() ? 0 : 
+        (maxEntries.getInt(maxEntries.size() -1) + 1);
+    zipFileId = zipFiles.size();
+    inputBuffer = new Long2ObjectLinkedOpenHashMap<DocumentData>();
+  }
   
   /**
+   * Gets the collection file in order.
+   * @return
+   */
+  protected File[] enumerateCollectionFiles() {
+    File[] collectionFiles = indexDirectory.listFiles(
+        new CollectionFilenameFilter());
+    //sort the files by ID
+    Arrays.sort(collectionFiles, new Comparator<File>(){
+      public int compare(File o1, File o2) {
+        return getZipFileId(o1.getName()) - getZipFileId(o2.getName());
+      }
+    });
+    return collectionFiles;
+  }
+  
+  /**
+   * Adds a new zip file to the collection.
+   * @throws IndexException 
+   */
+  protected synchronized void openCollectionFile(File collectionFile) throws 
IOException {
+    try {
+      //for each file, open a ZipFile, parse the entries, set the maxEntry 
value.
+      ZipFile aZipFile = new ZipFile(collectionFile);
+      int fileId = getZipFileId(collectionFile.getName());
+      zipFiles.add(aZipFile);
+      Enumeration<? extends ZipEntry> entries = aZipFile.entries();
+      int maxEntryInFile = -1;
+      while(entries.hasMoreElements()){
+        ZipEntry anEntry = entries.nextElement();
+        String entryName = anEntry.getName();
+        try {
+          int entryId = Integer.parseInt(entryName);
+          //sanity check
+          if(fileId > 0 && entryId <= maxEntries.get(fileId-1)){
+            throw new IOException(
+                    "Invalid entries distribution: collection file " + 
+                    collectionFile.getAbsolutePath() + 
+                    " contains an entry named \"" + entryName + 
+                    "\", but an entry with a larger-or-equal ID was " +
+                    "already seen in a previous collection file!");
+          }
+          //update the current maximum
+          if(entryId > maxEntryInFile) maxEntryInFile = entryId;
+        } catch(NumberFormatException e) {
+          //not parseable -> we'll ignore this entry.
+          logger.warn("Unparseable zip entry name: " + entryName);
+        }
+      }
+      maxEntries.add(maxEntryInFile);
+    } catch(ZipException e) {
+      throw new IOException("Problem while reading collection file " + 
+              collectionFile.getAbsolutePath(), e);
+    }
+  }
+  
+  /**
+   * Gets the document data for a given document ID.
+   * @param documentID the ID of the document to be retrieved.
+   * @return a {@link DocumentData} object for the requested document ID.
+   * @throws IOException if there are problems accessing the underlying zip 
file; 
+   * @throws NoSuchElementException if the requested document ID is not found.
+   */
+  public DocumentData getDocumentData(long documentID) throws IndexException{
+    if(closed) throw new IllegalStateException(
+            "This document collection has already been closed!");
+    DocumentData documentData = null;
+    if(documentID > maxEntries.get(maxEntries.size() - 1)) {
+      // it's a new document that's not yet available from the zip files
+      documentData = inputBuffer.get(documentID);
+      // (or a wrong ID)
+      if(documentData == null) throw new NoSuchElementException(
+          "No entry found for document ID " + documentID);
+    } else {
+      // it's an old document. Try the cache first
+      documentData = documentCache.getAndMoveToFirst(documentID);
+      if(documentData == null) {
+        // cache miss: we need to actually load it
+        //locate the right zip file
+        int zipFileId = 0;
+        while(zipFileId < maxEntries.size() && documentID > 
maxEntries.get(zipFileId)){
+          zipFileId++;
+        }
+        if(zipFileId >= maxEntries.size()){
+          //entry not found (entry number too large)
+          throw new NoSuchElementException("No entry found for document ID " + 
+                  documentID + ". Document ID too large for this collection!");
+        }
+        
+        ZipEntry entry = 
zipFiles.get(zipFileId).getEntry(Long.toString(documentID));
+        if(entry == null) throw new NoSuchElementException(
+            "No entry found for document ID " + documentID);
+        try {
+          ObjectInputStream ois = new 
ObjectInputStream(zipFiles.get(zipFileId).getInputStream(entry));
+          documentData = (DocumentData) ois.readObject();
+          ois.close();
+          documentCache.putAndMoveToFirst(documentID, documentData);
+          if(documentCache.size() > DOCUMENT_DATA_CACHE_SIZE) {
+            documentCache.removeLast();
+          }
+        } catch(ClassNotFoundException e) {
+          //invalid data read from the zip file
+          throw new IndexException("Invalid data read from zip file!", e);
+        } catch(IOException e) {
+          throw new IndexException("Exception reading zip file!", e);
+        }
+      }
+    }
+    return documentData;  
+  }
+  
+  /**
    * Writes a new document to the underlying zip file. The documents added 
    * through this method will get automatically generated names starting from 
    * &quot;0&quot;, and continuing with &quot;1&quot;, &quot;2&quot;, etc.   
@@ -191,25 +330,32 @@
 
       //see if we're about to go over the limits
       if(currentEntries >= ZIP_FILE_MAX_ENTRIES || 
-         currentLength + byteArrayOS.size()  >= ZIP_FILE_MAX_SIZE){
+         currentLength + byteArrayOS.size()  >= ZIP_FILE_MAX_SIZE ||
+         inputBuffer.size() >= INPUT_BUFFER_SIZE) {
         //move to the next zip file
         closeZipFile();
-        zipFileId ++;
+        // open the newly-closed zip file in read mode
+        openCollectionFile(zipFile);
+        zipFileId++;
         openZipFile();
       }
 
-      //create a new entry in the current zip file
-      ZipEntry entry = new ZipEntry(Integer.toString(documentId++));
+      // create a new entry in the current zip file
+      ZipEntry entry = new ZipEntry(Long.toString(documentId++));
       zipOuputStream.putNextEntry(entry);
       //write the data
       byteArrayOS.writeTo(zipOuputStream);
       zipOuputStream.closeEntry();
       currentLength += entry.getCompressedSize();
+      
       //clean up the byte array for next time
       byteArrayOS.reset();
       currentEntries++;
-    }catch(IOException e){
+    } catch(IOException e){
       throw new IndexException("Problem while accessing the collection file", 
e);
+    } finally {
+      // save the document data to the input buffer
+      inputBuffer.put(documentId, document);
     }
   }
   
@@ -220,7 +366,7 @@
    * be opened for writing.
    */
   protected void openZipFile() throws IndexException{
-    zipFile = new File(indexDir, 
+    zipFile = new File(indexDirectory, 
             Indexer.MIMIR_COLLECTION_BASENAME + 
             "-" + zipFileId +
             Indexer.MIMIR_COLLECTION_EXTENSION);
@@ -236,6 +382,7 @@
     }
     currentEntries = 0;
     currentLength = 0;
+    inputBuffer.clear();
   }
   
   /**
@@ -246,144 +393,11 @@
     if(zipOuputStream != null) zipOuputStream.close();
   }
   
-
-  
   /**
-   * Opens a zip file and creates a DocumentCollection object for accessing 
the 
-   * document data.
-   * @param indexDirectory
-   * @throws IndexException if the document collection files cannot be 
accessed. 
-   */
-  public DocumentCollection(File indexDirectory) throws IndexException {
-    this.indexDir = indexDirectory;
-    // prepare the document cache
-    documentCache = new Long2ObjectLinkedOpenHashMap<DocumentData>();
-    
-    byteArrayOS = new ByteArrayOutputStream();
-    documentId = 0;
-    zipFileId = 0;
-  }
-  
-  /**
-   * Opens all the collection files, parses their catalogues, and populates the
-   * {@link #zipFiles} and {@link #maxEntries} arrays. 
-   * @param indexDirectory
-   * @throws IndexException
-   */
-  protected void openCollectionFiles() throws IndexException{
-    File mg4JIndexDir = new File(indexDir, Indexer.MG4J_INDEX_DIRNAME);
-    if(!mg4JIndexDir.isDirectory()) throw new IndexException(
-            "Cannot locate an MG4J index directory at " + mg4JIndexDir + "!");
-    File[] collectionFiles = mg4JIndexDir.listFiles(
-            new CollectionFilenameFilter());
-    if(collectionFiles.length == 0){
-      logger.warn("No collection files found! The index at " + indexDir + 
-              " is probably emtpy or corrupted!");
-    }
-    //sort the files by ID
-    Arrays.sort(collectionFiles, new Comparator<File>(){
-      public int compare(File o1, File o2) {
-        return getZipFileId(o1.getName()) - getZipFileId(o2.getName());
-      }
-    });
-    zipFiles = new ArrayList<ZipFile>(collectionFiles.length);
-    
-    int[] maxEntriesArr = new int[collectionFiles.length];
-    for(int  i = 0; i  < collectionFiles.length; i++){
-      try {
-        //for each file, open a ZipFile, parse the entries, set the maxEntry 
value.
-        ZipFile aZipFile = new ZipFile(collectionFiles[i]); 
-        zipFiles.add(aZipFile);
-        Enumeration<? extends ZipEntry> entries = aZipFile.entries();
-        maxEntriesArr[i] = -1;
-        while(entries.hasMoreElements()){
-          ZipEntry anEntry = entries.nextElement();
-          String entryName = anEntry.getName();
-          try {
-            int entryId = Integer.parseInt(entryName);
-            //sanity check
-            if(i > 0 && entryId <= maxEntriesArr[i-1]){
-              throw new IndexException(
-                      "Invalid entries distribution: collection file " + 
-                      collectionFiles[i].getAbsolutePath() + 
-                      " contains an entry named \"" + entryName + 
-                      "\", but an entry with a larger-or-equal ID was " +
-                      "already seen in a previous collection file!");
-            }
-            //update the current maximum
-            if(entryId > maxEntriesArr[i]) maxEntriesArr[i] = entryId;
-          } catch(NumberFormatException e) {
-            //not parseable -> we'll ignore this entry.
-            logger.warn("Unparseable zip entry name: " + entryName);
-          }
-        }
-      } catch(ZipException e) {
-        throw new IndexException("Problem while reading collection file " + 
-                collectionFiles[i].getAbsolutePath(), e);
-      } catch(IOException e) {
-        throw new IndexException("Problem while accessing collection file " + 
-                collectionFiles[i].getAbsolutePath(), e);
-      }
-    }
-    maxEntries = new IntArrayList(maxEntriesArr);
-    logger.info("Opened zip collection: maxEntries = " + 
Arrays.toString(maxEntriesArr));
-  }
-  
-  /**
-   * Gets the document data for a given document ID.
-   * @param documentID the ID of the document to be retrieved.
-   * @return a {@link DocumentData} object for the requested document ID.
-   * @throws IOException if there are problems accessing the underlying zip 
file; 
-   * @throws NoSuchElementException if the requested document ID is not found.
-   */
-  public DocumentData getDocumentData(long documentID) throws IndexException{
-    if(closed) throw new IllegalStateException(
-            "This document collection has already been closed!");
-    
-    DocumentData documentData = documentCache.getAndMoveToFirst(documentID);
-    if(documentData == null) {
-      // cache miss
-      if(zipFiles == null){
-        //open the zip files, parse their catalogues and update the values in 
-        //maxEntries
-        openCollectionFiles();
-      }
-      //locate the right zip file
-      int zipFileId = 0;
-      while(zipFileId < maxEntries.size() && documentID > 
maxEntries.get(zipFileId)){
-        zipFileId++;
-      }
-      if(zipFileId >= maxEntries.size()){
-        //entry not found (entry number too large)
-        throw new NoSuchElementException("No entry found for document ID " + 
-                documentID + ". Document ID too large for this collection!");
-      }
-      
-      ZipEntry entry = 
zipFiles.get(zipFileId).getEntry(Long.toString(documentID));
-      if(entry == null) throw new NoSuchElementException(
-          "No entry found for document ID " + documentID);
-      try {
-        ObjectInputStream ois = new 
ObjectInputStream(zipFiles.get(zipFileId).getInputStream(entry));
-        documentData = (DocumentData) ois.readObject();
-        ois.close();
-        documentCache.putAndMoveToFirst(documentID, documentData);
-        if(documentCache.size() > DOCUMENT_DATA_CACHE_SIZE) {
-          documentCache.removeLast();
-        }
-      } catch(ClassNotFoundException e) {
-        //invalid data read from the zip file
-        throw new IndexException("Invalid data read from zip file!", e);
-      } catch(IOException e) {
-        throw new IndexException("Exception reading zip file!", e);
-      }
-    }
-    return documentData;  
-  }
-  
-  /**
    * Close this document collection and release all allocated resources (such 
    * as open file handles). 
    * @throws IOException 
+   * @throws IndexException 
    */
   public void close() throws IOException {
     // close the writer
@@ -403,4 +417,64 @@
     }
     documentCache.clear();
   }
-}
+  
+  
+  protected void compact() throws ZipException, IOException {
+    
+    ZipOutputStream outputStream = null;
+    long outFileSize = 0;
+    int outFileEntries = 0;
+    for(File inputFile : enumerateCollectionFiles()) {
+      ZipFile inputZipFile = new ZipFile(inputFile);
+      if(outputStream == null) {
+        // we're not currently writing because all files so far have been OK
+        if(inputZipFile.size() < ZIP_FILE_MAX_ENTRIES &&
+           inputFile.length() < ZIP_FILE_MAX_SIZE) {
+          // the current file is too small: we need to add more entries to it
+          
+          //this becomes the first out file
+          // TODO
+          //MKDIR out dir
+          // mv file to out
+          // open file for writing
+          outFileEntries = inputZipFile.size();
+          inputZipFile.close();
+          outFileSize = inputFile.length();
+          outputStream = new ZipOutputStream(new BufferedOutputStream(
+              new  FileOutputStream(inputFile)));
+        }        
+      } else {
+        // we're currently writing to an output file: we need to copy all the
+        // entries in the new input file
+        Enumeration<? extends ZipEntry> inputEntries = inputZipFile.entries();
+        while(inputEntries.hasMoreElements()) {
+          if(outFileEntries > ZIP_FILE_MAX_ENTRIES ||
+              outFileSize > ZIP_FILE_MAX_SIZE) {
+            // we need to move on to the next zip output file
+            // TODO
+          }
+          
+          
+          ZipEntry inputEntry = inputEntries.nextElement();
+          ZipEntry outputEntry = new ZipEntry(inputEntry);
+          outputStream.putNextEntry(outputEntry);
+          //write the data
+          byte[] buf = new byte[1024 * 1024];
+          InputStream is = inputZipFile.getInputStream(inputEntry);
+          int read = is.read(buf);
+          while(read >= 0) {
+            outputStream.write(buf, 0, read);
+            read = is.read(buf);
+          }
+          outputStream.closeEntry();
+          outFileSize += outputEntry.getCompressedSize();
+          outFileEntries++;
+          
+
+        }
+      }
+
+
+    }
+  }
+}
\ No newline at end of file

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
CenturyLink Cloud: The Leader in Enterprise Cloud Services.
Learn Why More Businesses Are Choosing CenturyLink Cloud For
Critical Workloads, Development Environments & Everything In Between.
Get a Quote or Start a Free Trial Today. 
http://pubads.g.doubleclick.net/gampad/clk?id=119420431&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

[gate-cvs] SF.net SVN: gate:[17246] mimir/branches/5.0/mimir-core/src/gate/mimir

Reply via email to