[Xindice Command Tool] patch Importing Tree

JC Tchitchiama 23 Jan 2004 11:45:38 -0000

Hi All,

The current Xindice tool only imports files from the file system. I see the
need to extend that functionality to include importing files from Jar files
as well.
I am just submitting this patch so that we can start the ball rolling. 
It is by no means perfect and has some bugs in it which I know about.


The current behaviour is unchanged.

Let's see what the community thinks.

Best Regards.

JC.

Index: AddDocument.java
===================================================================
RCS file: /home/cvspublic/xml-xindice/java/src/org/apache/xindice/tools/command/AddDocument.java,v
retrieving revision 1.12
diff -u -r1.12 AddDocument.java
--- AddDocument.java	15 Jan 2004 14:23:20 -0000	1.12
+++ AddDocument.java	23 Jan 2004 11:36:14 -0000
@@ -108,8 +108,14 @@
                 return false;
             }
 
-            File file = new File((String) table.get(XMLTools.FILE_PATH));
-            InputStream fis = new FileInputStream(file);
+            InputStream fis = null;
+            if (table.get(XMLTools.FILE_PATH) instanceof String ) {
+                File file = new File((String) table.get(XMLTools.FILE_PATH));
+                fis = new FileInputStream(file);
+            }
+            if (table.get(XMLTools.FILE_PATH) instanceof InputStream  ) {
+                fis = (InputStream)table.get(XMLTools.FILE_PATH);
+            }
 
             // Parse in XML using Xerces
             SAXParserFactory spf = javax.xml.parsers.SAXParserFactory.newInstance();

Index: ImportTree.java
===================================================================
RCS file: /home/cvspublic/xml-xindice/java/src/org/apache/xindice/tools/command/ImportTree.java,v
retrieving revision 1.11
diff -u -r1.11 ImportTree.java
--- ImportTree.java	9 Aug 2003 21:19:53 -0000	1.11
+++ ImportTree.java	23 Jan 2004 11:37:06 -0000
@@ -63,10 +63,30 @@
 
 import org.xmldb.api.DatabaseManager;
 import org.xmldb.api.base.Collection;
+import org.xmldb.api.base.XMLDBException;
 
 import java.io.File;
 import java.io.FileFilter;
+import java.io.InputStream;
+import java.io.FilenameFilter;
+import java.io.InputStreamReader;
+import java.io.IOException;
+import java.io.FileNotFoundException;
+import java.io.BufferedReader;
 import java.util.Hashtable;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.StringTokenizer;
+import java.util.Iterator;
+import java.util.Enumeration;
+import java.util.zip.ZipFile;
+import java.util.zip.ZipEntry;
+import java.net.URL;
+import java.net.URLClassLoader;
+
+
 
 /**
  * ImportTree.java is designed to take a directory/file path and create a Collection
@@ -118,8 +138,14 @@
 
             File startdir = new File((String) table.get(XMLTools.FILE_PATH));
 
-            // Make call to process, this is called recursively!
-            process(startdir, colstring, table);
+            //now process the directory of the jars
+            if (startdir.exists() == true) {
+                // Make call to process, this is called recursively!
+                process(startdir, colstring, table);
+            } else {
+                //Try to process jars in the classpath
+                new DocumentResolver().processJar(colstring, (String)table.get(XMLTools.FILE_PATH), table);
+            }
 
         } finally {
             // Be sure to close collection objects
@@ -181,6 +207,19 @@
         addDocument.execute(table);
     }
 
+    /**
+     * Adds a single document whose content is read from the given stream,
+     * delegating to AddDocument with the stream stored under FILE_PATH.
+     */
+    protected void importFile(String collectionName, String name, InputStream is)
+            throws Exception {
+        Hashtable args = new Hashtable();
+        args.put(XMLTools.FILE_PATH, is);
+        args.put(XMLTools.NAME_OF, name);
+        args.put(XMLTools.COLLECTION, collectionName);
+        addDocument.execute(args);
+    }
+
     protected String createCollection(String baseCollection, String newCollection)
             throws Exception {
         if (!newCollection.equals(".")) {
@@ -194,6 +233,241 @@
         }
 
         return baseCollection + "/" + newCollection;
+    }
+
+    /**
+     * Locates jar/zip archives on the application classpath and imports the
+     * XML documents they contain, creating the matching collections on the way.
+     *
+     * <p>Archives are opened when the resolver is constructed; processJar
+     * closes and forgets each one after searching it, so a resolver serves a
+     * single import run.</p>
+     */
+    protected class DocumentResolver {
+
+        /** Open archives (ZipFile instances) that have not been searched yet. */
+        final List locations = new ArrayList();
+
+        /** Creates a resolver and opens every archive found on the classpath. */
+        public DocumentResolver() {
+            initializeClasspath();
+        }
+
+        /**
+         * Fills the archive list from the <code>java.class.path</code> system
+         * property. Zip and jar files are opened directly; directories are
+         * scanned for jar files by setDirectory.
+         *
+         * <p>The property is read rather than casting the system class loader
+         * to URLClassLoader: that cast fails on JVMs whose system loader is of
+         * another type, and URL paths can be percent-encoded, which breaks
+         * File lookups for entries with spaces in their path.</p>
+         */
+        private void initializeClasspath() {
+            String classpath = System.getProperty("java.class.path");
+            if (classpath == null) {
+                return;
+            }
+
+            Set seen = new HashSet(); // classpath entries already handled
+            StringTokenizer elements = new StringTokenizer(classpath, File.pathSeparator);
+
+            while (elements.hasMoreTokens()) {
+                String path = elements.nextToken();
+                if (!seen.add(path)) {
+                    continue; // skip duplicate.
+                }
+
+                File element = new File(path);
+                if (element.isFile()) {
+                    String lower = path.toLowerCase();
+                    if (lower.endsWith(".zip") || lower.endsWith(".jar")) {
+                        try {
+                            locations.add(new ZipFile(element.getAbsolutePath()));
+                        } catch (IOException ignored) {
+                            // unreadable archive: skip this zip file, then.
+                        }
+                    }
+                } else if (element.isDirectory()) {
+                    try {
+                        setDirectory(element.getPath());
+                    } catch (FileNotFoundException ignored) {
+                        // directory went away meanwhile: skip only this entry
+                    }
+                }
+            }
+        }
+
+        /**
+         * Opens every jar file located directly in a directory and adds it to
+         * the archives to search.
+         *
+         * @param dirName the directory to search
+         * @throws FileNotFoundException if dirName is not an existing directory
+         */
+        protected void setDirectory(String dirName)
+            throws FileNotFoundException {
+            File directory = new File(dirName);
+
+            // isDirectory() is also false when the path does not exist
+            if (!directory.isDirectory()) {
+                throw new FileNotFoundException(dirName + " is not a directory");
+            }
+
+            File[] jars = directory.listFiles(new JARFilter());
+            if (jars == null) {
+                return; // listFiles() returns null when the listing fails
+            }
+            for (int i = 0; i < jars.length; i++) {
+                String jarPath = jars[i].getAbsolutePath();
+                System.err.println("Searching " + jarPath);
+                try {
+                    locations.add(new ZipFile(jarPath));
+                } catch (IOException ignored) {
+                    // not a readable archive: skip this file
+                }
+            }
+        }
+
+        /**
+         * Searches every opened archive for entries whose name starts with
+         * matchPattern and ends with the wanted extension, and imports each
+         * one into a collection derived from its path. The last path segment
+         * of matchPattern becomes the first collection level created below
+         * baseCollection.
+         *
+         * <p>An error while handling an archive is reported and ends the work
+         * on that archive only. Every archive is closed and removed from the
+         * list once it has been searched.</p>
+         *
+         * @param baseCollection collection path below which documents are added
+         * @param matchPattern prefix that entry names must start with
+         * @param table command arguments; EXTENSION and VERBOSE are read
+         * @throws IOException declared for existing callers; close failures
+         *         are reported instead of being thrown
+         */
+        protected void processJar(String baseCollection, String matchPattern,
+            Hashtable table) throws IOException {
+            // "true".equals(..) tolerates a table without a VERBOSE entry
+            final boolean verbose = "true".equals(table.get(XMLTools.VERBOSE));
+
+            //*** The file extension to use for reading in files default .xml
+            final String ext =
+                (table.get(XMLTools.EXTENSION) != null)
+                    ? '.' + (String) table.get(XMLTools.EXTENSION)
+                    : ".xml";
+
+            //*** length of the pattern's leading directories, slash included
+            final int patternDirLength = matchPattern.lastIndexOf("/") + 1;
+
+            Iterator it = locations.iterator();
+            while (it.hasNext()) {
+                ZipFile zf = (ZipFile) it.next();
+                try {
+                    Set created = new HashSet(); // don't create twice.
+
+                    //*** look into jar entries
+                    Enumeration entries = zf.entries();
+                    while (entries.hasMoreElements()) {
+                        ZipEntry ze = (ZipEntry) entries.nextElement();
+                        String entryName = ze.getName();
+                        if (entryName.startsWith(matchPattern)
+                            && entryName.endsWith(ext)) {
+                            importEntry(zf, ze, baseCollection,
+                                patternDirLength, created, verbose);
+                        }
+                    }
+                } catch (Exception e) {
+                    System.out.println("ERROR : " + e.getMessage());
+                    if (verbose) {
+                        e.printStackTrace(System.err);
+                    }
+                } finally {
+                    // a closed ZipFile cannot be read again, so forget it
+                    it.remove();
+                    try {
+                        zf.close();
+                    } catch (IOException closeEx) {
+                        // keep going: the other archives still need handling
+                        System.out.println("ERROR : " + closeEx.getMessage());
+                    }
+                }
+            }
+        }
+
+        /**
+         * Imports a single archive entry: derives the collection path and the
+         * document name from the entry name, creates the collections not yet
+         * created for this archive, then adds the document.
+         */
+        private void importEntry(ZipFile zf, ZipEntry ze, String baseCollection,
+            int patternDirLength, Set created, boolean verbose)
+            throws Exception {
+            String entryName = ze.getName();
+
+            // an entry at the archive root has no slash: its directory is ""
+            int slash = entryName.lastIndexOf("/");
+            String dir = (slash == -1) ? "" : entryName.substring(0, slash);
+            String documentName = entryName.substring(slash + 1);
+
+            // strip the pattern's leading directories; nothing remains when
+            // the entry sits directly in the pattern's parent directory
+            String pathCollection = (patternDirLength < dir.length())
+                ? dir.substring(patternDirLength) : "";
+
+            //*** full (with connection string) collection path; no trailing
+            //*** slash when the document goes straight into the base
+            String newCollection = (pathCollection.length() == 0)
+                ? baseCollection
+                : baseCollection + "/" + pathCollection;
+
+            if (verbose) {
+                System.err.println(
+                    "->->->    FOUND XML " + entryName + " in " + zf.getName());
+                System.err.println("->->->    pathCollection :- " + pathCollection);
+                System.err.println("->->->    newCollection :- " + newCollection);
+                System.err.println("->->->    documentName :- " + documentName);
+            }
+
+            //*** create the collections this document needs, once per path
+            if (created.add(pathCollection)) {
+                String current = baseCollection;
+                StringTokenizer levels = new StringTokenizer(pathCollection, "/", false);
+                while (levels.hasMoreTokens()) {
+                    String level = levels.nextToken();
+                    try {
+                        current = createCollection(current, level);
+                    } catch (XMLDBException ignored) {
+                        /* ignore duplicate collection creation */
+                        current = current.concat("/" + level);
+                    }
+                }
+            }
+
+            //*** finally import the document
+            InputStream in = zf.getInputStream(ze);
+            try {
+                importFile(newCollection, documentName, in);
+            } finally {
+                if (in != null) {
+                    try {
+                        in.close();
+                    } catch (IOException ignored) {
+                        // the document is already handled; nothing to undo
+                    }
+                }
+            }
+        }
+
+        /**
+         * Accepts files whose name ends with ".jar", ignoring case. The dot is
+         * required so that names merely ending in "jar" are not picked up.
+         */
+        class JARFilter implements FilenameFilter {
+            public boolean accept(File dir, String name) {
+                return name.toLowerCase().endsWith(".jar");
+            }
+        }
     }
 
     /**

[Xindice Command Tool] patch Importing Tree

Reply via email to