Hi All,
The current Xindice tool only imports files from the file system. I see a
need to extend that functionality to include importing files from JAR files
as well.
I am submitting this patch just to get the ball rolling.
It is by no means perfect and has some bugs in it that I know about.
The current behaviour is unchanged.
Let's see what the community thinks.
Best Regards.
JC.
Index: AddDocument.java
===================================================================
RCS file: /home/cvspublic/xml-xindice/java/src/org/apache/xindice/tools/command/AddDocument.java,v
retrieving revision 1.12
diff -u -r1.12 AddDocument.java
--- AddDocument.java 15 Jan 2004 14:23:20 -0000 1.12
+++ AddDocument.java 23 Jan 2004 11:36:14 -0000
@@ -108,8 +108,14 @@
return false;
}
- File file = new File((String) table.get(XMLTools.FILE_PATH));
- InputStream fis = new FileInputStream(file);
+ InputStream fis = null;
+ if (table.get(XMLTools.FILE_PATH) instanceof String ) {
+ File file = new File((String) table.get(XMLTools.FILE_PATH));
+ fis = new FileInputStream(file);
+ }
+ if (table.get(XMLTools.FILE_PATH) instanceof InputStream ) {
+ fis = (InputStream)table.get(XMLTools.FILE_PATH);
+ }
// Parse in XML using Xerces
SAXParserFactory spf = javax.xml.parsers.SAXParserFactory.newInstance();
Index: ImportTree.java
===================================================================
RCS file: /home/cvspublic/xml-xindice/java/src/org/apache/xindice/tools/command/ImportTree.java,v
retrieving revision 1.11
diff -u -r1.11 ImportTree.java
--- ImportTree.java 9 Aug 2003 21:19:53 -0000 1.11
+++ ImportTree.java 23 Jan 2004 11:37:06 -0000
@@ -63,10 +63,30 @@
import org.xmldb.api.DatabaseManager;
import org.xmldb.api.base.Collection;
+import org.xmldb.api.base.XMLDBException;
import java.io.File;
import java.io.FileFilter;
+import java.io.InputStream;
+import java.io.FilenameFilter;
+import java.io.InputStreamReader;
+import java.io.IOException;
+import java.io.FileNotFoundException;
+import java.io.BufferedReader;
import java.util.Hashtable;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.StringTokenizer;
+import java.util.Iterator;
+import java.util.Enumeration;
+import java.util.zip.ZipFile;
+import java.util.zip.ZipEntry;
+import java.net.URL;
+import java.net.URLClassLoader;
+
+
/**
* ImportTree.java is designed to take a directory/file path and create a Collection
@@ -118,8 +138,14 @@
File startdir = new File((String) table.get(XMLTools.FILE_PATH));
- // Make call to process, this is called recursively!
- process(startdir, colstring, table);
+ // Now process the directory or, if it does not exist, the jars on the classpath
+ if (startdir.exists() == true) {
+ // Make call to process, this is called recursively!
+ process(startdir, colstring, table);
+ } else {
+ //Try to process jars in the classpath
+ new DocumentResolver().processJar(colstring, (String)table.get(XMLTools.FILE_PATH), table);
+ }
} finally {
// Be sure to close collection objects
@@ -181,6 +207,19 @@
addDocument.execute(table);
}
+ protected void importFile(String collectionName, String name, InputStream is)
+ throws Exception {
+ Hashtable table = new Hashtable();
+ // Use functionality from AddDocument to add this document
+ table.put(XMLTools.COLLECTION, collectionName);
+ table.put(XMLTools.NAME_OF, name);
+ table.put(XMLTools.FILE_PATH, is);
+
+ addDocument.execute(table);
+ }
+
+
+
protected String createCollection(String baseCollection, String newCollection)
throws Exception {
if (!newCollection.equals(".")) {
@@ -194,6 +233,241 @@
}
return baseCollection + "/" + newCollection;
+ }
+
+ /**
+ * Allow us to add collection and document from zip or jar source
+ */
+ protected class DocumentResolver {
+
+ /**
+ * ZipFile objects for the zip/jar archives found on the classpath
+ */
+ final List locations = new ArrayList();
+
+ public DocumentResolver() {
+ initializeClasspath();
+ }
+
+ /**
+ * Fills locations with a ZipFile for each .zip/.jar URL of the system class loader; directory URLs are scanned for jar files.
+ */
+ private void initializeClasspath() {
+ try {
+ //*** use library list to find urls from the normal classloader
+ URL[] appClassLoaderUrls =
+ ((URLClassLoader) Thread
+ .currentThread()
+ .getContextClassLoader()
+ .getSystemClassLoader())
+ .getURLs();
+
+ Set duplicates = new HashSet(); // don't add duplicates.
+
+ for (int j = 0; j < appClassLoaderUrls.length; j++) {
+ URL url = (URL) appClassLoaderUrls[j];
+
+ String path = url.getPath();
+ File zipElement = new File(path);
+
+ if (duplicates.contains(path))
+ continue; // skip duplicate.
+ else
+ duplicates.add(path);
+
+ if (zipElement.isFile()) {
+ if (path.toLowerCase().endsWith(".zip")
+ || path.toLowerCase().endsWith(".jar")) {
+ try {
+ locations.add(
+ new ZipFile(zipElement.getAbsolutePath()));
+ } catch (IOException ex) { /* skip this zip file, then. */
+ }
+ }
+ } else
+ if (zipElement.isDirectory()) {
+ setDirectory(zipElement.getPath());
+ }
+ }
+ ((ArrayList) locations).trimToSize(); // save memory.
+ } catch (FileNotFoundException fnfEx) { /* just skip that file then */
+ }
+ }
+
+ /**
+ * Add a directory to search
+ * @param dirName the directory to search
+ * @throws FileNotFoundException
+ */
+ protected void setDirectory(String dirName)
+ throws FileNotFoundException {
+ File directory = new File(dirName);
+
+ if (!directory.exists() || !directory.isDirectory()) {
+ throw new FileNotFoundException(
+ dirName + " is not a directory");
+ }
+
+ File dirList[] = directory.listFiles(new JARFilter());
+ for (int i = 0; i < dirList.length; i++) {
+ System.err.println("Searching " + dirList[i].getAbsolutePath());
+ try {
+ ZipFile zf = new ZipFile(dirList[i].getAbsolutePath());
+ locations.add(zf);
+ } catch (IOException iox) { /* skip this file */
+ }
+ }
+ }
+
+ /**
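+ * Process the classpath locations and examine jar files to extract documents
+ * @param baseCollection collection under which the documents are imported
+ * @param matchPattern entry-name prefix selecting the zip entries to import
+ * @param table command arguments (file extension and verbose flag are read)
+ * @throws IOException if closing a zip file fails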
+ */
+ protected void processJar(
+ String baseCollection,
+ String matchPattern,
+ Hashtable table)
+ throws IOException {
+ ZipFile zf = null;
+ Iterator it = locations.iterator();
+ String rootCollection = baseCollection;
+
+ //*** The file extension to use for reading in files default .xml
+ final String ext =
+ ((table.get(XMLTools.EXTENSION) != null)
+ ? '.' + (String) table.get(XMLTools.EXTENSION)
+ : ".xml");
+
+ while (it.hasNext()) {
+ try {
+ //*** initialize zip file
+ zf = (ZipFile) it.next();
+
+ Set duplicates = new HashSet(); // don't add duplicates.
+
+ //*** look into jar entries
+ Enumeration enum = zf.entries();
+ while (enum.hasMoreElements()) {
+ ZipEntry ze = (ZipEntry) enum.nextElement();
+ if (ze.getName().startsWith(matchPattern)) {
+ String path = ze.getName();
+
+ //*** check for the extension of interest
+ if (ze.getName().endsWith(ext)) {
+
+ //*** the slash character MUST exist when we get here.
+ int idx =
+ (path.lastIndexOf("/") == -1
+ ? path.length()
+ : path.lastIndexOf("/"));
+ path = path.substring(0, path.lastIndexOf("/"));
+
+ //*** reset rootCollection to be that of the base
+ rootCollection = baseCollection;
+
+ //*** extract the collection path, excluding the connection string
+ // *** +1 to skip the slash character
+ idx =
+ (matchPattern.lastIndexOf("/") == -1
+ ? 0
+ : matchPattern.lastIndexOf("/") + 1);
+ String pathCollection =
+ path.substring(idx, path.length());
+
+ //*** construct the full (with connection string) new collection path
+ String newCollection =
+ rootCollection + "/" + pathCollection;
+
+ String documentName = ze.getName();
+ documentName =
+ documentName.substring(
+ documentName.lastIndexOf("/") + 1);
+
+ if (table
+ .get(XMLTools.VERBOSE)
+ .equals("true")) {
+ System.err.println(
+ "->->-> FOUND XML "
+ + ze.getName()
+ + " in "
+ + zf.getName());
+ System.err.println(
+ "->->-> pathCollection :- "
+ + pathCollection);
+ System.err.println(
+ "->->-> newCollection :- "
+ + newCollection);
+ System.err.println(
+ "->->-> documentName :- "
+ + documentName);
+ }
+
+ //***attempt to create all required collections for the xml files to add
+ if (duplicates.contains(pathCollection)
+ == false) {
+ // not seen before: remember it so later entries skip collection creation.
+ duplicates.add(pathCollection);
+
+ StringTokenizer s =
+ new StringTokenizer(
+ pathCollection,
+ "/",
+ false);
+ while (s.hasMoreTokens()) {
+ String token = s.nextToken();
+ try {
+ rootCollection =
+ createCollection(
+ rootCollection,
+ token);
+ } catch (XMLDBException xdbEx) {
+ /* ignore duplicate collection creation */
+ //*** update collection path
+ rootCollection =
+ rootCollection.concat(
+ "/" + token);
+ }
+ }
+ }
+
+ //*** finally import the document
+ importFile(
+ newCollection,
+ documentName,
+ zf.getInputStream(ze));
+ }
+ }
+ }
+ } catch (Exception e) {
+ System.out.println("ERROR : " + e.getMessage());
+ if (table.get(XMLTools.VERBOSE).equals("true")) {
+ e.printStackTrace(System.err);
+ }
+ } finally {
+ if (zf != null) {
+ //*** now close the stream to allow it to be garbage collected
+ zf.close();
+ }
+ }
+ }
+ }
+
+
+
+ /**
+ * Filename filter that accepts names ending in "jar" (case-insensitive)
+ */
+ class JARFilter implements FilenameFilter
+ {
+ public boolean accept(File dir, String name)
+ {
+ if (name.toLowerCase().endsWith("jar")) return true;
+ else return false;
+ }
+ }
}
/**