Revision: 18791
          http://sourceforge.net/p/gate/code/18791
Author:   johann_p
Date:     2015-06-19 13:02:51 +0000 (Fri, 19 Jun 2015)
Log Message:
-----------
This should fix issue #200 (Duplicate plugin paths in persisted application 
files): before
persisting a URL for a plugin it first checks if that URL is a file: URL and if 
yes, if the
real file for that URL (using getCanonicalPath) has already been persisted.
This also adds a first attempt to fix #201 (Rethink creation of $relpath$ and 
related paths).
Details in the #201 bug description.

Modified Paths:
--------------
    gate/trunk/src/main/gate/util/persistence/PersistenceManager.java

Modified: gate/trunk/src/main/gate/util/persistence/PersistenceManager.java
===================================================================
--- gate/trunk/src/main/gate/util/persistence/PersistenceManager.java   
2015-06-19 01:20:04 UTC (rev 18790)
+++ gate/trunk/src/main/gate/util/persistence/PersistenceManager.java   
2015-06-19 13:02:51 UTC (rev 18791)
@@ -80,6 +80,14 @@
 import com.thoughtworks.xstream.io.xml.StaxReader;
 import com.thoughtworks.xstream.io.xml.XStream11NameCoder;
 import com.thoughtworks.xstream.io.xml.XmlFriendlyNameCoder;
+import java.nio.file.FileSystems;
+import java.nio.file.LinkOption;
+import java.nio.file.Path;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 /**
  * This class provides utility methods for saving resources through
@@ -155,55 +163,91 @@
 
   /**
    * URLs get upset when serialised and deserialised so we need to
-   * convert them to strings for storage. In the case of
+   * convert them to strings for storage. 
+   * In the case of
    * "file:" URLs the relative path to the persistence file
-   * will actually be stored, except when the URL refers to a resource
-   * within the current GATE home directory in which case the relative path
-   * to the GATE home directory will be stored. If the property 
+   * will actually be stored, except when {@link 
#saveObjectToFile(obj,file,true,trueOrFalse} was
+   * used and there is one of the following cases:
+   * <ul>
+   * <li>when the URL refers to a resource
+   * within the current GATE home directory and the persistence file is not 
within the GATE home
+   * directory, in which case the relative path
+   * to the GATE home directory will be stored. 
+   * <li>Otherwise, if the persistence file is not within the GATE directory 
and if the property 
    * gate.user.resourceshome is set to a directory path and the URL refers 
    * to a resource inside this directory, the relative path to this directory
-   * will be stored. If resources are stored relative to gate home or
+   * will be stored.
+   * <ul>
+   * If resources are stored relative to gate home or
    * resources home, a warning will also be logged.
+   * <p>
+   * The real path as returned by getCanonicalPath() is used to check if the 
resource is 
+   * within a special directory (e.g. GATE HOME). Once we know this, and the 
resource is in
+   * a special directory, the canonical paths are used to generate the 
relative path.
+   * If we are not inside a special directory, then the non-canonical path 
names will be used, so
+   * that any symbolic links are NOT getting resolved, which is usually what 
you want. 
+   * The non-canonical path names will get normalized to remove things like 
"../dir" and the 
+   * result of the normalization is checked using getCanonicalPath() to see if 
it refers to 
+   * the same location as the non-normalized one (it could be different if the 
.. follows a
+   * symbolic link, for example). If it is the same, then the normalized 
version is used, otherwise
+   * the original version is used. 
+   * 
    */
   public static class URLHolder implements Persistence {
     /**
      * Populates this Persistence with the data that needs to be stored
      * from the original source object.
      */
+    static final Logger logger = Logger.getLogger(URLHolder.class);
     @Override
     public void extractDataFromSource(Object source)
             throws PersistenceException {
-      final Logger logger = Logger.getLogger(URLHolder.class);
       try {
         URL url = (URL)source;
         if(url.getProtocol().equals("file")) {
-          try {
+            // url is what we want to convert to something that is relative to 
$relpath$, 
+            // $resourceshome$ or $gatehome$ 
+            
+            // The file in which the URL should get persisted, the directory 
this file is in
+            // is the location to which we want to make the URL relative, 
unless we want to 
+            // use $gatehome$ or $resourceshome$
+            File outFile = currentPersistenceFile();
+            File outFileReal = getCanonicalFileIfPossible(outFile);
+            // Note: the outDir will be correct if the application file itself 
is not a link
+            // If the application file is a link than its parent is the parent 
of the real
+            // path. 
+            File outDir = outFile.getParentFile(); 
+            File outDirReal = getCanonicalFileIfPossible(outDir);
+            File outDirOfReal = 
getCanonicalFileIfPossible(outFileReal.getParentFile());
+            
+            File urlFile = Files.fileFromURL(url);
+            File urlFileReal = getCanonicalFileIfPossible(urlFile);
+            
+            logger.debug("Trying to persist "+url+" for "+outFile);
+            logger.debug("urlFile="+urlFile+", urlFileReal"+urlFileReal);
+            logger.debug("outDir="+outDir+", outDirReal"+outDirReal);
+            
+            // by default, use just the relative path
             String pathMarker = relativePathMarker;
             
-            File gateHomePath = getGateHomePath();
-            File resourceshomeDir = getResourceshomePath();
+              // - this returns the canonical path to GATE_HOME
+            File gateHomePathReal = getGateHomePath();
+
+            // get the canonical path of the resources home directory, if set, 
otherwise null
+            File resourceshomeDirReal = getResourceshomePath();
             
-            URL urlPersistenceFilePath = 
-              
getCanonicalFileIfPossible(currentPersistenceFile()).toURI().toURL();
-            File urlPath = 
-              getCanonicalFileIfPossible(Files.fileFromURL(url));
-            url = urlPath.toURI().toURL();
+            // Only if we actually *want* to consider GATE_HOME and other 
special directories,
+            // do all of this ...
+            // Note: the semantics of this has slightly changed to what it was 
before: we only
+            // do this is the use gatehome flag is set, not if it is not set 
but the warn flag
+            // is set (previsouly, the warn flag alone was sufficient too).
+            if(currentUseGateHome()) {
+              // First of all, check if we want to use GATE_HOME instead: this 
happens 
+              // if the real location of the url is inside the real location 
of GATE_HOME            
+              // and the location of the outfile is outside of GATE_HOME
 
-            // If the persistence file does NOT reside in the GATE home
-            // tree and if the URL references something in the GATE home
-            // tree, use $gatehome$ instead of $relpath$
-            // Also if the system property for $resourceshome$ is set and the
-            // persistence file does not reside in the projecthome tree but
-            // the URL references something in the projecthome tree, use
-            // $resourceshome$ instead of $relpath$
-            // $resourceshome$ is only used when $gatehome$ would also be used,
-            // but there is a separate warning which is shown once something
-            // is stored relative to $resourceshome$
-            // If URL can be made relative to both gatehome and projecthome,
-            // gatehome is preferred.
-            if(currentUseGateHome() || currentWarnAboutGateHome()) {           
   
-              if (!isContainedWithin(currentPersistenceFile(), gateHomePath) &&
-                  isContainedWithin(urlPath, gateHomePath)) {
+              if (!isContainedWithin(outFileReal, gateHomePathReal) &&
+                  isContainedWithin(urlFileReal, gateHomePathReal)) {
                 logger.debug("Setting path marker to "+gatehomePathMarker);
                 if(currentWarnAboutGateHome()) {
                   if(!currentHaveWarnedAboutGateHome().getValue()) {
@@ -221,9 +265,12 @@
                 if(currentUseGateHome()) {
                   pathMarker = gatehomePathMarker;
                 }
-              } else if(resourceshomeDir != null &&
-                  !isContainedWithin(currentPersistenceFile(), 
resourceshomeDir) &&
-                  isContainedWithin(urlPath,resourceshomeDir)) {
+              } else 
+                // Otherwise, do the same check for the resources home path, 
but only if it
+                // is actuallys set
+                if(resourceshomeDirReal != null &&
+                  !isContainedWithin(outFileReal, resourceshomeDirReal) &&
+                  isContainedWithin(urlFileReal, resourceshomeDirReal)) {
                  if(currentWarnAboutGateHome()) {
                   if(!currentHaveWarnedAboutResourceshome().getValue()) {
                     logger.warn(
@@ -241,24 +288,57 @@
                
               }
             }
+                        
             if(pathMarker.equals(relativePathMarker)) {
-              urlString = pathMarker
-                 + getRelativePath(urlPersistenceFilePath, url);
+              // In theory we should just relativize here using the original 
paths, without
+              // following any symbolic links. We do not want to follow 
symbolic links because 
+              // somebody may want to use a symbolic link to a completely 
different location
+              // to include some resources into the project directory that 
contains the 
+              // pipeline (and the link to the resource).
+              // However, if the project directory itself is within a linked 
location, then 
+              // GATE will sometimes use the linked and sometimes the 
non-linked path 
+              // (for some reason it uses the non-linked path in the plugin 
manager but it uses
+              // the linked path when the application file is choosen). This 
means that 
+              // in those cases, there would be a large number of unwanted 
../../../../some/dir/to/get/back
+              // If we solve this by using the canonical paths, it will do the 
wrong thing for 
+              // links to resources. So we choose to make a compromise: if we 
can create a relative
+              // path that does not generate any ../ at the beginning of the 
relative part, then
+              // we use that, otherwise we use the real paths 
+              
+              String relPath = getRelativeFilePathString(outDir, urlFile);
+              logger.debug("First attempt got "+relPath);
+              if(relPath.startsWith("../")) {
+                // if we want to actually use the real path, we have to be 
careful which is the 
+                // real parent of the out file: if outFile is a symbolic link 
then it is 
+                // outDirOfReal otherwise it is outDirReal
+                logger.debug("upslength is "+upsLength(relPath));
+                String tmpPath = relPath;
+                if(java.nio.file.Files.isSymbolicLink(outFile.toPath())) {
+                  tmpPath = getRelativeFilePathString(outDirOfReal, 
urlFileReal);
+                  logger.debug("trying outDirOfReal "+tmpPath);
+                  logger.debug("upslength is "+upsLength(tmpPath));
+                } else {
+                  tmpPath = getRelativeFilePathString(outDirReal, 
urlFileReal);                                  
+                  logger.debug("trying outDirReal "+tmpPath);
+                  logger.debug("upslength is "+upsLength(tmpPath));
+                }
+                if(upsLength(tmpPath) < upsLength(relPath)) {
+                  relPath = tmpPath;
+                }
+                logger.debug("Using tmpPath "+relPath);
+              }
+              // if we still get something that starts with ../ then our only 
remaining option is
+              // to find if a parent 
+              urlString = pathMarker + relPath;
             } else if(pathMarker.equals(gatehomePathMarker)) {
-              urlString = pathMarker
-                 + getRelativePath(gateHomePath.toURI().toURL(), url);
+              urlString = pathMarker + 
getRelativeFilePathString(gateHomePathReal, urlFileReal);
             } else if(pathMarker.equals(resourceshomePathMarker)) {
-              urlString = pathMarker
-                 + getRelativePath(getResourceshomePath().toURI().toURL(), 
url);
+              urlString = pathMarker + 
getRelativeFilePathString(resourceshomeDirReal, urlFileReal);
             } else {
               // this should really never happen!
               throw new GateRuntimeException("Unexpected error when persisting 
URL "+url);
             }
-          }
-          catch(MalformedURLException mue) {
-            urlString = ((URL)source).toExternalForm();
-          }
-        }
+        } // if protocol is file:
         else {
           urlString = ((URL)source).toExternalForm();
         }
@@ -273,13 +353,17 @@
      * supposed to be a copy for the original object used as source for
      * data extraction.
      */
+    // TODO: this always uses the one-argument constructor for URL. This may 
not always
+    // do the right thing though, we should test this (e.g. when the URL 
contains %-encoded parts)
     @Override
     public Object createObject() throws PersistenceException {
       try {
         if(urlString.startsWith(relativePathMarker)) {
           URL context = currentPersistenceURL();
-          return new URL(context, urlString.substring(relativePathMarker
+          URL ret =  new URL(context, urlString.substring(relativePathMarker
                   .length()));
+          logger.debug("CurrentPresistenceURL is "+context+" created="+ret);
+          return ret;
         } else if(urlString.startsWith(gatehomePathMarker)) {
           URL gatehome =  
getCanonicalFileIfPossible(getGateHomePath()).toURI().toURL();
           return new URL(gatehome, 
urlString.substring(gatehomePathMarker.length()));
@@ -322,7 +406,11 @@
       }
     }
     
-    public File getGateHomePath() {
+    String urlString;
+    
+    //******** Helper methods just used inside the URLHolder class
+   
+    private File getGateHomePath() {
       if(gatehomePath != null) {
         return gatehomePath;
       } else {
@@ -331,7 +419,7 @@
       }
     }
     
-    public File getResourceshomePath() {
+    private File getResourceshomePath() {
       if(haveResourceshomePath == null) {
         String resourceshomeString = 
System.getProperty(resourceshomePropertyName);
         if(resourceshomeString == null) {
@@ -350,7 +438,7 @@
     }
 
 
-    public File getCanonicalFileIfPossible(File file) {
+    private File getCanonicalFileIfPossible(File file) {
       File tmp = file;
       try {
         tmp = tmp.getCanonicalFile();
@@ -360,8 +448,45 @@
       return tmp;
     }
     
-    String urlString;
+    /** 
+     * Creates the string that needs to get appended to the path of dir to 
obtain the path
+     * to file. 
+     * This does NOT follow any symbolic links - if true locations should be 
used, they 
+     * have to get passed to this method. This will make an attempt to access 
the file system
+     * to see of normalization can be done without changing the meaning of the 
path, but if 
+     * accessing the file system fails, this method will silently use the 
non-normalized 
+     * path instead.
+     * @param dir
+     * @param file
+     * @return 
+     */
+    private String getRelativeFilePathString(File dir, File file) {
+      Path dirPath = dir.toPath();
+      Path filePath = file.toPath();
+      try {
+        // create a normalized version of the paths, but without following 
symbolic links
+        dirPath = dirPath.toRealPath(LinkOption.NOFOLLOW_LINKS);
+      } catch (IOException ex) {
+        // ignore and use original 
+      }
+      try {
+        filePath = filePath.toRealPath(LinkOption.NOFOLLOW_LINKS);
+      } catch (IOException ex) {
+        // ignore and use original
+      }
+      return dirPath.relativize(filePath).toString();
+    }
 
+    private int upsLength(String path) {
+      Matcher m = goUpPattern.matcher(path);
+      if(m.matches()) {
+        return m.group(1).length();
+      } else {
+        return 0;
+      }
+    }
+    
+    
     /**
      * This string will be used to start the serialisation of URL that
      * represent relative paths.
@@ -384,7 +509,11 @@
     
     static final long serialVersionUID = 7943459208429026229L;
     
-  }
+    private static final Pattern goUpPattern = 
+            Pattern.compile(
+                    
"^((?:\\.\\."+Pattern.quote(FileSystems.getDefault().getSeparator())+")+).*");
+    
+  } // class URLHolder 
 
   public static class ClassComparator implements Comparator<Class<?>> {
     /**
@@ -584,6 +713,17 @@
    * This method can be used to determine if a specified file (or directory) is
    * contained within a given directory.
    * 
+   * This will check if the real location of file (with all symbolic links 
removed) is within
+   * the real location of directory with all symbolic links removed. If part 
of the file path
+   * is within the directory but eventually a symbolic link leads out of that 
directory, the
+   * file is considered to NOT be inside the directory. 
+   * <p>
+   * NOTE: this accesses the file system to find the real locations for file 
and directory. 
+   * However, if there is a problem accessing the file system, the method will 
fall back to 
+   * using the literal path names for checking and NOT produce an error. This 
is done so that
+   * any saving operation during which this method is invoked is not aborted 
and proceeds, 
+   * creating a file that may be correct apart from one or more serialized 
URLs. 
+   * 
    * @param file
    *          is this file contained within
    * @param directory
@@ -591,15 +731,45 @@
    * @return true if the file is contained within the directory, false 
otherwise
    */
   public static boolean isContainedWithin(File file, File directory) {
+    // JP: new experimental solution using java.nio 
+    Path dir = directory.toPath();
+    try {
+      dir = dir.toRealPath();
+    } catch (IOException ex) {
+      // ignore and use the original
+    }
+    Path filePath = file.toPath();
+    try {
+      filePath = filePath.toRealPath();
+    } catch (IOException ex) {
+      // ignore and use the original
+    }
+    return filePath.startsWith(dir);
     
-    File parent = file.getParentFile();
-    while (parent != null)
-    {
-      if (parent.equals(directory)) return true;
+    
+      // JP: Old solution using java.io and walking the tree and 
getCanonicalFile
+      /*
+      File dir = directory;
+      try {
+      dir = directory.getCanonicalFile();
+      } catch (IOException ex) {
+      // ignore
+      }
+      if(!dir.isDirectory()) { return false; }
+      
+      File parent = file.getParentFile();
+      while (parent != null)
+      {
+      try {
+      parent = parent.getCanonicalFile();
+      } catch (IOException ex) {
+      // ignore
+      }
+      if (parent.equals(dir)) return true;
       parent = parent.getParentFile();
-    }
-    
-    return false;
+      }
+      return false;
+      */
   }
   
    /**
@@ -701,11 +871,34 @@
     }
   }
 
+  /**
+   * Save the given object to the file, without using $gatehome$.
+   * This is equivalent to  {@link #saveObjectToFile(obj,file,false,false}. 
+   * This method exists with this definition to stay backwards compatible with 
+   * code that was using this method before paths relative to $gatehome$ were 
supported. 
+   * @param obj
+   * @param file
+   * @throws PersistenceException
+   * @throws IOException 
+   */
   public static void saveObjectToFile(Object obj, File file)
     throws PersistenceException, IOException {
     saveObjectToFile(obj, file, false, false);
   }
 
+  /**
+   * Save the given object to the given file.
+   * 
+   * @param obj
+   * @param file
+   * @param usegatehome if true (recommended) use $gatehome$ and 
$resourceshome$ instead of 
+   * $relpath$ in any saved path URLs if the location of that URL is inside 
GATE home or 
+   * inside the resources home directory (if set). 
+   * @param warnaboutgatehome if true, issue a warning message when a saved 
URL uses $gatehome$
+   * or $resourceshome$. 
+   * @throws PersistenceException
+   * @throws IOException 
+   */
   public static void saveObjectToFile(Object obj, File file,
           boolean usegatehome, boolean warnaboutgatehome)
           throws PersistenceException, IOException {
@@ -745,7 +938,27 @@
 
       // always write the list of creole URLs first
       List<URL> urlList = new 
ArrayList<URL>(Gate.getCreoleRegister().getDirectories());
-      Object persistentList = getPersistentRepresentation(urlList);
+      // go through all the URLs in the urlList and make sure that there are 
no duplicate
+      // file: entries
+      Set<String> realPaths = new HashSet<String>();
+      List<URL> cleanedList = new ArrayList<URL>();
+      for(URL url : urlList) {
+        if(url.getProtocol().equals("file:")) {
+          File dir = Files.fileFromURL(url);
+          try {
+            dir = dir.getCanonicalFile();
+          } catch(Exception ex) {
+            // ignore
+          }
+          if(!realPaths.contains(dir.getPath())) {
+            realPaths.add(dir.getPath());
+            cleanedList.add(url); // add the original URL, we just wanted dir 
to check if we already had it!
+          }
+        } else {
+          cleanedList.add(url);
+        }
+      }
+      Object persistentList = getPersistentRepresentation(cleanedList);
 
       Object persistentObject = getPersistentRepresentation(obj);
 

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to