Hi all,
Here's my attempt at a JNI GNU/POSIX path canonicalizer. If you
missed my previous mail, this is required in order for FilePermission
checks to work (PR classpath/24895). Classpath's canonicalizer
doesn't handle symbolic links, whereas GCJ's does, just not very well.
This patch makes Classpath do the right thing on GNU/POSIX systems.
I haven't committed it as it almost certainly breaks builds on
Windows, and I need some help to get stuff building conditionally.
After that I can have a go at porting the Windows canonicalizer from
GCJ (I don't think Classpath's current one does much on Windows).
Thanks,
Gary
Index: java/io/File.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/io/File.java,v
retrieving revision 1.61
diff -u -r1.61 File.java
--- java/io/File.java 17 Dec 2005 21:16:23 -0000 1.61
+++ java/io/File.java 29 Mar 2006 08:40:39 -0000
@@ -484,9 +484,9 @@
/**
* This method returns a canonical representation of the pathname of
* this file. The actual form of the canonical representation is
- * different. On the GNU system, the canonical form differs from the
- * absolute form in that all relative file references to "." and ".."
- * are resolved and removed.
+ * system-dependent. On the GNU system, conversion to canonical
+ * form involves the removal of redundant separators, references to
+ * "." and "..", and symbolic links.
* <p>
* Note that this method, unlike the other methods which return path
* names, can throw an IOException. This is because native method
Index: vm/reference/java/io/VMFile.java
===================================================================
RCS file: /cvsroot/classpath/classpath/vm/reference/java/io/VMFile.java,v
retrieving revision 1.7
diff -u -r1.7 VMFile.java
--- vm/reference/java/io/VMFile.java 2 Jul 2005 20:33:08 -0000 1.7
+++ vm/reference/java/io/VMFile.java 29 Mar 2006 08:40:39 -0000
@@ -210,10 +210,10 @@
/**
* This method returns a canonical representation of the pathname of
- * the given path. The actual form of the canonical representation is
- * different. On the GNU system, the canonical form differs from the
- * absolute form in that all relative file references to "." and ".."
- * are resolved and removed.
+ * the given path. The actual form of the canonical representation is
+ * system-dependent. On the GNU system, conversion to canonical
+ * form involves the removal of redundant separators, references to
+ * "." and "..", and symbolic links.
* <p>
* Note that this method, unlike the other methods which return path
* names, can throw an IOException. This is because native method
@@ -221,9 +221,5 @@
*
* @exception IOException If an error occurs
*/
- public static String toCanonicalForm(String path) throws IOException
- {
- // FIXME: this only works on UNIX
- return PlatformHelper.toCanonicalForm(path);
- }
+ public static native String toCanonicalForm(String path) throws IOException;
}
Index: include/java_io_VMFile.h
===================================================================
RCS file: /cvsroot/classpath/classpath/include/java_io_VMFile.h,v
retrieving revision 1.3
diff -u -r1.3 java_io_VMFile.h
--- include/java_io_VMFile.h 11 Nov 2004 17:31:31 -0000 1.3
+++ include/java_io_VMFile.h 29 Mar 2006 08:40:39 -0000
@@ -24,6 +24,7 @@
JNIEXPORT jboolean JNICALL Java_java_io_VMFile_canWrite (JNIEnv *env, jclass,
jstring);
JNIEXPORT jboolean JNICALL Java_java_io_VMFile_canRead (JNIEnv *env, jclass,
jstring);
JNIEXPORT jboolean JNICALL Java_java_io_VMFile_isDirectory (JNIEnv *env,
jclass, jstring);
+JNIEXPORT jstring JNICALL Java_java_io_VMFile_toCanonicalForm (JNIEnv
*env,jclass, jstring);
#undef java_io_VMFile_IS_CASE_SENSITIVE
#define java_io_VMFile_IS_CASE_SENSITIVE 1L
#undef java_io_VMFile_IS_DOS_8_3
Index: native/jni/java-io/java_io_VMFile.c
===================================================================
RCS file: /cvsroot/classpath/classpath/native/jni/java-io/java_io_VMFile.c,v
retrieving revision 1.10
diff -u -r1.10 java_io_VMFile.c
--- native/jni/java-io/java_io_VMFile.c 25 Jan 2006 10:40:12 -0000 1.10
+++ native/jni/java-io/java_io_VMFile.c 29 Mar 2006 08:40:39 -0000
@@ -1,5 +1,5 @@
/* java_io_VMFile.c - Native methods for java.io.File class
- Copyright (C) 1998, 2004 Free Software Foundation, Inc.
+ Copyright (C) 1998, 2004, 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -41,6 +41,8 @@
#include <stdio.h>
#include <stdlib.h>
+#include <limits.h>
+
#include <jni.h>
#include <jcl.h>
@@ -730,3 +732,213 @@
return (0);
#endif /* not WITHOUT_FILESYSTEM */
}
+
+/*************************************************************************/
+
+/* Upper bound on the number of symbolic links expanded while
+   canonicalizing one path.  lstat() succeeds on a link that points
+   back at itself, so without a bound such a link would be expanded
+   forever.  */
+#define CANONICALIZE_MAX_SYMLINKS 40
+
+/*
+ * This method converts a path to canonical form on GNU/POSIX systems.
+ * Redundant separators and "." and ".." components are removed, and
+ * symbolic links are expanded for as long as the filesystem can be
+ * consulted.
+ *
+ * The path must be absolute, otherwise a RuntimeException is raised.
+ * An IOException is raised if the path or an intermediate result does
+ * not fit in PATH_MAX, if ".." would step above the root directory, or
+ * if a symbolic link cannot be read.  NULL is returned on every error.
+ *
+ * Class:     java_io_VMFile
+ * Method:    toCanonicalForm
+ * Signature: (Ljava/lang/String;)Ljava/lang/String;
+ */
+
+JNIEXPORT jstring JNICALL
+Java_java_io_VMFile_toCanonicalForm (JNIEnv *env,
+                                     jclass class __attribute__ ((__unused__)),
+                                     jstring jpath)
+{
+#ifndef WITHOUT_FILESYSTEM
+  const char *path;
+  char *src, *dst, *tmp;
+  int srci, dsti, tmpi;
+  int len, dsti_save;
+  int fschecks = 1;
+  int nlinks = 0;
+  struct stat sb;
+  jstring result;
+
+  path = JCL_jstring_to_cstring(env, jpath);
+  if (path == NULL)
+    return NULL;
+
+  /* It is the caller's responsibility to ensure the path is absolute.
+     An empty path is rejected here too, since path[0] is then '\0'.  */
+  if (path[0] != '/')
+    {
+      JCL_free_cstring(env, jpath, path);
+      JCL_ThrowException(env, "java/lang/RuntimeException", "Not absolute");
+      return NULL;
+    }
+
+  len = strlen(path);
+  if (len >= PATH_MAX)
+    {
+      JCL_free_cstring(env, jpath, path);
+      JCL_ThrowException(env, "java/io/IOException", "Path too long");
+      return NULL;
+    }
+
+  /* A single allocation provides three PATH_MAX-sized buffers: src
+     holds the input still to be processed, dst the canonical path
+     built so far, and tmp is scratch space for expanding links.  */
+  src = JCL_malloc(env, PATH_MAX * 3);
+  if (src == NULL)
+    {
+      JCL_free_cstring(env, jpath, path);
+      return NULL;
+    }
+  dst = src + PATH_MAX;
+  tmp = dst + PATH_MAX;
+
+  strcpy(src, path);
+  JCL_free_cstring(env, jpath, path);
+
+  /* dst always begins with the root directory; dsti is its length.  */
+  dst[0] = '/';
+  dst[1] = '\0';
+  dsti = 1;
+
+  srci = 1;
+
+  while (src[srci] != '\0')
+    {
+      /* Skip slashes. */
+      while (src[srci] == '/')
+        srci++;
+      tmpi = srci;
+      /* Find next slash. */
+      while (src[srci] != '/' && src[srci] != '\0')
+        srci++;
+      if (srci == tmpi)
+        /* We hit the end. */
+        break;
+      len = srci - tmpi;
+
+      /* Handle "." and "..". */
+      if (len == 1 && src[tmpi] == '.')
+        continue;
+      if (len == 2 && src[tmpi] == '.' && src[tmpi + 1] == '.')
+        {
+          if (dsti == 1)
+            {
+              /* Unlike other JVMs we do not rewind past the root
+                 directory. I can't see any legitimate reason why you
+                 would want this, and chopping off bits of path seems
+                 like a sure-fire way to introduce vulnerabilities. */
+              JCL_free(env, src);
+              JCL_ThrowException(env, "java/io/IOException",
+                                 "Too many up-level references");
+              return NULL;
+            }
+          /* Drop the last component of dst and its separator. */
+          while (dsti > 1 && dst[dsti - 1] != '/')
+            dsti--;
+          if (dsti != 1)
+            dsti--;
+          /* Reenable filesystem checking if disabled, as we might
+             have reversed over whatever caused the problem before.
+             At least one proprietary JVM has inconsistencies because
+             it does not do this. */
+          fschecks = 1;
+          continue;
+        }
+
+      /* Handle real path components. */
+      if (dsti + len + 1 >= PATH_MAX)
+        {
+          JCL_free(env, src);
+          JCL_ThrowException(env, "java/io/IOException", "Path too long");
+          return NULL;
+        }
+      dsti_save = dsti;
+      if (dsti > 1)
+        dst[dsti++] = '/';
+      strncpy(&dst[dsti], &src[tmpi], len);
+      dsti += len;
+      if (fschecks == 0)
+        continue;
+
+      dst[dsti] = '\0';
+      if (lstat(dst, &sb) == 0)
+        {
+          if (S_ISLNK(sb.st_mode))
+            {
+              if (++nlinks > CANONICALIZE_MAX_SYMLINKS)
+                {
+                  /* Almost certainly a symlink loop.  Keep this
+                     component unexpanded and stop consulting the
+                     filesystem, as for any other unresolvable path. */
+                  fschecks = 0;
+                  continue;
+                }
+
+              tmpi = readlink(dst, tmp, PATH_MAX);
+              if (tmpi < 1)
+                {
+                  /* readlink failed or returned an empty target. */
+                  JCL_free(env, src);
+                  JCL_ThrowException(env, "java/io/IOException",
+                                     "Unable to read symbolic link");
+                  return NULL;
+                }
+              if (tmpi == PATH_MAX)
+                {
+                  /* The target may have been truncated. */
+                  JCL_free(env, src);
+                  JCL_ThrowException(env, "java/io/IOException",
+                                     "Path too long");
+                  return NULL;
+                }
+
+              /* Prepend the link's path to src. */
+              if (tmpi + strlen(&src[srci]) >= PATH_MAX)
+                {
+                  JCL_free(env, src);
+                  JCL_ThrowException(env, "java/io/IOException",
+                                     "Path too long");
+                  return NULL;
+                }
+              while (src[srci] != '\0')
+                tmp[tmpi++] = src[srci++];
+              tmp[tmpi] = '\0';
+              strcpy(src, tmp);
+              srci = 0;
+
+              /* Either replace or append dst depending on whether the
+                 link is relative or absolute. */
+              dsti = tmp[0] == '/' ? 1 : dsti_save;
+            }
+        }
+      else
+        {
+          /* Something doesn't exist, or we don't have permission to
+             read it, or a previous path component is not a directory,
+             or a symlink is looped. Whatever, we can't check the
+             filesystem any more. */
+          fschecks = 0;
+        }
+    }
+  dst[dsti] = '\0';
+
+  result = (*env)->NewStringUTF (env, dst);
+  JCL_free(env, src);
+  return result;
+#else /* not WITHOUT_FILESYSTEM */
+  return NULL;
+#endif /* not WITHOUT_FILESYSTEM */
+}
Index: gnu/java/io/PlatformHelper.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/java/io/PlatformHelper.java,v
retrieving revision 1.6
diff -u -r1.6 PlatformHelper.java
--- gnu/java/io/PlatformHelper.java 14 Nov 2005 13:08:11 -0000 1.6
+++ gnu/java/io/PlatformHelper.java 29 Mar 2006 08:40:39 -0000
@@ -97,98 +97,6 @@
}
/**
- * This routine canonicalizes input param "path" to formal path
representation
- * for current platform, including interpreting ".." and "." .
- */
- public static final String toCanonicalForm(String path)
- {
- /*??
- if(path.indexOf('.') < 0 && path.indexOf("..") < 0)
- return path;
- */
- String tmppath = path.replace('/', separatorChar);
- StringBuffer canonpath;
-
- int i;
-
- if ((i = beginWithRootPathPrefix(tmppath)) == 0 )
- return path;
-
- /* The original
- "canonpath = new StringBuffer(tmppath.substring(0, i))"
- isn't very efficient because StringBuffer's
- ensureCapacity_unsynchronized will fail definitely each time
- and will enlarge buffer and copy contents. .
- */
- canonpath = new StringBuffer(INITIAL_MAX_PATH);
- canonpath.append(tmppath.substring(0, i));
- tmppath = tmppath.substring(i);
- // pathdepth==0 indicates there're only root path in the buffer
- int pathdepth = 0;
-
- StringTokenizer st = new StringTokenizer(tmppath, separator);
-
- // Traverse each element of the path, handling "." and ".."
- // Should handle "~" too?
- if (st.hasMoreTokens())
- do
- {
- String s = st.nextToken();
-
- // Handle "." or an empty element.
- if (s.equals(".") || s.equals(""))
- continue;
-
- // Handle ".." by deleting the last element from the path
- if (s.equals(".."))
- {
- if (pathdepth == 0)
- continue;
-
- // Strip of trailing separator
- canonpath.setLength(canonpath.length() -
1/*separator.length()*/);
- String tmpstr = canonpath.toString();
- int idx = tmpstr.lastIndexOf(separator);
-
- if ((idx == -1) || ((idx + 1/*separator.length()*/) >
tmpstr.length()))
- //throw new IOException("Can't happen error");
- return path; // Shouldn't happen
-
- canonpath.setLength(idx + 1/*separator.length()*/);
- pathdepth--;
- continue;
- }
-
- canonpath.append(s);
- pathdepth++; //now it's more than root path
-
- if (st.hasMoreTokens())
- canonpath.append(separator);
- }
- while (st.hasMoreTokens());
-
- if (endWithSeparator(path))
- canonpath.append(separator);
-
- String tmpstr = canonpath.toString();
- //if (pathdepth > 0 && endWithSeparator(tmpstr) )
- // tmpstr = tmpstr.substring(0, tmpstr.length() -
1/*separator.length()*/);
-
- return tmpstr;
- }
-
- /**
- * This routine canonicalizes input param "path" to formal path
representation
- * for current platform, and normalize all separators to "sepchar".
- */
- public static final String toCanonicalForm(String path, char sepchar)
- {
- String tmpstr = toCanonicalForm(path);
- tmpstr = tmpstr.replace(separatorChar, sepchar);
- return tmpstr;
- }
-
- /**
* This routine checks whether input param "path" ends with separator
*/
public static final boolean endWithSeparator(String path)