JENA-923: (Preparation) Put all string escape processing in one place. Project: http://git-wip-us.apache.org/repos/asf/jena/repo Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/890c070a Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/890c070a Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/890c070a
Branch: refs/heads/master Commit: 890c070a4b710b57feb49898f63fab0882441870 Parents: 58c208c Author: Andy Seaborne <[email protected]> Authored: Tue Apr 28 14:50:20 2015 +0100 Committer: Andy Seaborne <[email protected]> Committed: Tue Apr 28 14:50:20 2015 +0100 ---------------------------------------------------------------------- .../org/apache/jena/riot/out/EscapeStr.java | 132 ++++++++++++++++- .../org/apache/jena/sparql/lang/ParserBase.java | 144 ++----------------- 2 files changed, 136 insertions(+), 140 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/jena/blob/890c070a/jena-arq/src/main/java/org/apache/jena/riot/out/EscapeStr.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/org/apache/jena/riot/out/EscapeStr.java b/jena-arq/src/main/java/org/apache/jena/riot/out/EscapeStr.java index 52d6c13..bf45890 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/out/EscapeStr.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/out/EscapeStr.java @@ -18,9 +18,9 @@ package org.apache.jena.riot.out; -import org.apache.jena.atlas.io.IndentedLineBuffer ; +import org.apache.jena.atlas.AtlasException ; import org.apache.jena.atlas.io.AWriter ; -import org.apache.jena.sparql.lang.ParserBase ; +import org.apache.jena.atlas.io.IndentedLineBuffer ; public class EscapeStr { @@ -104,13 +104,131 @@ public class EscapeStr // Utilities to remove escapes + /** Replace \ escapes (\\u, \t, \n etc) in a string */ public static String unescapeStr(String s) - { return unescape(s, '\\') ; } + { return unescapeStr(s, '\\') ; } - // Worker function - public static String unescape(String s, char escape) - { - return ParserBase.unescape(s, escape, false, -1, -1) ; + /** Replace \ escapes (\\u, \t, \n etc) in a string */ + public static String unescapeStr(String s, char escapeChar) + { return unescape(s, escapeChar, false) ; } + + // Main worker function 
for unescaping strings. + public static String unescape(String s, char escape, boolean pointCodeOnly) { + int i = s.indexOf(escape) ; + if ( i == -1 ) + return s ; + + // Dump the initial part straight into the string buffer + StringBuilder sb = new StringBuilder(s.substring(0,i)) ; + + for ( ; i < s.length() ; i++ ) + { + char ch = s.charAt(i) ; + + if ( ch != escape ) + { + sb.append(ch) ; + continue ; + } + + // Escape + if ( i >= s.length()-1 ) + throw new AtlasException("Illegal escape at end of string") ; + char ch2 = s.charAt(i+1) ; + i = i + 1 ; + + // \\u and \\U + if ( ch2 == 'u' ) + { + // i points to the \ so i+6 is next character + if ( i+4 >= s.length() ) + throw new AtlasException("\\u escape too short") ; + int x = hex(s, i+1, 4) ; + sb.append((char)x) ; + // Jump 1 2 3 4 -- already skipped \ and u + i = i+4 ; + continue ; + } + if ( ch2 == 'U' ) + { + // i points to the \ so i+6 is next character + if ( i+8 >= s.length() ) + throw new AtlasException("\\U escape too short") ; + int x = hex(s, i+1, 8) ; + // Convert to UTF-16 codepoint pair. + sb.append((char)x) ; + // Jump 1 2 3 4 5 6 7 8 -- already skipped \ and u + i = i+8 ; + continue ; + } + + // Are we doing just point code escapes? + // If so, \X-anything else is legal as a literal "\" and "X" + + if ( pointCodeOnly ) + { + sb.append('\\') ; + sb.append(ch2) ; + i = i + 1 ; + continue ; + } + + // Not just codepoints. Must be a legal escape. 
+ char ch3 = 0 ; + switch (ch2) + { + case 'n': ch3 = '\n' ; break ; + case 't': ch3 = '\t' ; break ; + case 'r': ch3 = '\r' ; break ; + case 'b': ch3 = '\b' ; break ; + case 'f': ch3 = '\f' ; break ; + case '\'': ch3 = '\'' ; break ; + case '\"': ch3 = '\"' ; break ; + case '\\': ch3 = '\\' ; break ; + default: + throw new AtlasException("Unknown escape: \\"+ch2) ; + } + sb.append(ch3) ; + } + return sb.toString() ; } + + public static int hex(String s, int i, int len) + { +// if ( i+len >= s.length() ) +// { +// +// } + int x = 0 ; + for ( int j = i ; j < i+len ; j++ ) + { + char ch = s.charAt(j) ; + int k = 0 ; + switch (ch) + { + case '0': k = 0 ; break ; + case '1': k = 1 ; break ; + case '2': k = 2 ; break ; + case '3': k = 3 ; break ; + case '4': k = 4 ; break ; + case '5': k = 5 ; break ; + case '6': k = 6 ; break ; + case '7': k = 7 ; break ; + case '8': k = 8 ; break ; + case '9': k = 9 ; break ; + case 'A': case 'a': k = 10 ; break ; + case 'B': case 'b': k = 11 ; break ; + case 'C': case 'c': k = 12 ; break ; + case 'D': case 'd': k = 13 ; break ; + case 'E': case 'e': k = 14 ; break ; + case 'F': case 'f': k = 15 ; break ; + default: + throw new AtlasException("Illegal hex escape: "+ch) ; + } + x = (x<<4)+k ; + } + return x ; + } + } http://git-wip-us.apache.org/repos/asf/jena/blob/890c070a/jena-arq/src/main/java/org/apache/jena/sparql/lang/ParserBase.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/lang/ParserBase.java b/jena-arq/src/main/java/org/apache/jena/sparql/lang/ParserBase.java index 33df3af..e947360 100644 --- a/jena-arq/src/main/java/org/apache/jena/sparql/lang/ParserBase.java +++ b/jena-arq/src/main/java/org/apache/jena/sparql/lang/ParserBase.java @@ -22,6 +22,7 @@ import java.math.BigInteger ; import java.util.HashSet ; import java.util.Set ; +import org.apache.jena.atlas.AtlasException ; import org.apache.jena.atlas.logging.Log ; import 
org.apache.jena.datatypes.RDFDatatype ; import org.apache.jena.datatypes.TypeMapper ; @@ -34,6 +35,7 @@ import org.apache.jena.n3.JenaURIException ; import org.apache.jena.query.ARQ ; import org.apache.jena.query.QueryParseException ; import org.apache.jena.riot.checker.CheckerIRI ; +import org.apache.jena.riot.out.EscapeStr ; import org.apache.jena.riot.system.ErrorHandler ; import org.apache.jena.riot.system.ErrorHandlerFactory ; import org.apache.jena.riot.system.RiotLib ; @@ -453,144 +455,20 @@ public class ParserBase // { return unescape(s, '\\', true, line, column) ; } + // Do we need the line/column versions? + // Why not catch exceptions and convert to QueryParseException + public static String unescapeStr(String s, int line, int column) { return unescape(s, '\\', false, line, column) ; } // Worker function - public static String unescape(String s, char escape, boolean pointCodeOnly, int line, int column) - { - int i = s.indexOf(escape) ; - - if ( i == -1 ) - return s ; - - // Dump the initial part straight into the string buffer - StringBuilder sb = new StringBuilder(s.substring(0,i)) ; - - for ( ; i < s.length() ; i++ ) - { - char ch = s.charAt(i) ; - // Keep line and column numbers. 
- switch (ch) - { - case '\n': - case '\r': - line++ ; - column = 1 ; - break ; - default: - column++ ; - break ; - } - - if ( ch != escape ) - { - sb.append(ch) ; - continue ; - } - - // Escape - if ( i >= s.length()-1 ) - throwParseException("Illegal escape at end of string", line, column) ; - char ch2 = s.charAt(i+1) ; - column = column+1 ; - i = i + 1 ; - - // \\u and \\U - if ( ch2 == 'u' ) - { - // i points to the \ so i+6 is next character - if ( i+4 >= s.length() ) - throwParseException("\\u escape too short", line, column) ; - int x = hex(s, i+1, 4, line, column) ; - sb.append((char)x) ; - // Jump 1 2 3 4 -- already skipped \ and u - i = i+4 ; - column = column+4 ; - continue ; - } - if ( ch2 == 'U' ) - { - // i points to the \ so i+6 is next character - if ( i+8 >= s.length() ) - throwParseException("\\U escape too short", line, column) ; - int x = hex(s, i+1, 8, line, column) ; - // Convert to UTF-16 codepoint pair. - sb.append((char)x) ; - // Jump 1 2 3 4 5 6 7 8 -- already skipped \ and u - i = i+8 ; - column = column+8 ; - continue ; - } - - // Are we doing just point code escapes? - // If so, \X-anything else is legal as a literal "\" and "X" - - if ( pointCodeOnly ) - { - sb.append('\\') ; - sb.append(ch2) ; - i = i + 1 ; - continue ; - } - - // Not just codepoints. Must be a legal escape. 
- char ch3 = 0 ; - switch (ch2) - { - case 'n': ch3 = '\n' ; break ; - case 't': ch3 = '\t' ; break ; - case 'r': ch3 = '\r' ; break ; - case 'b': ch3 = '\b' ; break ; - case 'f': ch3 = '\f' ; break ; - case '\'': ch3 = '\'' ; break ; - case '\"': ch3 = '\"' ; break ; - case '\\': ch3 = '\\' ; break ; - default: - throwParseException("Unknown escape: \\"+ch2, line, column) ; - } - sb.append(ch3) ; - } - return sb.toString() ; - } - - // Line and column that started the escape - public static int hex(String s, int i, int len, int line, int column) - { -// if ( i+len >= s.length() ) -// { -// -// } - int x = 0 ; - for ( int j = i ; j < i+len ; j++ ) - { - char ch = s.charAt(j) ; - column++ ; - int k = 0 ; - switch (ch) - { - case '0': k = 0 ; break ; - case '1': k = 1 ; break ; - case '2': k = 2 ; break ; - case '3': k = 3 ; break ; - case '4': k = 4 ; break ; - case '5': k = 5 ; break ; - case '6': k = 6 ; break ; - case '7': k = 7 ; break ; - case '8': k = 8 ; break ; - case '9': k = 9 ; break ; - case 'A': case 'a': k = 10 ; break ; - case 'B': case 'b': k = 11 ; break ; - case 'C': case 'c': k = 12 ; break ; - case 'D': case 'd': k = 13 ; break ; - case 'E': case 'e': k = 14 ; break ; - case 'F': case 'f': k = 15 ; break ; - default: - throwParseException("Illegal hex escape: "+ch, line, column) ; - } - x = (x<<4)+k ; + public static String unescape(String s, char escape, boolean pointCodeOnly, int line, int column) { + try { + return EscapeStr.unescape(s, escape, pointCodeOnly) ; + } catch (AtlasException ex) { + throwParseException(ex.getMessage(), line, column) ; + return null ; } - return x ; } public static String unescapePName(String s, int line, int column)
