This is an automated email from the ASF dual-hosted git repository.
rskraba pushed a commit to branch branch-1.11
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/branch-1.11 by this push:
new 41b248100 AVRO-3560: Throw SchemaParseException on dangling content in
avsc beyond end of schema (#1748)
41b248100 is described below
commit 41b248100774e145262cbee2122384f69f93b8c2
Author: Radai Rosenblatt <[email protected]>
AuthorDate: Wed Jun 14 09:23:05 2023 -0700
AVRO-3560: Throw SchemaParseException on dangling content in avsc beyond
end of schema (#1748)
---
.../avro/src/main/java/org/apache/avro/Schema.java | 29 ++++++++++++++---
.../src/test/java/org/apache/avro/TestSchema.java | 36 ++++++++++++++++++++++
.../apache/avro/io/TestResolvingIOResolving.java | 2 +-
.../io/parsing/TestResolvingGrammarGenerator.java | 6 ++--
4 files changed, 64 insertions(+), 9 deletions(-)
diff --git a/lang/java/avro/src/main/java/org/apache/avro/Schema.java b/lang/java/avro/src/main/java/org/apache/avro/Schema.java
index f6c3de768..97513f52f 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/Schema.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/Schema.java
@@ -26,11 +26,13 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.DoubleNode;
import com.fasterxml.jackson.databind.node.NullNode;
+import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.io.StringWriter;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -1429,7 +1431,7 @@ public abstract class Schema extends JsonProperties implements Serializable {
* names known to this parser.
*/
public Schema parse(File file) throws IOException {
- return parse(FACTORY.createParser(file));
+ return parse(FACTORY.createParser(file), false);
}
/**
@@ -1437,7 +1439,7 @@ public abstract class Schema extends JsonProperties implements Serializable {
* names known to this parser. The input stream stays open after the parsing.
*/
public Schema parse(InputStream in) throws IOException {
- return parse(FACTORY.createParser(in).disable(JsonParser.Feature.AUTO_CLOSE_SOURCE));
+ return parse(FACTORY.createParser(in).disable(JsonParser.Feature.AUTO_CLOSE_SOURCE), true);
}
/** Read a schema from one or more json strings */
@@ -1454,19 +1456,36 @@ public abstract class Schema extends JsonProperties implements Serializable {
*/
public Schema parse(String s) {
try {
- return parse(FACTORY.createParser(s));
+ return parse(FACTORY.createParser(s), false);
} catch (IOException e) {
throw new SchemaParseException(e);
}
}
- private Schema parse(JsonParser parser) throws IOException {
+ private Schema parse(JsonParser parser, boolean allowDanglingContent) throws IOException {
boolean saved = validateNames.get();
boolean savedValidateDefaults = VALIDATE_DEFAULTS.get();
try {
validateNames.set(validate);
VALIDATE_DEFAULTS.set(validateDefaults);
- return Schema.parse(MAPPER.readTree(parser), names);
+ JsonNode jsonNode = MAPPER.readTree(parser);
+ Schema schema = Schema.parse(jsonNode, names);
+ if (!allowDanglingContent) {
+ String dangling;
+ StringWriter danglingWriter = new StringWriter();
+ int numCharsReleased = parser.releaseBuffered(danglingWriter);
+ if (numCharsReleased == -1) {
+ ByteArrayOutputStream danglingOutputStream = new ByteArrayOutputStream();
+ parser.releaseBuffered(danglingOutputStream); // if input isnt chars above it must be bytes
+ dangling = new String(danglingOutputStream.toByteArray(), StandardCharsets.UTF_8).trim();
+ } else {
+ dangling = danglingWriter.toString().trim();
+ }
+ if (!dangling.isEmpty()) {
+ throw new SchemaParseException("dangling content after end of schema: " + dangling);
+ }
+ }
+ return schema;
} catch (JsonParseException e) {
throw new SchemaParseException(e);
} finally {
diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java
index 95cb36746..a4e2718cc 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java
@@ -21,10 +21,14 @@ import static org.junit.Assert.*;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -395,4 +399,36 @@ public class TestSchema {
assertEquals("Int", nameInt.getQualified("space"));
}
+ @Test(expected = SchemaParseException.class)
+ public void testContentAfterAvsc() throws Exception {
+ Schema.Parser parser = new Schema.Parser();
+ parser.setValidate(true);
+ parser.setValidateDefaults(true);
+ parser.parse("{\"type\": \"string\"}; DROP TABLE STUDENTS");
+ }
+
+ @Test
+ public void testContentAfterAvscInInputStream() throws Exception {
+ Schema.Parser parser = new Schema.Parser();
+ parser.setValidate(true);
+ parser.setValidateDefaults(true);
+ String avsc = "{\"type\": \"string\"}; DROP TABLE STUDENTS";
+ ByteArrayInputStream is = new ByteArrayInputStream(avsc.getBytes(StandardCharsets.UTF_8));
+ Schema schema = parser.parse(is);
+ assertNotNull(schema);
+ }
+
+ @Test(expected = SchemaParseException.class)
+ public void testContentAfterAvscInFile() throws Exception {
+ File avscFile = Files.createTempFile("testContentAfterAvscInFile", null).toFile();
+ try (FileWriter writer = new FileWriter(avscFile)) {
+ writer.write("{\"type\": \"string\"}; DROP TABLE STUDENTS");
+ writer.flush();
+ }
+
+ Schema.Parser parser = new Schema.Parser();
+ parser.setValidate(true);
+ parser.setValidateDefaults(true);
+ parser.parse(avscFile);
+ }
}
diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java
index 8e3dc8e53..abde027e2 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java
@@ -101,7 +101,7 @@ public class TestResolvingIOResolving {
"{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" +
"{\"name\": \"g1\", "
+
"\"type\":{\"type\":\"record\",\"name\":\"inner\",\"fields\":["
+ "{\"name\":\"f1\", \"type\":\"int\", \"default\": 101}," +
"{\"name\":\"f2\", \"type\":\"int\"}]}}, "
- + "{\"name\": \"g2\", \"type\": \"long\"}]}}",
+ + "{\"name\": \"g2\", \"type\": \"long\"}]}",
"RRIIL", new Object[] { 10, 101, 11L } },
// Default value for a record.
{ "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" + "{\"name\":
\"g2\", \"type\": \"long\"}]}", "L",
diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java
index 4eac760ce..212bc3ad3 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java
@@ -103,9 +103,9 @@ public class TestResolvingGrammarGenerator {
public static Collection<Object[]> data() {
Collection<Object[]> ret = Arrays.asList(new Object[][] {
{ "{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + " {
\"name\" : \"f1\", \"type\": \"int\" }, "
- + " { \"name\" : \"f2\", \"type\": \"float\" } " + "] } }", "{
\"f2\": 10.4, \"f1\": 10 } " },
- { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\",
\"s2\"] } }", " \"s1\" " },
- { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\",
\"s2\"] } }", " \"s2\" " },
+ + " { \"name\" : \"f2\", \"type\": \"float\" } " + "] }", "{
\"f2\": 10.4, \"f1\": 10 } " },
+ { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\",
\"s2\"] }", " \"s1\" " },
+ { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\",
\"s2\"] }", " \"s2\" " },
{ "{ \"type\": \"fixed\", \"name\": \"f\", \"size\": 10 }",
"\"hello\"" },
{ "{ \"type\": \"array\", \"items\": \"int\" }", "[ 10, 20, 30 ]" },
{ "{ \"type\": \"map\", \"values\": \"int\" }", "{ \"k1\": 10, \"k3\":
20, \"k3\": 30 }" },