This is an automated email from the ASF dual-hosted git repository.

rskraba pushed a commit to branch branch-1.11
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/branch-1.11 by this push:
     new 41b248100 AVRO-3560: Throw SchemaParseException on dangling content in 
avsc beyond end of schema (#1748)
41b248100 is described below

commit 41b248100774e145262cbee2122384f69f93b8c2
Author: Radai Rosenblatt <[email protected]>
AuthorDate: Wed Jun 14 09:23:05 2023 -0700

    AVRO-3560: Throw SchemaParseException on dangling content in avsc beyond 
end of schema (#1748)
---
 .../avro/src/main/java/org/apache/avro/Schema.java | 29 ++++++++++++++---
 .../src/test/java/org/apache/avro/TestSchema.java  | 36 ++++++++++++++++++++++
 .../apache/avro/io/TestResolvingIOResolving.java   |  2 +-
 .../io/parsing/TestResolvingGrammarGenerator.java  |  6 ++--
 4 files changed, 64 insertions(+), 9 deletions(-)

diff --git a/lang/java/avro/src/main/java/org/apache/avro/Schema.java 
b/lang/java/avro/src/main/java/org/apache/avro/Schema.java
index f6c3de768..97513f52f 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/Schema.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/Schema.java
@@ -26,11 +26,13 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.node.DoubleNode;
 import com.fasterxml.jackson.databind.node.NullNode;
 
+import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Serializable;
 import java.io.StringWriter;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -1429,7 +1431,7 @@ public abstract class Schema extends JsonProperties 
implements Serializable {
      * names known to this parser.
      */
     public Schema parse(File file) throws IOException {
-      return parse(FACTORY.createParser(file));
+      return parse(FACTORY.createParser(file), false);
     }
 
     /**
@@ -1437,7 +1439,7 @@ public abstract class Schema extends JsonProperties 
implements Serializable {
      * names known to this parser. The input stream stays open after the 
parsing.
      */
     public Schema parse(InputStream in) throws IOException {
-      return 
parse(FACTORY.createParser(in).disable(JsonParser.Feature.AUTO_CLOSE_SOURCE));
+      return 
parse(FACTORY.createParser(in).disable(JsonParser.Feature.AUTO_CLOSE_SOURCE), 
true);
     }
 
     /** Read a schema from one or more json strings */
@@ -1454,19 +1456,36 @@ public abstract class Schema extends JsonProperties 
implements Serializable {
      */
     public Schema parse(String s) {
       try {
-        return parse(FACTORY.createParser(s));
+        return parse(FACTORY.createParser(s), false);
       } catch (IOException e) {
         throw new SchemaParseException(e);
       }
     }
 
-    private Schema parse(JsonParser parser) throws IOException {
+    private Schema parse(JsonParser parser, boolean allowDanglingContent) 
throws IOException {
       boolean saved = validateNames.get();
       boolean savedValidateDefaults = VALIDATE_DEFAULTS.get();
       try {
         validateNames.set(validate);
         VALIDATE_DEFAULTS.set(validateDefaults);
-        return Schema.parse(MAPPER.readTree(parser), names);
+        JsonNode jsonNode = MAPPER.readTree(parser);
+        Schema schema = Schema.parse(jsonNode, names);
+        if (!allowDanglingContent) {
+          String dangling;
+          StringWriter danglingWriter = new StringWriter();
+          int numCharsReleased = parser.releaseBuffered(danglingWriter);
+          if (numCharsReleased == -1) {
+            ByteArrayOutputStream danglingOutputStream = new 
ByteArrayOutputStream();
+            parser.releaseBuffered(danglingOutputStream); // if input isnt 
chars above it must be bytes
+            dangling = new String(danglingOutputStream.toByteArray(), 
StandardCharsets.UTF_8).trim();
+          } else {
+            dangling = danglingWriter.toString().trim();
+          }
+          if (!dangling.isEmpty()) {
+            throw new SchemaParseException("dangling content after end of 
schema: " + dangling);
+          }
+        }
+        return schema;
       } catch (JsonParseException e) {
         throw new SchemaParseException(e);
       } finally {
diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java 
b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java
index 95cb36746..a4e2718cc 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java
@@ -21,10 +21,14 @@ import static org.junit.Assert.*;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -395,4 +399,36 @@ public class TestSchema {
     assertEquals("Int", nameInt.getQualified("space"));
   }
 
+  @Test(expected = SchemaParseException.class)
+  public void testContentAfterAvsc() throws Exception {
+    Schema.Parser parser = new Schema.Parser();
+    parser.setValidate(true);
+    parser.setValidateDefaults(true);
+    parser.parse("{\"type\": \"string\"}; DROP TABLE STUDENTS");
+  }
+
+  @Test
+  public void testContentAfterAvscInInputStream() throws Exception {
+    Schema.Parser parser = new Schema.Parser();
+    parser.setValidate(true);
+    parser.setValidateDefaults(true);
+    String avsc = "{\"type\": \"string\"}; DROP TABLE STUDENTS";
+    ByteArrayInputStream is = new 
ByteArrayInputStream(avsc.getBytes(StandardCharsets.UTF_8));
+    Schema schema = parser.parse(is);
+    assertNotNull(schema);
+  }
+
+  @Test(expected = SchemaParseException.class)
+  public void testContentAfterAvscInFile() throws Exception {
+    File avscFile = Files.createTempFile("testContentAfterAvscInFile", 
null).toFile();
+    try (FileWriter writer = new FileWriter(avscFile)) {
+      writer.write("{\"type\": \"string\"}; DROP TABLE STUDENTS");
+      writer.flush();
+    }
+
+    Schema.Parser parser = new Schema.Parser();
+    parser.setValidate(true);
+    parser.setValidateDefaults(true);
+    parser.parse(avscFile);
+  }
 }
diff --git 
a/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java 
b/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java
index 8e3dc8e53..abde027e2 100644
--- 
a/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java
+++ 
b/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java
@@ -101,7 +101,7 @@ public class TestResolvingIOResolving {
             "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" + 
"{\"name\": \"g1\", "
                 + 
"\"type\":{\"type\":\"record\",\"name\":\"inner\",\"fields\":["
                 + "{\"name\":\"f1\", \"type\":\"int\", \"default\": 101}," + 
"{\"name\":\"f2\", \"type\":\"int\"}]}}, "
-                + "{\"name\": \"g2\", \"type\": \"long\"}]}}",
+                + "{\"name\": \"g2\", \"type\": \"long\"}]}",
             "RRIIL", new Object[] { 10, 101, 11L } },
         // Default value for a record.
         { "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" + "{\"name\": 
\"g2\", \"type\": \"long\"}]}", "L",
diff --git 
a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java
 
b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java
index 4eac760ce..212bc3ad3 100644
--- 
a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java
+++ 
b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java
@@ -103,9 +103,9 @@ public class TestResolvingGrammarGenerator {
   public static Collection<Object[]> data() {
     Collection<Object[]> ret = Arrays.asList(new Object[][] {
         { "{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + " { 
\"name\" : \"f1\", \"type\": \"int\" }, "
-            + " { \"name\" : \"f2\", \"type\": \"float\" } " + "] } }", "{ 
\"f2\": 10.4, \"f1\": 10 } " },
-        { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", 
\"s2\"] } }", " \"s1\" " },
-        { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", 
\"s2\"] } }", " \"s2\" " },
+            + " { \"name\" : \"f2\", \"type\": \"float\" } " + "] }", "{ 
\"f2\": 10.4, \"f1\": 10 } " },
+        { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", 
\"s2\"] }", " \"s1\" " },
+        { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", 
\"s2\"] }", " \"s2\" " },
         { "{ \"type\": \"fixed\", \"name\": \"f\", \"size\": 10 }", 
"\"hello\"" },
         { "{ \"type\": \"array\", \"items\": \"int\" }", "[ 10, 20, 30 ]" },
         { "{ \"type\": \"map\", \"values\": \"int\" }", "{ \"k1\": 10, \"k3\": 
20, \"k3\": 30 }" },

Reply via email to the commits mailing list.