[ https://issues.apache.org/jira/browse/AVRO-3001?focusedWorklogId=804024&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-804024 ]
ASF GitHub Bot logged work on AVRO-3001: ---------------------------------------- Author: ASF GitHub Bot Created on: 26/Aug/22 17:25 Start Date: 26/Aug/22 17:25 Worklog Time Spent: 10m Work Description: KalleOlaviNiemitalo commented on code in PR #1833: URL: https://github.com/apache/avro/pull/1833#discussion_r956252918 ########## lang/csharp/src/apache/main/IO/Parsing/Symbol.cs: ########## @@ -0,0 +1,983 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections; +using System.Collections.Generic; +using System.Linq; + +namespace Avro.IO.Parsing +{ + /// <summary> + /// Symbol is the base of all symbols (terminals and non-terminals) of the + /// grammar. + /// </summary> + public abstract class Symbol + { + /// <summary> + /// The type of symbol. 
+ /// </summary> + public enum Kind + { + /// <summary> + /// terminal symbols which have no productions </summary> + Terminal, + + /// <summary> + /// Start symbol for some grammar </summary> + Root, + + /// <summary> + /// non-terminal symbol which is a sequence of one or more other symbols </summary> + Sequence, + + /// <summary> + /// non-terminal to represent the contents of an array or map </summary> + Repeater, + + /// <summary> + /// non-terminal to represent the union </summary> + Alternative, + + /// <summary> + /// non-terminal action symbol which is automatically consumed </summary> + ImplicitAction, + + /// <summary> + /// non-terminal action symbol which is explicitly consumed </summary> + ExplicitAction + } + + /// The kind of this symbol. + public Kind SymKind { get; private set; } + + /// <summary> + /// The production for this symbol. If this symbol is a terminal this is + /// <tt>null</tt>. Otherwise this holds the sequence of the symbols that + /// forms the production for this symbol. The sequence is in the reverse order of + /// production. This is useful for easy copying onto parsing stack. + /// + /// Please note that this is a final. So the production for a symbol should be + /// known before that symbol is constructed. This requirement cannot be met for + /// those symbols which are recursive (e.g. a record that holds a union, a branch of + /// which is the record itself). To resolve this problem, we initialize the + /// symbol with an array of nulls. Later we fill the symbols. Not clean, but + /// works. The other option is to not have this field a final. But keeping it + /// final and thus keeping symbol immutable gives some comfort. See various + /// generators how we generate records. + /// </summary> + public Symbol[] Production { get; private set; } + + /// <summary> + /// Constructs a new symbol of the given kind. 
+ /// </summary> + protected Symbol(Kind kind) : this(kind, null) + { + } + + /// <summary> + /// Constructs a new symbol of the given kind and production. + /// </summary> + protected Symbol(Kind kind, Symbol[] production) + { + Production = production; + SymKind = kind; + } + + /// <summary> + /// A convenience method to construct a root symbol. + /// </summary> + public static Symbol NewRoot(params Symbol[] symbols) => new Root(symbols); + + /// <summary> + /// A convenience method to construct a sequence. + /// </summary> + /// <param name="production"> The constituent symbols of the sequence. </param> + public static Symbol NewSeq(params Symbol[] production) => new Sequence(production); + + /// <summary> + /// A convenience method to construct a repeater. + /// </summary> + /// <param name="endSymbol"> The end symbol. </param> + /// <param name="symsToRepeat"> The symbols to repeat in the repeater. </param> + public static Symbol NewRepeat(Symbol endSymbol, params Symbol[] symsToRepeat) => + new Repeater(endSymbol, symsToRepeat); + + /// <summary> + /// A convenience method to construct a union. + /// </summary> + public static Symbol NewAlt(Symbol[] symbols, string[] labels) => new Alternative(symbols, labels); + + /// <summary> + /// A convenience method to construct an ErrorAction. + /// </summary> + /// <param name="e"> </param> + protected static Symbol Error(string e) => new ErrorAction(e); + + /// <summary> + /// A convenience method to construct a ResolvingAction. + /// </summary> + /// <param name="w"> The writer symbol </param> + /// <param name="r"> The reader symbol </param> + protected static Symbol Resolve(Symbol w, Symbol r) => new ResolvingAction(w, r); + + /// <summary> + /// Fixup symbol. + /// </summary> + protected class Fixup + { + private readonly Symbol[] symbols; + + /// <summary> + /// The symbols. + /// </summary> + public Symbol[] Symbols + { + get { return (Symbol[])symbols.Clone(); } + } + + /// <summary> + /// The position. 
+ /// </summary> + public int Pos { get; private set; } + + /// <summary> + /// Initializes a new instance of the <see cref="Fixup"/> class. + /// </summary> + public Fixup(Symbol[] symbols, int pos) + { + this.symbols = (Symbol[])symbols.Clone(); + Pos = pos; + } + } + + /// <summary> + /// Flatten the given sub-array of symbols into a sub-array of symbols. + /// </summary> + protected virtual Symbol Flatten(IDictionary<Sequence, Sequence> map, IDictionary<Sequence, IList<Fixup>> map2) => this; + + /// <summary> + /// Returns the flattened size. + /// </summary> + public virtual int FlattenedSize() => 1; + + /// <summary> + /// Flattens the given sub-array of symbols into a sub-array of symbols. Every + /// <tt>Sequence</tt> in the input is replaced by its production recursively. + /// For non-<tt>Sequence</tt> symbols that internally have other symbols, those + /// internal symbols also get flattened. When flattening is done, the only place + /// there might be Sequence symbols is in the productions of a Repeater, + /// Alternative, or the symToParse and symToSkip in a UnionAdjustAction or + /// SkipAction. + /// + /// Why is this done? We want our parsers to be fast. If we left the grammars + /// unflattened, then the parser would be constantly copying the contents of + /// nested Sequence productions onto the parsing stack. Instead, because of + /// flattening, we have a long top-level production with no Sequences unless the + /// Sequence is absolutely needed, e.g., in the case of a Repeater or an + /// Alternative. + /// + /// Well, this is not exactly true when recursion is involved. Where there is a + /// recursive record, that record will be "inlined" once, but any internal (i.e., + /// recursive) references to that record will be a Sequence for the record. 
That + /// Sequence will not further inline itself Issue Time Tracking ------------------- Worklog Id: (was: 804024) Time Spent: 8h 40m (was: 8.5h) > JsonEncode Decode support for C# > -------------------------------- > > Key: AVRO-3001 > URL: https://issues.apache.org/jira/browse/AVRO-3001 > Project: Apache Avro > Issue Type: Improvement > Components: csharp > Affects Versions: 1.10.0, 1.11.0 > Reporter: Krishnan Unni > Assignee: Robert Yokota > Priority: Major > Labels: pull-request-available > Fix For: 1.12.0 > > Time Spent: 8h 40m > Remaining Estimate: 0h > > The C# library for avro currently supports only the Binary encoding and also > with compile time types (Generic support only). As part of a project I am > doing I need to validate the avro schema against the incoming json data on > the fly without a predefined type (generated class). So basically comparing > an avro schema (string/json representation) against a raw json string. It is > possible with the Java library since it supports both non generic types and > streams as well as json encoding. With C# currently this is not possible. Is > there a plan to extend the C# library to provide these features? If yes, is > there a timeline? If not is there any alternative to achieve this? -- This message was sent by Atlassian Jira (v8.20.10#820010)