To help clarify my understanding of Thrift's IDL and Protocol Structure, I
transliterated the following two documents into ABNF (RFC5234) documents.
http://wiki.apache.org/thrift/ThriftIDL
http://svn.apache.org/viewvc/incubator/thrift/trunk/doc/thrift.bnf?view=markup
Due to discrepancies between the above documents and a few of the thrift
implementations that I sampled, the correct specification is unclear to
me. I'd appreciate any corrections and/or feedback on the attached ABNF
documents.
regards,
Joe N.
--
[email protected]
;; Document ;;
;; Top-level rule: any number of Headers followed by any number of
;; Definitions. Both repetitions may be empty, so an empty input matches.
;; [1] Document ::= Header* Definition*
Document = *Header *Definition
;; Header ;;
;; A header is exactly one of the three alternatives below.
;; NOTE: ABNF quoted strings are case-insensitive (RFC 5234, section 2.3), so
;; the keyword strings in this grammar also match upper- and mixed-case forms.
;; [2] Header ::= Include | CppInclude | Namespace
Header = Include/ CppInclude/ Namespace
;;; Thrift Include ;;;
;; The keyword "include" followed by a quoted Literal.
;; [3] Include ::= 'include' Literal
Include = "include" Literal
;;; C++ Include ;;;
;; The keyword "cpp_include" followed by a quoted Literal.
;; [4] CppInclude ::= 'cpp_include' Literal
CppInclude = "cpp_include" Literal
;;; Namespace ;;;
;; [5] Namespace ::= ( 'namespace' ( NamespaceScope Identifier ) |
;;                                 ( 'smalltalk.category' STIdentifier ) |
;;                                 ( 'smalltalk.prefix' Identifier ) ) |
;;                   ( 'php_namespace' Literal ) |
;;                   ( 'xsd_namespace' Literal )
;; The alternatives that may follow the 'namespace' keyword are factored out
;; into NamespaceValue so the nesting of the original rule stays readable.
;; Continuation lines are indented, as RFC 5234 requires for multi-line rules.
Namespace = ( "namespace" NamespaceValue )/
            ( "php_namespace" Literal )/
            ( "xsd_namespace" Literal )
NamespaceValue = ( NamespaceScope Identifier )/
                 ( "smalltalk.category" STIdentifier )/
                 ( "smalltalk.prefix" Identifier )
;; [6] NamespaceScope ::= '*' | 'cpp' | 'java' | 'py' | 'perl' | 'rb' |
;;                        'cocoa' | 'csharp'
NamespaceScope = "*"/ "cpp"/ "java"/ "py"/ "perl"/ "rb"/ "cocoa"/ "csharp"
;; Definition ;;
;; A definition is exactly one of the seven top-level declaration kinds.
;; [7] Definition ::= Const | Typedef | Enum | Senum | Struct | Exception |
;;                    Service
Definition = Const/ Typedef/ Enum/ Senum/ Struct/ Exception/ Service
;;; Const ;;;
;; A typed, named constant with an initial value; the trailing list separator
;; is optional.
;; [8] Const ::= 'const' FieldType Identifier '=' ConstValue [ListSeparator]
Const = "const" FieldType Identifier "=" ConstValue [ListSeparator]
;;; Typedef ;;;
;; Names a DefinitionType (a base or container type, not another Identifier).
;; [9] Typedef ::= 'typedef' DefinitionType Identifier
Typedef = "typedef" DefinitionType Identifier
;;; Enum ;;;
;; A braced list of member names. Each member may carry an explicit integer
;; value and may be followed by a list separator.
;; [10] Enum ::= 'enum' Identifier '{' (Identifier ('=' IntConstant)?
;;               ListSeparator?)* '}'
Enum = "enum" Identifier "{" *(Identifier [("=" IntConstant)]
       [ListSeparator]) "}"
;;; Senum ;;;
;; A braced list of Literals, each optionally followed by a list separator.
;; [11] Senum ::= 'senum' Identifier '{' (Literal ListSeparator?)* '}'
Senum = "senum" Identifier "{" *(Literal [ListSeparator]) "}"
;;; Struct ;;;
;; A braced list of Fields; the keyword "xsd_all" may follow the struct name.
;; [12] Struct ::= 'struct' Identifier 'xsd_all'? '{' Field* '}'
Struct = "struct" Identifier ["xsd_all"] "{" *Field "}"
;;; Exception ;;;
;; Same shape as Struct, without the optional "xsd_all" keyword.
;; [13] Exception ::= 'exception' Identifier '{' Field* '}'
Exception = "exception" Identifier "{" *Field "}"
;;; Service ;;;
;; A named, braced list of Functions; the service may extend one other
;; service, referenced by Identifier.
;; [14] Service ::= 'service' Identifier ( 'extends' Identifier )? '{'
;;                  Function* '}'
Service = "service" Identifier [( "extends" Identifier )] "{" *Function
          "}"
;; Field ;;
;; A field is a typed name with an optional numeric id, optional requiredness,
;; optional default value, XSD options and an optional trailing separator.
;; [15] Field ::= FieldID? FieldReq? FieldType Identifier ('=' ConstValue)?
;;                XsdFieldOptions ListSeparator?
Field = [FieldID] [FieldReq] FieldType Identifier [("=" ConstValue)]
        XsdFieldOptions [ListSeparator]
;;; Field ID ;;;
;; An integer constant followed by a colon. IntConstant allows a leading sign,
;; so this rule accepts signed ids as well.
;; [16] FieldID ::= IntConstant ':'
FieldID = IntConstant ":"
;;; Field Requiredness ;;;
;; [17] FieldReq ::= 'required' | 'optional'
FieldReq = "required"/ "optional"
;;; XSD Options ;;;
;; Every part is optional, so XsdFieldOptions can match the empty string.
;; [18] XsdFieldOptions ::= 'xsd_optional'? 'xsd_nillable'? XsdAttrs?
XsdFieldOptions= ["xsd_optional"] ["xsd_nillable"] [XsdAttrs]
;; A braced list of Fields; recursive through Field -> XsdFieldOptions.
;; [19] XsdAttrs ::= 'xsd_attrs' '{' Field* '}'
XsdAttrs = "xsd_attrs" "{" *Field "}"
;; Functions ;;
;; A function has an optional "oneway" modifier, a return type, a name, a
;; parenthesised argument list of Fields, an optional Throws clause and an
;; optional trailing separator.
;; [20] Function ::= 'oneway'? FunctionType Identifier '(' Field* ')' Throws?
;;                   ListSeparator?
Function = ["oneway"] FunctionType Identifier "(" *Field ")" [Throws]
           [ListSeparator]
;; The return type: any FieldType, or the keyword "void".
;; [21] FunctionType ::= FieldType | 'void'
FunctionType = FieldType/ "void"
;; The exceptions a function may raise, written as a list of Fields.
;; [22] Throws ::= 'throws' '(' Field* ')'
Throws = "throws" "(" *Field ")"
;; Types ;;
;; FieldType additionally allows an Identifier (a reference to a named type);
;; DefinitionType allows only base and container types.
;; [23] FieldType ::= Identifier | BaseType | ContainerType
FieldType = Identifier/ BaseType/ ContainerType
;; [24] DefinitionType ::= BaseType | ContainerType
DefinitionType = BaseType/ ContainerType
;; The built-in scalar type keywords.
;; [25] BaseType ::= 'bool' | 'byte' | 'i16' | 'i32' | 'i64' | 'double' |
;;                   'string' | 'binary' | 'slist'
BaseType = "bool"/ "byte"/ "i16"/ "i32"/ "i64"/ "double"/
           "string"/ "binary"/ "slist"
;; [26] ContainerType ::= MapType | SetType | ListType
ContainerType = MapType/ SetType/ ListType
;; For map and set the optional CppType precedes the '<'; for list it follows
;; the closing '>'. This asymmetry is present in the rules being transcribed.
;; [27] MapType ::= 'map' CppType? '<' FieldType ',' FieldType '>'
MapType = "map" [CppType] "<" FieldType "," FieldType ">"
;; [28] SetType ::= 'set' CppType? '<' FieldType '>'
SetType = "set" [CppType] "<" FieldType ">"
;; [29] ListType ::= 'list' '<' FieldType '>' CppType?
ListType = "list" "<" FieldType ">" [CppType]
;; The keyword "cpp_type" followed by a quoted Literal.
;; [30] CppType ::= 'cpp_type' Literal
CppType = "cpp_type" Literal
;; Constant Values ;;
;; A constant is a number, a quoted literal, a reference to another name, or
;; a (possibly nested) list or map of constants.
;; [31] ConstValue ::= IntConstant | DoubleConstant | Literal | Identifier |
;;                     ConstList | ConstMap
ConstValue = IntConstant/ DoubleConstant/ Literal/ Identifier/ ConstList/
             ConstMap
;; An optional sign followed by one or more digits.
;; [32] IntConstant ::= ('+' | '-')? Digit+
IntConstant = [("+"/ "-")] 1*Digit
;; [33] DoubleConstant ::= ('+' | '-')? Digit* ('.' Digit+)?
;;                         ( ('E' | 'e') IntConstant )?
;; ABNF quoted strings are case-insensitive, so "E" covers both 'E' and 'e'.
;; NOTE(review): every component is optional, so this rule (like the rule it
;; transcribes) also matches the empty string and plain integers.
DoubleConstant = [("+"/ "-")] *Digit [("." 1*Digit)] [( "E" IntConstant )]
;; A bracketed sequence of constants; separators between items are optional.
;; [34] ConstList ::= '[' (ConstValue ListSeparator?)* ']'
ConstList = "[" *(ConstValue [ListSeparator]) "]"
;; A braced sequence of "key : value" pairs; separators between pairs are
;; optional.
;; [35] ConstMap ::= '{' (ConstValue ':' ConstValue ListSeparator?)* '}'
ConstMap = "{" *(ConstValue ":" ConstValue [ListSeparator]) "}"
;; Basic Definitions ;;
;;; Literal ;;;
;; A double-quoted or single-quoted string; %x22 is '"' and %x27 is "'".
;; [36] Literal ::= ('"' [^"]* '"') | ("'" [^']* "'")
;; NOTE(review): the body is limited to %x20-7E minus the delimiting quote,
;; which is narrower than the [^"] / [^'] classes in the rule above (tabs,
;; newlines and non-ASCII octets are excluded) -- confirm this is intended.
Literal = (%x22 *(%x20-21 / %x23-7E) %x22)/
          (%x27 *(%x20-26 / %x28-7E) %x27)
;;; Identifier ;;;
;; Starts with a letter or underscore; later characters may also be digits or
;; dots.
;; [37] Identifier ::= ( Letter | '_' ) ( Letter | Digit | '.' | '_' )*
Identifier = ( Letter/ "_" ) *( Letter/ Digit/ "."/ "_" )
;; Same as Identifier, but later characters may also be hyphens.
;; [38] STIdentifier ::= ( Letter | '_' ) ( Letter | Digit | '.' | '_' | '-' )*
STIdentifier = ( Letter/ "_" ) *( Letter/ Digit/ "."/ "_"/ "-" )
;;; List Separator ;;;
;; [39] ListSeparator ::= ',' | ';'
ListSeparator = ","/ ";"
;;; Letters and Digits ;;;
;; ASCII upper-case (%x41-5A) and lower-case (%x61-7A) letters.
;; [40] Letter ::= ['A'-'Z'] | ['a'-'z']
Letter = %x41-5A/ %x61-7A
;; ASCII decimal digits.
;; [41] Digit ::= ['0'-'9']
Digit = %x30-39
;; Wire-level primitive encodings.
;; In ABNF "<n>element" means exactly n repetitions, whereas "<n>*element"
;; means n or more (RFC 5234, sections 3.6 and 3.7). The fixed-width types
;; therefore use the exact-count form.
BOOL = %x00/ %x01
BYTE = OCTET
I08 = OCTET
I16 = 2OCTET
I32 = 4OCTET
U64 = 8OCTET
I64 = 8OCTET
DOUBLE = 8OCTET
;; A 32-bit value followed by UTF-8 text (STRING) or raw octets (BINARY).
;; NOTE(review): presumably the I32 is a length prefix for what follows; ABNF
;; cannot express that dependency, so it is not enforced here -- confirm.
STRING = I32 UTF8-octets
BINARY = I32 *OCTET
;; Message type codes, each a single octet.
T-CALL = %x01
T-REPLY = %x02
T-EXCEPTION = %x03
T-ONEWAY = %x04
;; Field/element type codes, each a single octet. The order follows the
;; order in which the names were listed, not numeric order (note T-DOUBLE).
T-STOP = %x00
T-VOID = %x01
T-BOOL = %x02
T-BYTE = %x03
;; T-I08 is an alias of T-BYTE in Thrift's TType enumeration, so it shares
;; the value %x03 (the value %x05 is not assigned to any type).
T-I08 = %x03
T-I16 = %x06
T-I32 = %x08
T-U64 = %x09
T-I64 = %x0a
T-DOUBLE = %x04
;; T-STRING and T-BINARY share one code.
T-STRING = %x0b
T-BINARY = %x0b
T-STRUCT = %x0c
T-MAP = %x0d
T-SET = %x0e
T-LIST = %x0f
;; A message is a header, one struct carrying the payload, and an (empty)
;; trailer.
;; <message> ::= <message-begin> <struct> <message-end>
message = message-begin struct message-end
;; <message-begin> ::= <method-name> <message-type> <message-seqid>
message-begin = method-name message-type message-seqid
;; message-end is the empty string: nothing is emitted for it.
message-end = ""
;; <method-name> ::= STRING
method-name = STRING
;; <message-type> ::= T_CALL | T_REPLY | T_EXCEPTION
;; T-ONEWAY is accepted as well: it is defined alongside the other message
;; type codes but was not referenced by any rule.
message-type = T-CALL/ T-REPLY/ T-EXCEPTION/ T-ONEWAY
;; <message-seqid> ::= I32
message-seqid = I32
;; A struct is its name, any number of fields, a stop marker and an (empty)
;; trailer.
;; <struct> ::= <struct-begin> <field>* <field-stop> <struct-end>
struct = struct-begin *field field-stop struct-end
;; <struct-begin> ::= <struct-name>
struct-begin = struct-name
;; struct-end is the empty string: nothing is emitted for it.
struct-end = ""
;; <struct-name> ::= STRING
struct-name = STRING
;; The single octet T-STOP terminates the field list.
;; <field-stop> ::= T_STOP
field-stop = T-STOP
;; A field is a header, its data and an (empty) trailer.
;; <field> ::= <field-begin> <field-data> <field-end>
field = field-begin field-data field-end
;; <field-begin> ::= <field-name> <field-type> <field-id>
field-begin = field-name field-type field-id
;; field-end is the empty string: nothing is emitted for it.
field-end = ""
;; <field-name> ::= STRING
field-name = STRING
;; <field-type> ::= T_BOOL | T_BYTE | T_I8 | T_I16 | T_I32 | T_I64 | T_DOUBLE
;;                | T_STRING | T_BINARY | T_STRUCT | T_MAP | T_SET | T_LIST
;; NOTE(review): this rule also admits T-STOP, T-VOID and T-U64, which the
;; BNF quoted above does not list -- confirm they are intended here.
field-type = T-STOP/ T-VOID/ T-BOOL/ T-BYTE/ T-I08/ T-I16/ T-I32/ T-U64/
             T-I64/ T-DOUBLE/
             T-STRING/ T-BINARY/ T-STRUCT/ T-MAP/ T-SET/ T-LIST
;; <field-id> ::= I16
field-id = I16
;; <field-data> ::= I8 | I16 | I32 | I64 | DOUBLE | STRING | BINARY |
;;                  <struct> | <map> | <list> | <set>
;; NOTE(review): BOOL and U64 are accepted here although the BNF quoted above
;; does not list them -- confirm.
field-data = BOOL/ I08/ I16/ I32/ U64/ I64/ DOUBLE/ STRING/ BINARY/
             struct/ map/ list/ set
;; TBD - is this correct?
;; One map entry, written as two consecutive field-data items (presumably the
;; key followed by the value, mirroring map-key-type / map-value-type).
field-datum = field-data field-data
;; A map is a header, any number of entries and an (empty) trailer.
;; NOTE(review): presumably map-size gives the number of field-datum entries;
;; the grammar itself does not tie the two together.
;; <map> ::= <map-begin> <field-datum>* <map-end>
map = map-begin *field-datum map-end
;; <map-begin> ::= <map-key-type> <map-value-type> <map-size>
map-begin = map-key-type map-value-type map-size
;; map-end is the empty string: nothing is emitted for it.
map-end = ""
;; <map-key-type> ::= <field-type>
map-key-type = field-type
;;<map-value-type> ::= <field-type>
map-value-type = field-type
;; <map-size> ::= I32
map-size = I32
;; A list is a header, any number of elements and an (empty) trailer.
;; <list> ::= <list-begin> <field-data>* <list-end>
list = list-begin *field-data list-end
;; <list-begin> ::= <list-elem-type> <list-size>
list-begin = list-elem-type list-size
;; list-end is the empty string: nothing is emitted for it.
list-end = ""
;; <list-elem-type> ::= <field-type>
list-elem-type = field-type
;; <list-size> ::= I32
list-size = I32
;; A set has the same shape as a list.
;; <set> ::= <set-begin> <field-data>* <set-end>
set = set-begin *field-data set-end
;; <set-begin> ::= <set-elem-type> <set-size>
set-begin = set-elem-type set-size
;; set-end is the empty string: nothing is emitted for it.
set-end = ""
;; <set-elem-type> ::= <field-type>
set-elem-type = field-type
;; <set-size> ::= I32
set-size = I32
;; Core rules, using the same names as the core rules in RFC 5234,
;; Appendix B.1. Of these, only OCTET is referenced by the rules above.
ALPHA = %x41-5A/ %x61-7A
BIT = "0"/ "1"
CHAR = %x01-7F
CR = %x0D
;; NOTE(review): RFC 5234 defines CRLF as CR LF only; this version also
;; accepts a bare LF -- confirm the relaxation is intended.
CRLF = LF/ (CR LF)
CTL = %x00-1F/ %x7F
DIGIT = %x30-39
DQUOTE = %x22
;; Quoted strings are case-insensitive, so "A"-"F" also match "a"-"f".
HEXDIG = DIGIT/ "A"/ "B"/ "C"/ "D"/ "E"/ "F"
HTAB = %x09
LF = %x0A
LWSP = *(WSP/ CRLF WSP)
OCTET = %x00-FF
SP = %x20
VCHAR = %x21-7E
WSP = SP/ HTAB
;; UTF-8 byte sequences, split by encoded length (1 to 4 octets).
;; The restricted second-octet ranges after %xE0, %xED, %xF0 and %xF4 are what
;; the rules below use to narrow the otherwise generic UTF8-tail.
UTF8-octets = *( UTF8-char )
UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
UTF8-1 = %x00-7F
UTF8-2 = %xC2-DF UTF8-tail
UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
         %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
         %xF4 %x80-8F 2( UTF8-tail )
;; A continuation octet.
UTF8-tail = %x80-BF