chaokunyang commented on code in PR #2492:
URL: https://github.com/apache/fory/pull/2492#discussion_r2298178388


##########
rust/fory-core/src/meta/type_meta.rs:
##########
@@ -45,125 +128,217 @@ impl FieldInfo {
         }
     }
 
-    fn from_bytes(reader: &mut Reader) -> FieldInfo {
+    pub fn from_bytes(reader: &mut Reader) -> FieldInfo {
         let header = reader.u8();
-        let encoding = Self::u8_to_encoding((header & 0b11000) >> 3).unwrap();
-        let mut size = (header & 0b11100000) as i32 >> 5;
-        size = if size == 0b111 {
-            reader.var_int32() + 7
-        } else {
-            size
-        };
-        let type_id = reader.i16();
+        // println!("read field_header:{:?}", header);
+        // let nullability = (header & 0b10) != 0;
+        // let ref_tracking = (header & 0b1) != 0;
+        // let decoding_idx = (header >> 6) & 0b11;
+        // println!("decoding_idx:{:?}", decoding_idx);
+        let encoding = Self::u8_to_encoding((header >> 6) & 0b11).unwrap();
+        let mut name_size = ((header & 0b0011_1100) >> 2) as usize;
+        if name_size == 15 {
+            name_size += reader.var_int32() as usize;
+        }
+        name_size += 1;
+
+        let field_type = FieldTypeResolver::from_bytes(reader, false);
+
+        let field_name_bytes = reader.bytes(name_size);
+
         let field_name = MetaStringDecoder::new()
-            .decode(reader.bytes(size as usize), encoding)
+            .decode(field_name_bytes, encoding)
             .unwrap();
         FieldInfo {
             field_name,
-            field_id: type_id,
+            field_type,
         }
     }
 
     fn to_bytes(&self) -> Result<Vec<u8>, Error> {
+        // field_bytes: | header | type_info | field_name |
         let mut writer = Writer::default();
-        let meta_string = MetaStringEncoder::new().encode(&self.field_name)?;
-        let mut header = 1 << 2;
-        let encoded = meta_string.bytes.as_slice();
-        let size = encoded.len() as u32;
-        header |= (meta_string.encoding as u8) << 3;
-        let big_size = size >= 7;
-        if big_size {
-            header |= 0b11100000;
-            writer.u8(header);
-            writer.var_int32((size - 7) as i32);
-        } else {
-            header |= (size << 5) as u8;
-            writer.u8(header);
+        // header: | field_name_encoding:2bits | size:4bits | nullability:1bit 
| ref_tracking:1bit |
+        let encoding_options: &[Encoding] = &[
+            Encoding::Utf8,
+            Encoding::AllToLowerSpecial,
+            Encoding::LowerUpperDigitSpecial,
+        ];
+        let meta_string = MetaStringEncoder::new()
+            .set_options(Some(encoding_options))
+            .encode(&self.field_name)?;
+        let name_encoded = meta_string.bytes.as_slice();
+        let name_size = name_encoded.len() - 1;
+        let mut header: u8 = (min(0b1111, name_size) as u8) << 2;
+        let ref_tracking = false;
+        let nullability = false;
+        if ref_tracking {
+            header |= 1;
+        }
+        if nullability {
+            header |= 0b10;
+        }
+        let encoding_idx = encoding_options
+            .iter()
+            .position(|x| *x == meta_string.encoding)
+            .unwrap() as u8;
+        header |= encoding_idx << 6;
+        writer.u8(header);
+        if name_size >= 15 {
+            writer.var_int32((name_size - 15) as i32);
         }
-        writer.i16(self.field_id);
-        writer.bytes(encoded);
+        // write type_info
+        self.field_type.to_bytes(&mut writer, false)?;
+        // write field_name
+        writer.bytes(name_encoded);
         Ok(writer.dump())
     }
 }
 
+#[derive(Debug)]
 pub struct TypeMetaLayer {
     type_id: u32,
-    field_info: Vec<FieldInfo>,
+    field_infos: Vec<FieldInfo>,
 }
 
 impl TypeMetaLayer {
-    pub fn new(type_id: u32, field_info: Vec<FieldInfo>) -> TypeMetaLayer {
+    pub fn new(type_id: u32, field_infos: Vec<FieldInfo>) -> TypeMetaLayer {
         TypeMetaLayer {
             type_id,
-            field_info,
+            field_infos,
         }
     }
 
     pub fn get_type_id(&self) -> u32 {
         self.type_id
     }
 
-    pub fn get_field_info(&self) -> &Vec<FieldInfo> {
-        &self.field_info
+    pub fn get_field_infos(&self) -> &Vec<FieldInfo> {
+        &self.field_infos
     }
 
     fn to_bytes(&self) -> Result<Vec<u8>, Error> {
+        // layer_bytes:| meta_header | fields meta |
         let mut writer = Writer::default();
-        writer.var_int32(self.field_info.len() as i32);
-        writer.var_int32(self.type_id as i32);
-        for field in self.field_info.iter() {
+        let num_fields = self.field_infos.len() - 1;
+        let is_register_by_name = false;
+        // meta_header: | unuse:2 bits | is_register_by_id:1 bit | 
num_fields:4 bits |
+        let mut meta_header: u8 = min(num_fields, 0b1111) as u8;
+        if is_register_by_name {
+            meta_header |= 0b10_0000;
+        }
+        // println!("write meta_header:{:?}", meta_header);
+        writer.u8(meta_header);
+        if num_fields >= 0b1_1111 {
+            writer.var_int32(num_fields as i32 - 0b1_1111);
+        }
+        if is_register_by_name {
+            todo!()
+        } else {
+            writer.var_int32(self.type_id as i32);
+        }
+        for field in self.field_infos.iter() {
+            // println!("cur field:{:?}", field);
             writer.bytes(field.to_bytes()?.as_slice());
         }
         Ok(writer.dump())
     }
 
     fn from_bytes(reader: &mut Reader) -> TypeMetaLayer {
-        let field_num = reader.var_int32();
-        let type_id = reader.var_int32() as u32;
-        let field_info = (0..field_num)
-            .map(|_| FieldInfo::from_bytes(reader))
-            .collect();
-        TypeMetaLayer::new(type_id, field_info)
+        let meta_header = reader.u8();
+        // println!("read meta_header:{:?}", meta_header);
+        // let is_register_by_name = (meta_header & 0b10_0000) == 1;
+        let is_register_by_name = false;
+        let mut num_fields = (meta_header & 0b1111) as i32;
+        if num_fields == 15 {
+            num_fields += reader.var_int32();
+        }
+        num_fields += 1;
+        let type_id;
+        if is_register_by_name {
+            todo!()
+        } else {
+            type_id = reader.var_int32() as u32;
+        }
+        let mut field_infos = Vec::with_capacity(num_fields as usize);
+        for _ in 0..num_fields {
+            field_infos.push(FieldInfo::from_bytes(reader));
+        }
+
+        TypeMetaLayer::new(type_id, field_infos)
     }
 }
 
+#[derive(Debug)]
 pub struct TypeMeta {
-    hash: u64,
+    // hash: u64,
     layers: Vec<TypeMetaLayer>,
 }
 
 impl TypeMeta {
-    pub fn get_field_info(&self) -> &Vec<FieldInfo> {
-        self.layers.first().unwrap().get_field_info()
+    pub fn get_field_infos(&self) -> &Vec<FieldInfo> {
+        self.layers.first().unwrap().get_field_infos()
     }
 
     pub fn get_type_id(&self) -> u32 {
         self.layers.first().unwrap().get_type_id()
     }
 
-    pub fn from_fields(type_id: u32, field_info: Vec<FieldInfo>) -> TypeMeta {
+    pub fn from_fields(type_id: u32, field_infos: Vec<FieldInfo>) -> TypeMeta {
         TypeMeta {
-            hash: 0,
-            layers: vec![TypeMetaLayer::new(type_id, field_info)],
+            // hash: 0,
+            layers: vec![TypeMetaLayer::new(type_id, field_infos)],
         }
     }
-
+    #[allow(unused_assignments)]
     pub fn from_bytes(reader: &mut Reader) -> TypeMeta {
         let header = reader.u64();
-        let hash = header >> 8; // high 56bits indicate hash
-        let layer_count = header & 0b1111; // class count
-        let layers: Vec<TypeMetaLayer> = (0..layer_count)
-            .map(|_| TypeMetaLayer::from_bytes(reader))
-            .collect();
-        TypeMeta { hash, layers }
+        let mut meta_size = header & META_SIZE_MASK;
+        if meta_size == META_SIZE_MASK {
+            meta_size += reader.var_int32() as u64;
+        }
+
+        // let write_fields_meta = (header & (1 << 12)) != 0;
+        // let is_compressed: bool = (header & (1 << 13)) != 0;
+        // let meta_hash = header >> 14;
+
+        let mut layers = Vec::new();
+        // let current_meta_size = 0;
+        // while current_meta_size < meta_size {}
+        let layer = TypeMetaLayer::from_bytes(reader);
+        layers.push(layer);
+        TypeMeta { layers }
     }
 
     pub fn to_bytes(&self) -> Result<Vec<u8>, Error> {
-        let mut writer = Writer::default();
-        writer.u64((self.hash << 8) | (self.layers.len() as u64 & 0b1111));
-        for layer in self.layers.iter() {
-            writer.bytes(layer.to_bytes()?.as_slice());
+        // println!("{:#?}", self);
+        // | global_binary_header | layers_bytes |
+        let mut result = Writer::default();
+        let mut layers_writer = Writer::default();
+        // for layer in self.layers.iter() {
+        //     layers_writer.bytes(layer.to_bytes()?.as_slice());
+        // }
+        
layers_writer.bytes(self.layers.first().unwrap().to_bytes()?.as_slice());
+        // global_binary_header:| hash:50bits | is_compressed:1bit | 
write_fields_meta:1bit | meta_size:12bits |
+        let meta_size = layers_writer.len() as u64;
+        let mut header: u64 = min(META_SIZE_MASK, meta_size);
+        let write_meta_fields_flag = true;
+        if write_meta_fields_flag {
+            header |= 1 << 12;

Review Comment:
   Could we declare static variables for those numbers?



##########
rust/fory-core/src/meta/type_meta.rs:
##########
@@ -45,125 +128,217 @@ impl FieldInfo {
         }
     }
 
-    fn from_bytes(reader: &mut Reader) -> FieldInfo {
+    pub fn from_bytes(reader: &mut Reader) -> FieldInfo {
         let header = reader.u8();
-        let encoding = Self::u8_to_encoding((header & 0b11000) >> 3).unwrap();
-        let mut size = (header & 0b11100000) as i32 >> 5;
-        size = if size == 0b111 {
-            reader.var_int32() + 7
-        } else {
-            size
-        };
-        let type_id = reader.i16();
+        // println!("read field_header:{:?}", header);
+        // let nullability = (header & 0b10) != 0;
+        // let ref_tracking = (header & 0b1) != 0;
+        // let decoding_idx = (header >> 6) & 0b11;
+        // println!("decoding_idx:{:?}", decoding_idx);
+        let encoding = Self::u8_to_encoding((header >> 6) & 0b11).unwrap();
+        let mut name_size = ((header & 0b0011_1100) >> 2) as usize;
+        if name_size == 15 {
+            name_size += reader.var_int32() as usize;
+        }
+        name_size += 1;
+
+        let field_type = FieldTypeResolver::from_bytes(reader, false);
+
+        let field_name_bytes = reader.bytes(name_size);
+
         let field_name = MetaStringDecoder::new()
-            .decode(reader.bytes(size as usize), encoding)
+            .decode(field_name_bytes, encoding)
             .unwrap();
         FieldInfo {
             field_name,
-            field_id: type_id,
+            field_type,
         }
     }
 
     fn to_bytes(&self) -> Result<Vec<u8>, Error> {
+        // field_bytes: | header | type_info | field_name |
         let mut writer = Writer::default();
-        let meta_string = MetaStringEncoder::new().encode(&self.field_name)?;
-        let mut header = 1 << 2;
-        let encoded = meta_string.bytes.as_slice();
-        let size = encoded.len() as u32;
-        header |= (meta_string.encoding as u8) << 3;
-        let big_size = size >= 7;
-        if big_size {
-            header |= 0b11100000;
-            writer.u8(header);
-            writer.var_int32((size - 7) as i32);
-        } else {
-            header |= (size << 5) as u8;
-            writer.u8(header);
+        // header: | field_name_encoding:2bits | size:4bits | nullability:1bit 
| ref_tracking:1bit |
+        let encoding_options: &[Encoding] = &[
+            Encoding::Utf8,
+            Encoding::AllToLowerSpecial,
+            Encoding::LowerUpperDigitSpecial,
+        ];
+        let meta_string = MetaStringEncoder::new()
+            .set_options(Some(encoding_options))
+            .encode(&self.field_name)?;
+        let name_encoded = meta_string.bytes.as_slice();
+        let name_size = name_encoded.len() - 1;
+        let mut header: u8 = (min(0b1111, name_size) as u8) << 2;
+        let ref_tracking = false;
+        let nullability = false;
+        if ref_tracking {
+            header |= 1;
+        }
+        if nullability {
+            header |= 0b10;
+        }
+        let encoding_idx = encoding_options
+            .iter()
+            .position(|x| *x == meta_string.encoding)
+            .unwrap() as u8;
+        header |= encoding_idx << 6;
+        writer.u8(header);
+        if name_size >= 15 {
+            writer.var_int32((name_size - 15) as i32);
         }
-        writer.i16(self.field_id);
-        writer.bytes(encoded);
+        // write type_info
+        self.field_type.to_bytes(&mut writer, false)?;
+        // write field_name
+        writer.bytes(name_encoded);
         Ok(writer.dump())
     }
 }
 
+#[derive(Debug)]
 pub struct TypeMetaLayer {
     type_id: u32,
-    field_info: Vec<FieldInfo>,
+    field_infos: Vec<FieldInfo>,
 }
 
 impl TypeMetaLayer {
-    pub fn new(type_id: u32, field_info: Vec<FieldInfo>) -> TypeMetaLayer {
+    pub fn new(type_id: u32, field_infos: Vec<FieldInfo>) -> TypeMetaLayer {
         TypeMetaLayer {
             type_id,
-            field_info,
+            field_infos,
         }
     }
 
     pub fn get_type_id(&self) -> u32 {
         self.type_id
     }
 
-    pub fn get_field_info(&self) -> &Vec<FieldInfo> {
-        &self.field_info
+    pub fn get_field_infos(&self) -> &Vec<FieldInfo> {
+        &self.field_infos
     }
 
     fn to_bytes(&self) -> Result<Vec<u8>, Error> {
+        // layer_bytes:| meta_header | fields meta |
         let mut writer = Writer::default();
-        writer.var_int32(self.field_info.len() as i32);
-        writer.var_int32(self.type_id as i32);
-        for field in self.field_info.iter() {
+        let num_fields = self.field_infos.len() - 1;
+        let is_register_by_name = false;
+        // meta_header: | unuse:2 bits | is_register_by_id:1 bit | 
num_fields:4 bits |
+        let mut meta_header: u8 = min(num_fields, 0b1111) as u8;
+        if is_register_by_name {
+            meta_header |= 0b10_0000;
+        }
+        // println!("write meta_header:{:?}", meta_header);
+        writer.u8(meta_header);
+        if num_fields >= 0b1_1111 {
+            writer.var_int32(num_fields as i32 - 0b1_1111);
+        }
+        if is_register_by_name {
+            todo!()
+        } else {
+            writer.var_int32(self.type_id as i32);
+        }
+        for field in self.field_infos.iter() {
+            // println!("cur field:{:?}", field);
             writer.bytes(field.to_bytes()?.as_slice());
         }
         Ok(writer.dump())
     }
 
     fn from_bytes(reader: &mut Reader) -> TypeMetaLayer {
-        let field_num = reader.var_int32();
-        let type_id = reader.var_int32() as u32;
-        let field_info = (0..field_num)
-            .map(|_| FieldInfo::from_bytes(reader))
-            .collect();
-        TypeMetaLayer::new(type_id, field_info)
+        let meta_header = reader.u8();
+        // println!("read meta_header:{:?}", meta_header);
+        // let is_register_by_name = (meta_header & 0b10_0000) == 1;
+        let is_register_by_name = false;
+        let mut num_fields = (meta_header & 0b1111) as i32;
+        if num_fields == 15 {
+            num_fields += reader.var_int32();
+        }
+        num_fields += 1;
+        let type_id;
+        if is_register_by_name {
+            todo!()
+        } else {
+            type_id = reader.var_int32() as u32;
+        }
+        let mut field_infos = Vec::with_capacity(num_fields as usize);
+        for _ in 0..num_fields {
+            field_infos.push(FieldInfo::from_bytes(reader));
+        }
+
+        TypeMetaLayer::new(type_id, field_infos)
     }
 }
 
+#[derive(Debug)]
 pub struct TypeMeta {
-    hash: u64,
+    // hash: u64,
     layers: Vec<TypeMetaLayer>,
 }
 
 impl TypeMeta {
-    pub fn get_field_info(&self) -> &Vec<FieldInfo> {
-        self.layers.first().unwrap().get_field_info()
+    pub fn get_field_infos(&self) -> &Vec<FieldInfo> {
+        self.layers.first().unwrap().get_field_infos()
     }
 
     pub fn get_type_id(&self) -> u32 {
         self.layers.first().unwrap().get_type_id()
     }
 
-    pub fn from_fields(type_id: u32, field_info: Vec<FieldInfo>) -> TypeMeta {
+    pub fn from_fields(type_id: u32, field_infos: Vec<FieldInfo>) -> TypeMeta {
         TypeMeta {
-            hash: 0,
-            layers: vec![TypeMetaLayer::new(type_id, field_info)],
+            // hash: 0,
+            layers: vec![TypeMetaLayer::new(type_id, field_infos)],
         }
     }
-
+    #[allow(unused_assignments)]
     pub fn from_bytes(reader: &mut Reader) -> TypeMeta {
         let header = reader.u64();
-        let hash = header >> 8; // high 56bits indicate hash
-        let layer_count = header & 0b1111; // class count
-        let layers: Vec<TypeMetaLayer> = (0..layer_count)
-            .map(|_| TypeMetaLayer::from_bytes(reader))
-            .collect();
-        TypeMeta { hash, layers }
+        let mut meta_size = header & META_SIZE_MASK;
+        if meta_size == META_SIZE_MASK {
+            meta_size += reader.var_int32() as u64;
+        }
+
+        // let write_fields_meta = (header & (1 << 12)) != 0;
+        // let is_compressed: bool = (header & (1 << 13)) != 0;
+        // let meta_hash = header >> 14;
+
+        let mut layers = Vec::new();
+        // let current_meta_size = 0;
+        // while current_meta_size < meta_size {}
+        let layer = TypeMetaLayer::from_bytes(reader);
+        layers.push(layer);
+        TypeMeta { layers }
     }
 
     pub fn to_bytes(&self) -> Result<Vec<u8>, Error> {
-        let mut writer = Writer::default();
-        writer.u64((self.hash << 8) | (self.layers.len() as u64 & 0b1111));
-        for layer in self.layers.iter() {
-            writer.bytes(layer.to_bytes()?.as_slice());
+        // println!("{:#?}", self);
+        // | global_binary_header | layers_bytes |
+        let mut result = Writer::default();
+        let mut layers_writer = Writer::default();
+        // for layer in self.layers.iter() {
+        //     layers_writer.bytes(layer.to_bytes()?.as_slice());
+        // }
+        
layers_writer.bytes(self.layers.first().unwrap().to_bytes()?.as_slice());
+        // global_binary_header:| hash:50bits | is_compressed:1bit | 
write_fields_meta:1bit | meta_size:12bits |
+        let meta_size = layers_writer.len() as u64;
+        let mut header: u64 = min(META_SIZE_MASK, meta_size);
+        let write_meta_fields_flag = true;
+        if write_meta_fields_flag {
+            header |= 1 << 12;

Review Comment:
   Could we declare static variables for those numbers?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to