abdullah alamoudi has submitted this change and it was merged. Change subject: Fix Decoding of byte[] Records ......................................................................
Fix Decoding of byte[] Records Change-Id: I71c3d8b8dfa5a98123725f139247d2b5ce10012e Reviewed-on: https://asterix-gerrit.ics.uci.edu/951 Reviewed-by: Yingyi Bu <buyin...@gmail.com> Reviewed-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Tested-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu> --- M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/converter/DCPMessageToRecordConverter.java A asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ByteBufUTF8DecodeTest.java A asterixdb/asterix-external-data/src/test/resources/ICanEatGlass.txt A asterixdb/asterix-external-data/src/test/resources/record.json 5 files changed, 637 insertions(+), 10 deletions(-) Approvals: Yingyi Bu: Looks good to me, approved Jenkins: Looks good to me, but someone else must approve; Verified diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java index f174962..33f9673 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java @@ -69,7 +69,7 @@ throw new IOException( "Record is too large!. Maximum record size is " + ExternalDataConstants.MAX_RECORD_SIZE); } - int newSize = Math.min((int)(len * ExternalDataConstants.DEFAULT_BUFFER_INCREMENT_FACTOR), + int newSize = Math.min((int) (len * ExternalDataConstants.DEFAULT_BUFFER_INCREMENT_FACTOR), ExternalDataConstants.MAX_RECORD_SIZE); value = Arrays.copyOf(value, newSize); } @@ -88,7 +88,7 @@ @Override public String toString() { - return String.valueOf(value, 0, size); + return String.valueOf(value, 0, size == 0 ? 0 : size - 1); } public void endRecord() throws IOException { diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/converter/DCPMessageToRecordConverter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/converter/DCPMessageToRecordConverter.java index 6ce5e98..01466fd 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/converter/DCPMessageToRecordConverter.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/converter/DCPMessageToRecordConverter.java @@ -39,17 +39,15 @@ import com.couchbase.client.deps.io.netty.buffer.ByteBuf; import com.couchbase.client.deps.io.netty.util.ReferenceCountUtil; -public class DCPMessageToRecordConverter - implements IRecordToRecordWithMetadataAndPKConverter<DCPRequest, char[]> { +public class DCPMessageToRecordConverter implements IRecordToRecordWithMetadataAndPKConverter<DCPRequest, char[]> { private final RecordWithMetadataAndPK<char[]> recordWithMetadata; private final CharArrayRecord value; private final CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder(); - private final ByteBuffer bytes = ByteBuffer.allocateDirect(ExternalDataConstants.DEFAULT_BUFFER_SIZE); + private final ByteBuffer bytes = ByteBuffer.allocate(ExternalDataConstants.DEFAULT_BUFFER_SIZE); private final CharBuffer chars = CharBuffer.allocate(ExternalDataConstants.DEFAULT_BUFFER_SIZE); - private static final IAType[] CB_META_TYPES = new IAType[] { /*ID*/BuiltinType.ASTRING, - /*VBID*/BuiltinType.AINT32, /*SEQ*/BuiltinType.AINT64, /*CAS*/BuiltinType.AINT64, - /*EXPIRATION*/BuiltinType.AINT32, + private static final IAType[] CB_META_TYPES = new IAType[] { /*ID*/BuiltinType.ASTRING, /*VBID*/BuiltinType.AINT32, + /*SEQ*/BuiltinType.AINT64, /*CAS*/BuiltinType.AINT64, /*EXPIRATION*/BuiltinType.AINT32, /*FLAGS*/BuiltinType.AINT32, /*REV*/BuiltinType.AINT64, /*LOCK*/BuiltinType.AINT32 }; private static final int[] PK_INDICATOR = { 1 }; private static final int[] PK_INDEXES = { 0 }; @@ -105,16 +103,22 @@ int position = content.readerIndex(); final int limit = content.writerIndex(); final int contentSize = content.readableBytes(); + bytes.clear(); while (position < limit) { - bytes.clear(); chars.clear(); if ((contentSize - position) < bytes.capacity()) { bytes.limit(contentSize - position); } - content.getBytes(position, bytes); + content.getBytes(position + bytes.position(), bytes); position += bytes.position(); bytes.flip(); decoder.decode(bytes, chars, false); + if (bytes.hasRemaining()) { + bytes.compact(); + position -= bytes.position(); + } else { + bytes.clear(); + } chars.flip(); record.append(chars); } diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ByteBufUTF8DecodeTest.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ByteBufUTF8DecodeTest.java new file mode 100644 index 0000000..c238f1c --- /dev/null +++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ByteBufUTF8DecodeTest.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.asterix.external.parser.test; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +import org.apache.asterix.external.api.IRawRecord; +import org.apache.asterix.external.input.record.CharArrayRecord; +import org.apache.asterix.external.input.record.converter.DCPMessageToRecordConverter; +import org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader; +import org.apache.asterix.external.input.stream.LocalFSInputStream; +import org.apache.asterix.external.util.FileSystemWatcher; +import org.junit.Assert; +import org.junit.Test; + +import com.couchbase.client.deps.io.netty.buffer.ByteBuf; +import com.couchbase.client.deps.io.netty.buffer.UnpooledByteBufAllocator; + +public class ByteBufUTF8DecodeTest { + + private final int BUFFER_SIZE = 8; // Small buffer size to ensure multiple loop execution in the decode call + private final int KB32 = 32768; + private final CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder(); + private final ByteBuffer bytes = ByteBuffer.allocate(BUFFER_SIZE); + private final CharBuffer chars = CharBuffer.allocate(BUFFER_SIZE); + private final CharArrayRecord value = new CharArrayRecord(); + private final ByteBuf nettyBuffer = UnpooledByteBufAllocator.DEFAULT.heapBuffer(KB32, Integer.MAX_VALUE); + + @Test + public void eatGlass() { + try { + String fileName = getClass().getResource("/ICanEatGlass.txt").toURI().getPath(); + try (BufferedReader br = new BufferedReader(new FileReader(new File(fileName)))) { + for (String line; (line = br.readLine()) != null;) { + process(line); + } + } + } catch (Throwable e) { + e.printStackTrace(); + Assert.fail(e.getMessage()); + } + } + + @Test + public void testDecodingJsonRecords() throws URISyntaxException, IOException { + String jsonFileName = "/record.json"; + List<Path> paths = new ArrayList<>(); + paths.add(Paths.get(getClass().getResource(jsonFileName).toURI())); + FileSystemWatcher watcher = new FileSystemWatcher(paths, null, false); + LocalFSInputStream in = new LocalFSInputStream(watcher); + try (SemiStructuredRecordReader recordReader = new SemiStructuredRecordReader(in, "{", "}")) { + while (recordReader.hasNext()) { + try { + IRawRecord<char[]> record = recordReader.next(); + process(record.toString()); + } catch (Throwable th) { + th.printStackTrace(); + Assert.fail(th.getMessage()); + } + } + } + } + + private void process(String input) throws IOException { + value.reset(); + nettyBuffer.clear(); + nettyBuffer.writeBytes(input.getBytes(StandardCharsets.UTF_8)); + DCPMessageToRecordConverter.set(nettyBuffer, decoder, bytes, chars, value); + Assert.assertEquals(input, value.toString()); + } +} diff --git a/asterixdb/asterix-external-data/src/test/resources/ICanEatGlass.txt b/asterixdb/asterix-external-data/src/test/resources/ICanEatGlass.txt new file mode 100644 index 0000000..a3d9ca6 --- /dev/null +++ b/asterixdb/asterix-external-data/src/test/resources/ICanEatGlass.txt @@ -0,0 +1,149 @@ +Sanskrit: काचं शक्नोम्यत्तुम् । नोपहिनस्ति माम् ॥ +Sanskrit (standard transcription): kācaṃ śaknomyattum; nopahinasti mām. +Classical Greek: ὕαλον ϕαγεῖν δύναμαι· τοῦτο οὔ με βλάπτει. +Greek (monotonic): Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα. +Greek (polytonic): Μπορῶ νὰ φάω σπασμένα γυαλιὰ χωρὶς νὰ πάθω τίποτα. +Latin: Vitrum edere possum; mihi non nocet. +Old French: Je puis mangier del voirre. Ne me nuit. +French: Je peux manger du verre, ça ne me fait pas mal. +Provençal / Occitan: Pòdi manjar de veire, me nafrariá pas. +Québécois: J'peux manger d'la vitre, ça m'fa pas mal. +Walloon: Dji pou magnî do vêre, çoula m' freut nén må. +Picard: Ch'peux mingi du verre, cha m'foé mie n'ma. +Kreyòl Ayisyen (Haitï): Mwen kap manje vè, li pa blese'm. +Basque: Kristala jan dezaket, ez dit minik ematen. +Catalan / Català: Puc menjar vidre, que no em fa mal. +Spanish: Puedo comer vidrio, no me hace daño. +Aragonés: Puedo minchar beire, no me'n fa mal. +Galician: Eu podo xantar cristais e non cortarme. +European Portuguese: Posso comer vidro, não me faz mal. +Brazilian Portuguese: Posso comer vidro, não me machuca. +Caboverdiano/Kabuverdianu (Cape Verde): M' podê cumê vidru, ca ta maguâ-m'. +Papiamentu: Ami por kome glas anto e no ta hasimi daño. +Italian: Posso mangiare il vetro e non mi fa male. +Milanese: Sôn bôn de magnà el véder, el me fa minga mal. +Roman: Me posso magna' er vetro, e nun me fa male. +Napoletano: M' pozz magna' o'vetr, e nun m' fa mal. +Venetian: Mi posso magnare el vetro, no'l me fa mae. +Zeneise (Genovese): Pòsso mangiâ o veddro e o no me fà mâ. +Sicilian: Puotsu mangiari u vitru, nun mi fa mali. +Romansch (Grischun): Jau sai mangiar vaider, senza che quai fa donn a mai. +Romanian: Pot să mănânc sticlă și ea nu mă rănește. +Esperanto: Mi povas manĝi vitron, ĝi ne damaĝas min. +Cornish: Mý a yl dybry gwéder hag éf ny wra ow ankenya. +Welsh: Dw i'n gallu bwyta gwydr, 'dyw e ddim yn gwneud dolur i mi. +Manx Gaelic: Foddym gee glonney agh cha jean eh gortaghey mee. +Old Irish (Ogham): ᚛᚛ᚉᚑᚅᚔᚉᚉᚔᚋ ᚔᚈᚔ ᚍᚂᚐᚅᚑ ᚅᚔᚋᚌᚓᚅᚐ᚜ +Old Irish (Latin): Con·iccim ithi nglano. Ním·géna. +Irish: Is féidir liom gloinne a ithe. Ní dhéanann sí dochar ar bith dom. +Ulster Gaelic: Ithim-sa gloine agus ní miste damh é. +Scottish Gaelic: S urrainn dhomh gloinne ithe; cha ghoirtich i mi. +Anglo-Saxon (Runes): ᛁᚳ᛫ᛗᚨᚷ᛫ᚷᛚᚨᛋ᛫ᛖᚩᛏᚪᚾ᛫ᚩᚾᛞ᛫ᚻᛁᛏ᛫ᚾᛖ᛫ᚻᛖᚪᚱᛗᛁᚪᚧ᛫ᛗᛖ᛬ +Anglo-Saxon (Latin): Ic mæg glæs eotan ond hit ne hearmiað me. +Middle English: Ich canne glas eten and hit hirtiþ me nouȝt. +English: I can eat glass and it doesn't hurt me. +English (IPA): [aɪ kæn iːt glɑːs ænd ɪt dɐz nɒt hɜːt miː] (Received Pronunciation) +English (Braille): ⠊⠀⠉⠁⠝⠀⠑⠁⠞⠀⠛⠇⠁⠎⠎⠀⠁⠝⠙⠀⠊⠞⠀⠙⠕⠑⠎⠝⠞⠀⠓⠥⠗⠞⠀⠍⠑ +Jamaican: Mi kian niam glas han i neba hot mi. +Lalland Scots / Doric: Ah can eat gless, it disnae hurt us. +Gothic: ЌЌЌ ЌЌЌЍ Ќ̈ЍЌЌ, ЌЌ ЌЌЍ ЍЌ ЌЌЌЌ ЌЍЌЌЌЌЌ. +Old Norse (Runes): ᛖᚴ ᚷᛖᛏ ᛖᛏᛁ ᚧ ᚷᛚᛖᚱ ᛘᚾ ᚦᛖᛋᛋ ᚨᚧ ᚡᛖ ᚱᚧᚨ ᛋᚨᚱ +Old Norse (Latin): Ek get etið gler án þess að verða sár. +Norsk / Norwegian (Nynorsk): Eg kan eta glas utan å skada meg. +Norsk / Norwegian (Bokmål): Jeg kan spise glass uten å skade meg. +Føroyskt / Faroese: Eg kann eta glas, skaðaleysur. +Íslenska / Icelandic: Ég get etið gler án þess að meiða mig. +Svenska / Swedish: Jag kan äta glas utan att skada mig. +Dansk / Danish: Jeg kan spise glas, det gør ikke ondt på mig. +Sønderjysk: Æ ka æe glass uhen at det go mæ naue. +Frysk / Frisian: Ik kin glês ite, it docht me net sear. +Nederlands / Dutch: Ik kan glas eten, het doet mij geen kwaad. +Kirchröadsj/Bôchesserplat: Iech ken glaas èèse, mer 't deet miech jing pieng. +Afrikaans: Ek kan glas eet, maar dit doen my nie skade nie. +Lëtzebuergescht / Luxemburgish: Ech kan Glas iessen, daat deet mir nët wei. +Deutsch / German: Ich kann Glas essen, ohne mir zu schaden. +Ruhrdeutsch: Ich kann Glas verkasematuckeln, ohne dattet mich wat jucken tut. +Langenfelder Platt: Isch kann Jlaas kimmeln, uuhne datt mich datt weh dääd. +Lausitzer Mundart ("Lusatian"): Ich koann Gloos assn und doas dudd merr ni wii. +Odenwälderisch: Iech konn glaasch voschbachteln ohne dass es mir ebbs daun doun dud. +Sächsisch / Saxon: 'sch kann Glos essn, ohne dass'sch mer wehtue. +Pfälzisch: Isch konn Glass fresse ohne dasses mer ebbes ausmache dud. +Schwäbisch / Swabian: I kå Glas frässa, ond des macht mr nix! +Deutsch (Voralberg): I ka glas eassa, ohne dass mar weh tuat. +Bayrisch / Bavarian: I koh Glos esa, und es duard ma ned wei. +Allemannisch: I kaun Gloos essen, es tuat ma ned weh. +Schwyzerdütsch (Zürich): Ich chan Glaas ässe, das schadt mir nöd. +Schwyzerdütsch (Luzern): Ech cha Glâs ässe, das schadt mer ned. +Hungarian: Meg tudom enni az üveget, nem lesz tőle bajom. +Suomi / Finnish: Voin syödä lasia, se ei vahingoita minua. +Sami (Northern): Sáhtán borrat lása, dat ii leat bávččas. +Erzian: Мон ярсан суликадо, ды зыян эйстэнзэ а ули. +Northern Karelian: Mie voin syvvä lasie ta minla ei ole kipie. +Southern Karelian: Minä voin syvvä st'oklua dai minule ei ole kibie. +Estonian: Ma võin klaasi süüa, see ei tee mulle midagi. +Latvian: Es varu ēst stiklu, tas man nekaitē. +Lithuanian: Aš galiu valgyti stiklą ir jis manęs nežeidžia. +Czech: Mohu jíst sklo, neublíží mi. +Slovak: Môžem jesť sklo. Nezraní ma. +Polska / Polish: Mogę jeść szkło i mi nie szkodzi. +Slovenian: Lahko jem steklo, ne da bi mi škodovalo. +Bosnian, Croatian, Montenegrin and Serbian (Latin): Ja mogu jesti staklo, i to mi ne šteti. +Bosnian, Montenegrin and Serbian (Cyrillic): Ја могу јести стакло, и то ми не штети. +Macedonian: Можам да јадам стакло, а не ме штета. +Russian: Я могу есть стекло, оно мне не вредит. +Belarusian (Cyrillic): Я магу есці шкло, яно мне не шкодзіць. +Belarusian (Lacinka): Ja mahu jeści škło, jano mne ne škodzić. +Ukrainian: Я можу їсти скло, і воно мені не зашкодить. +Bulgarian: Мога да ям стъкло, то не ми вреди. +Georgian: მინას ვჭამ და არა მტკივა. +Armenian: Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։. +Albanian: Unë mund të ha qelq dhe nuk më gjen gjë. +Turkish: Cam yiyebilirim, bana zararı dokunmaz. +Turkish (Ottoman): جام ييه بلورم بڭا ضررى طوقونمز +Bangla / Bengali: আমি কাঁচ খেতে পারি, তাতে আমার কোনো ক্ষতি হয় না। +Marathi: मी काच खाऊ शकतो, मला ते दुखत नाही. +Kannada: ನನಗೆ ಹಾನಿ ಆಗದೆ, ನಾನು ಗಜನ್ನು ತಿನಬಹುದು. +Hindi: मैं काँच खा सकता हूँ और मुझे उससे कोई चोट नहीं पहुंचती. +Tamil: நான் கண்ணாடி சாப்பிடுவேன், அதனால் எனக்கு ஒரு கேடும் வராது. +Telugu: నేను గాజు తినగలను మరియు అలా చేసినా నాకు ఏమి ఇబ్బంది లేదు. +Sinhalese: මට වීදුරු කෑමට හැකියි. එයින් මට කිසි හානියක් සිදු නොවේ. +Urdu: میں کانچ کھا سکتا ہوں اور مجھے تکلیف نہیں ہوتی ۔ +Pashto: زه شيشه خوړلې شم، هغه ما نه خوږوي +Farsi: .من می توانم بدونِ احساس درد شيشه بخورم +Arabic: أنا قادر على أكل الزجاج و هذا لا يؤلمني. +Maltese: Nista' niekol il-ħġieġ u ma jagħmilli xejn. +Hebrew: אני יכול לאכול זכוכית וזה לא מזיק לי. +Yiddish: איך קען עסן גלאָז און עס טוט מיר נישט װײ. +Twi: Metumi awe tumpan, ɜnyɜ me hwee. +Hausa (Latin): Inā iya taunar gilāshi kuma in gamā lāfiyā. +Hausa (Ajami): إِنا إِىَ تَونَر غِلَاشِ كُمَ إِن غَمَا لَافِىَا +Yoruba: Mo lè je̩ dígí, kò ní pa mí lára. +Lingala: Nakokí kolíya biténi bya milungi, ekosála ngáí mabé tɛ́. +(Ki)Swahili: Naweza kula bilauri na sikunyui. +Malay: Saya boleh makan kaca dan ia tidak mencederakan saya. +Tagalog: Kaya kong kumain nang bubog at hindi ako masaktan. +Chamorro: Siña yo' chumocho krestat, ti ha na'lalamen yo'. +Fijian: Au rawa ni kana iloilo, ia au sega ni vakacacani kina. +Javanese: Aku isa mangan beling tanpa lara. +Burmese: က္ယ္ဝန္တော္၊က္ယ္ဝန္မ မ္ယက္စားနုိင္သည္။ ၎က္ရောင့္ ထိခုိက္မ္ဟု မရ္ဟိပာ။. +Vietnamese (quốc ngữ): Tôi có thể ăn thủy tinh mà không hại gì. +Vietnamese (nôm): 些 ࣎ 世 咹 水 晶 ও 空 ࣎ 害 咦. +Khmer: ខ្ញុំអាចញុំកញ្ចក់បាន ដោយគ្មានបញ្ហារ. +Lao: ຂອ້ຍກິນແກ້ວໄດ້ໂດຍທີ່ມັນບໍ່ໄດ້ເຮັດໃຫ້ຂອ້ຍເຈັບ. +Thai: ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ. +Mongolian (Cyrillic): Би шил идэй чадна, надад хортой биш. +Mongolian (Classic): ᠪᠢ ᠰᠢᠯᠢ ᠢᠳᠡᠶᠦ ᠴᠢᠳᠠᠨᠠ ᠂ ᠨᠠᠳᠤᠷ ᠬᠣᠤᠷᠠᠳᠠᠢ ᠪᠢᠰᠢ. +Nepali: म काँच खान सक्छू र मलाई केहि नी हुन्न् ।. +Tibetan: ཤེལ་སྒོ་ཟ་ནས་ང་ན་གི་མ་རེད།. +Chinese: 我能吞下玻璃而不伤身体。. +Taiwanese: Góa ē-tàng chia̍h po-lê, mā bē tio̍h-siong. +Japanese: 私はガラスを食べられます。それは私を傷つけません。. +Korean: 나는 유리를 먹을 수 있어요. 그래도 아프지 않아요. +Bislama: Mi save kakae glas, hemi no save katem mi. +Hawaiian: Hiki iaʻu ke ʻai i ke aniani; ʻaʻole nō lā au e ʻeha. +Marquesan: E koʻana e kai i te karahi, mea ʻā, ʻaʻe hauhau. +Inuktitut: ᐊᓕᒍᖅ ᓂᕆᔭᕌᖓᒃᑯ ᓱᕋᙱᑦᑐᓐᓇᖅᑐᖓ. +Chinook Jargon: Naika məkmək kakshət labutay, pi weyk ukuk munk-sik nay. +Navajo: Tsésǫʼ yishą́ągo bííníshghah dóó doo shił neezgai da. +Lojban: mi kakne le nu citka le blaci .iku'i le se go'i na xrani mi. +Nórdicg: Ljœr ye caudran créneþ ý jor cẃran. \ No newline at end of file diff --git a/asterixdb/asterix-external-data/src/test/resources/record.json b/asterixdb/asterix-external-data/src/test/resources/record.json new file mode 100644 index 0000000..9b32a5d --- /dev/null +++ b/asterixdb/asterix-external-data/src/test/resources/record.json @@ -0,0 +1,375 @@ +{ + "quoted_status": { + "in_reply_to_status_id_str": null, + "in_reply_to_status_id": null, + "possibly_sensitive": false, + "coordinates": null, + "created_at": "Wed Sep 02 07:24:48 +0000 2015", + "truncated": false, + "in_reply_to_user_id_str": null, + "source": "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>", + "retweet_count": 0, + "retweeted": false, + "geo": null, + "filter_level": "low", + "in_reply_to_screen_name": null, + "entities": { + "urls": [ + { + "expanded_url": "http://www.bigdata-insider.de/infrastruktur/articles/498946/?cmp=sm-tw-swyn&utm_source=twitter&utm_medium=sm&utm_campaign=twitter-swyn", + "display_url": "bigdata-insider.de/infrastruktur/…", + "indices": [ + 54, + 76 + ], + "url": "http://t.co/8inseWDWIE" + } + ], + "hashtags": [ + { + "indices": [ + 16, + 22 + ], + "text": "NoSQL" + }, + { + "indices": [ + 24, + 36 + ], + "text": "Datenbanken" + } + ], + "user_mentions": [ + { + "name": "EnterpriseDB_DE", + "indices": [ + 77, + 93 + ], + "id": 1219531897, + "screen_name": "EnterpriseDB_DE", + "id_str": "1219531897" + } + ], + "trends": [], + "symbols": [] + }, + "id_str": "638975848138285056", + "in_reply_to_user_id": null, + "favorite_count": 0, + "id": 638975848138285000, + "text": "Relationale und #NoSQL- #Datenbanken wachsen zusammen http://t.co/8inseWDWIE @EnterpriseDB_DE", + "place": null, + "contributors": null, + "lang": "de", + "user": { + "utc_offset": null, + "friends_count": 1440, + "profile_image_url_https": "https://pbs.twimg.com/profile_images/494807363572875265/EUm9CELG_normal.jpeg", + "listed_count": 54, + "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", + "default_profile_image": false, + "favourites_count": 11, + "description": "BigData-Insider.de – Entscheiderwissen für Big Data Professionals", + "created_at": "Mon Jun 30 10:40:17 +0000 2014", + "is_translator": false, + "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", + "protected": false, + "screen_name": "bigdata_insider", + "id_str": "2596163432", + "profile_link_color": "177535", + "id": 2596163432, + "geo_enabled": false, + "profile_background_color": "55965F", + "lang": "de", + "profile_sidebar_border_color": "FFFFFF", + "profile_text_color": "333333", + "verified": false, + "profile_image_url": "http://pbs.twimg.com/profile_images/494807363572875265/EUm9CELG_normal.jpeg", + "time_zone": null, + "url": "http://www.bigdata-insider.de", + "contributors_enabled": false, + "profile_background_tile": false, + "profile_banner_url": "https://pbs.twimg.com/profile_banners/2596163432/1405605723", + "statuses_count": 325, + "follow_request_sent": null, + "followers_count": 817, + "profile_use_background_image": false, + "default_profile": false, + "following": null, + "name": "BigData-Insider", + "location": "Augsburg, Germany", + "profile_sidebar_fill_color": "DDEEF6", + "notifications": null + }, + "favorited": false + }, + "in_reply_to_status_id_str": null, + "in_reply_to_status_id": null, + "created_at": "Wed Sep 02 08:17:29 +0000 2015", + "in_reply_to_user_id_str": null, + "source": "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>", + "quoted_status_id": 638975848138285000, + "retweet_count": 0, + "retweeted": false, + "geo": null, + "filter_level": "low", + "in_reply_to_screen_name": null, + "id_str": "638989106882736128", + "in_reply_to_user_id": null, + "favorite_count": 0, + "id": 638989106882736100, + "text": "RT: Datenbanken im IoT-Zeitalter - mehr lesen auf @bigdata_insider https://t.co/Yt0Pzij3tK", + "place": null, + "lang": "de", + "favorited": false, + "possibly_sensitive": false, + "coordinates": null, + "truncated": false, + "timestamp_ms": "1441181849581", + "entities": { + "urls": [ + { + "expanded_url": "https://twitter.com/bigdata_insider/status/638975848138285056", + "display_url": "twitter.com/bigdata_inside…", + "indices": [ + 68, + 91 + ], + "url": "https://t.co/Yt0Pzij3tK" + } + ], + "hashtags": [], + "user_mentions": [ + { + "name": "BigData-Insider", + "indices": [ + 50, + 66 + ], + "id": 2596163432, + "screen_name": "bigdata_insider", + "id_str": "2596163432" + } + ], + "trends": [], + "symbols": [] + }, + "quoted_status_id_str": "638975848138285056", + "contributors": null, + "user": { + "utc_offset": 7200, + "friends_count": 382, + "profile_image_url_https": "https://pbs.twimg.com/profile_images/600331462982946816/IzBC43SR_normal.png", + "listed_count": 22, + "profile_background_image_url": "http://abs.twimg.com/images/themes/theme14/bg.gif", + "default_profile_image": false, + "favourites_count": 56, + "description": "EnterpriseDB ist weltgrößter und führender Anbieter von Enterprise Lösungen und Services basierend auf PostgreSQL, die fortschrittlichste Open Source Datenbank.", + "created_at": "Mon Feb 25 18:37:11 +0000 2013", + "is_translator": false, + "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme14/bg.gif", + "protected": false, + "screen_name": "EnterpriseDB_DE", + "id_str": "1219531897", + "profile_link_color": "EC7224", + "id": 1219531897, + "geo_enabled": false, + "profile_background_color": "EC7224", + "lang": "de", + "profile_sidebar_border_color": "FFFFFF", + "profile_text_color": "333333", + "verified": false, + "profile_image_url": "http://pbs.twimg.com/profile_images/600331462982946816/IzBC43SR_normal.png", + "time_zone": "Berlin", + "url": "http://www.enterprisedb.com", + "contributors_enabled": false, + "profile_background_tile": false, + "statuses_count": 941, + "follow_request_sent": null, + "followers_count": 336, + "profile_use_background_image": true, + "default_profile": false, + "following": null, + "name": "EnterpriseDB_DE", + "location": "Berlin, Germany", + "profile_sidebar_fill_color": "DDEEF6", + "notifications": null + } +} +{ + "in_reply_to_status_id_str": null, + "in_reply_to_status_id": null, + "created_at": "Fri May 06 12:36:44 +0000 2016", + "in_reply_to_user_id_str": null, + "source": "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>", + "retweeted_status": { + "in_reply_to_status_id_str": null, + "in_reply_to_status_id": null, + "created_at": "Fri May 06 11:09:20 +0000 2016", + "in_reply_to_user_id_str": null, + "source": "<a href=\"http://jp.techcrunch.com/\" rel=\"nofollow\">TC Japan RTbot</a>", + "retweet_count": 4, + "retweeted": false, + "geo": null, + "filter_level": "low", + "in_reply_to_screen_name": null, + "is_quote_status": false, + "id_str": "728542158676852736", + "in_reply_to_user_id": null, + "favorite_count": 3, + "id": 728542158676852700, + "text": "16shares: Bashoが時系列データ専用NoSQLデータベースRiak TSをオープンソース化してIoTへの浸透をねらう https://t.co/vYi3iI3XkZ", + "place": null, + "lang": "ja", + "favorited": false, + "possibly_sensitive": false, + "coordinates": null, + "truncated": false, + "entities": { + "urls": [ + { + "expanded_url": "http://jp.techcrunch.com/2016/05/06/20160505basho-open-sources-its-riak-ts-database-for-the-internet-of-things/", + "display_url": "jp.techcrunch.com/2016/05/06/201…", + "indices": [ + 65, + 88 + ], + "url": "https://t.co/vYi3iI3XkZ" + } + ], + "hashtags": [], + "user_mentions": [], + "symbols": [] + }, + "contributors": null, + "user": { + "utc_offset": 32400, + "friends_count": 456, + "profile_image_url_https": "https://pbs.twimg.com/profile_images/542903207098212352/S02CeC4c_normal.png", + "listed_count": 4277, + "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", + "default_profile_image": false, + "favourites_count": 420, + "description": "TechCrunch Japanの公式アカウントです", + "created_at": "Fri Apr 22 10:46:18 +0000 2011", + "is_translator": false, + "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", + "protected": false, + "screen_name": "jptechcrunch", + "id_str": "286106104", + "profile_link_color": "0A9E01", + "id": 286106104, + "geo_enabled": false, + "profile_background_color": "FFFFFF", + "lang": "ja", + "profile_sidebar_border_color": "C0DEED", + "profile_text_color": "333333", + "verified": false, + "profile_image_url": "http://pbs.twimg.com/profile_images/542903207098212352/S02CeC4c_normal.png", + "time_zone": "Tokyo", + "url": "http://jp.techcrunch.com", + "contributors_enabled": false, + "profile_background_tile": false, + "profile_banner_url": "https://pbs.twimg.com/profile_banners/286106104/1427898894", + "statuses_count": 24997, + "follow_request_sent": null, + "followers_count": 58290, + "profile_use_background_image": true, + "default_profile": false, + "following": null, + "name": "TechCrunch Japan", + "location": "Tokyo", + "profile_sidebar_fill_color": "DDEEF6", + "notifications": null + } + }, + "retweet_count": 0, + "retweeted": false, + "geo": null, + "filter_level": "low", + "in_reply_to_screen_name": null, + "is_quote_status": false, + "id_str": "728564152130658304", + "in_reply_to_user_id": null, + "favorite_count": 0, + "id": 728564152130658300, + "text": "RT @jptechcrunch: 16shares: Bashoが時系列データ専用NoSQLデータベースRiak TSをオープンソース化してIoTへの浸透をねらう https://t.co/vYi3iI3XkZ", + "place": null, + "lang": "ja", + "favorited": false, + "possibly_sensitive": false, + "coordinates": null, + "truncated": false, + "timestamp_ms": "1462538204592", + "entities": { + "urls": [ + { + "expanded_url": "http://jp.techcrunch.com/2016/05/06/20160505basho-open-sources-its-riak-ts-database-for-the-internet-of-things/", + "display_url": "jp.techcrunch.com/2016/05/06/201…", + "indices": [ + 83, + 106 + ], + "url": "https://t.co/vYi3iI3XkZ" + } + ], + "hashtags": [], + "user_mentions": [ + { + "name": "TechCrunch Japan", + "indices": [ + 3, + 16 + ], + "id": 286106104, + "screen_name": "jptechcrunch", + "id_str": "286106104" + } + ], + "symbols": [] + }, + "contributors": null, + "user": { + "utc_offset": -25200, + "friends_count": 184, + "profile_image_url_https": "https://pbs.twimg.com/profile_images/615865274592432128/fYOAh2iR_normal.jpg", + "listed_count": 10, + "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", + "default_profile_image": false, + "favourites_count": 1523, + "description": "自作系、ランニング、筋トレを主とする生態系 。宗教上の理由でASRock、nVIDIAを崇拝。水冷化に向けて倹約中の身。炭酸飲料は血液。今後ともよろしく……", + "created_at": "Fri Jun 26 19:56:49 +0000 2015", + "is_translator": false, + "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", + "protected": false, + "screen_name": "774Inside_X79", + "id_str": "3257040751", + "profile_link_color": "0084B4", + "id": 3257040751, + "geo_enabled": true, + "profile_background_color": "C0DEED", + "lang": "ja", + "profile_sidebar_border_color": "C0DEED", + "profile_text_color": "333333", + "verified": false, + "profile_image_url": "http://pbs.twimg.com/profile_images/615865274592432128/fYOAh2iR_normal.jpg", + "time_zone": "Pacific Time (US & Canada)", + "url": "http://twpf.jp/774Inside_X79", + "contributors_enabled": false, + "profile_background_tile": false, + "profile_banner_url": "https://pbs.twimg.com/profile_banners/3257040751/1458988346", + "statuses_count": 3694, + "follow_request_sent": null, + "followers_count": 144, + "profile_use_background_image": true, + "default_profile": true, + "following": null, + "name": "ラォ", + "location": "背後", + "profile_sidebar_fill_color": "DDEEF6", + "notifications": null + } +} -- To view, visit https://asterix-gerrit.ics.uci.edu/951 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: merged Gerrit-Change-Id: I71c3d8b8dfa5a98123725f139247d2b5ce10012e Gerrit-PatchSet: 8 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: abdullah alamoudi <bamou...@gmail.com> Gerrit-Reviewer: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Gerrit-Reviewer: Yingyi Bu <buyin...@gmail.com> Gerrit-Reviewer: abdullah alamoudi <bamou...@gmail.com>