Re: Geode - store and query JSON documents
Thanks a lot Xiaojian Zhou for your clear explanation and detailed reply. This has helped a lot to proceed with my experiments. Ankit. On Fri, Nov 27, 2020, 5:48 AM Xiaojian Zhou wrote: > Ankit: > > I wrote some lucene sample code using your data and query. > > I also provided gfsh commands to create nested query. > > Note: I purposely provided 2 data to show the difference of query. > > package examples; > > import org.apache.geode.cache.Region; > import org.apache.geode.cache.client.ClientCache; > import org.apache.geode.cache.client.ClientCacheFactory; > import org.apache.geode.cache.client.ClientRegionShortcut; > import org.apache.geode.cache.lucene.LuceneQuery; > import org.apache.geode.cache.lucene.LuceneQueryException; > import org.apache.geode.cache.lucene.LuceneServiceProvider; > import org.apache.geode.cache.lucene.PageableLuceneQueryResults; > import org.apache.geode.cache.lucene.internal.LuceneIndexImpl; > import org.apache.geode.cache.lucene.internal.LuceneServiceImpl; > import org.apache.geode.pdx.JSONFormatter; > import org.apache.geode.pdx.PdxInstance; > > import java.io.IOException; > import java.util.HashSet; > import java.util.LinkedList; > import java.util.List; > import java.util.concurrent.TimeUnit; > import java.util.concurrent.atomic.AtomicInteger; > > public class JSONTest { > //NOTE: Below is truncated json, single json document can max contain an > array of col1...col30 (30 diff attributes) > // within data. 
> public final static String jsonDoc_2 = "{" + > "\"data\":[{" + > "\"col1\": {" + > "\"k11\": \"aaa\"," + > "\"k12\":true," + > "\"k13\": ," + > "\"k14\": \"2020-12-31:00:00:00\"" + > "}," + > "\"col2\":[{" + > "\"k21\": \"22\"," + > "\"k22\": true" + > "}]" + > "}]" + > "}"; > public final static String jsonDoc_3 = "{" + > "\"data\":[{" + > "\"col1\": {" + > "\"k11\": \"bbb\"," + > "\"k12\":true," + > "\"k13\": ," + > "\"k14\": \"2020-12-31:00:00:00\"" + > "}," + > "\"col2\":[{" + > "\"k21\": \"33\"," + > "\"k22\": true" + > "}]" + > "}]" + > "}"; > > //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray ([]) as > shown above in jsonDoc_2; > > public final static String REGION_NAME = "REGION_NAME"; > > public static void main(String[] args) throws InterruptedException, > LuceneQueryException { > > //create client-cache > ClientCache cache = new > ClientCacheFactory().addPoolLocator("localhost", > 10334).setPdxReadSerialized(true).create(); > Region region = cache. > PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY) > .create(REGION_NAME); > > //store json document > region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); > region.put("key3", JSONFormatter.fromJSON(jsonDoc_3)); > > LuceneServiceImpl service = (LuceneServiceImpl) > LuceneServiceProvider.get(cache); > LuceneIndexImpl index = (LuceneIndexImpl) > service.getIndex("jsonIndex", "REGION_NAME"); > if (index != null) { > service.waitUntilFlushed("jsonIndex", "REGION_NAME", 6, > TimeUnit.MILLISECONDS); > } > > LuceneQuery query = > service.createLuceneQueryFactory().create("jsonIndex", "REGION_NAME", > "22 OR 33", "data.col2.k21"); > System.out.println("Query 22 OR 33"); > HashSet results = getResults(query, "REGION_NAME"); > > LuceneQuery query2 = > service.createLuceneQueryFactory().create("jsonIndex", "REGION_NAME", > "aaa OR xxx OR yyy", "data.col1.k11"); > System.out.println("Query aaa OR xxx OR yyy"); > results = getResults(query2, "REGION_NAME"); > > // server side: > // 
gfsh> start locator > // gfsh> start server --name=server50505 --server-port=50505 > // gfsh> create lucene index --name=jsonIndex --region=/REGION_NAME > --field=data.col2.k21,data.col1.k11 > // --serializer=org.apache.geode.cache.lucene.FlatFormatSerializer > // gfsh> create region --name=REGION_NAME --type=PARTITION > --redundant-copies=1 --total-num-buckets=61 > > // How to query json document like, > > // 1. select col2.k21, col1, col20 from /REGION_NAME where > //data.col2.k21 = '22' OR data.col2.k21 = '33' > > // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where > // data.col1.k11 in ('aaa', 'xxx', 'yyy') > } > > private static HashSet getResults(LuceneQuery query, String regionName) > throws LuceneQueryException { > if (query == null) { > return null; > } > > PageableLuceneQueryResults results = query.findPages(); > if (results.size() > 0) { > System.out.println("Search found " + results.size() + " results in " > + regionName + ", page size is " + query.getPageSize()); > } > >
Re: Geode - store and query JSON documents
Ankit: I wrote some lucene sample code using your data and query. I also provided gfsh commands to create nested query. Note: I purposely provided 2 data to show the difference of query. package examples; import org.apache.geode.cache.Region; import org.apache.geode.cache.client.ClientCache; import org.apache.geode.cache.client.ClientCacheFactory; import org.apache.geode.cache.client.ClientRegionShortcut; import org.apache.geode.cache.lucene.LuceneQuery; import org.apache.geode.cache.lucene.LuceneQueryException; import org.apache.geode.cache.lucene.LuceneServiceProvider; import org.apache.geode.cache.lucene.PageableLuceneQueryResults; import org.apache.geode.cache.lucene.internal.LuceneIndexImpl; import org.apache.geode.cache.lucene.internal.LuceneServiceImpl; import org.apache.geode.pdx.JSONFormatter; import org.apache.geode.pdx.PdxInstance; import java.io.IOException; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; public class JSONTest { //NOTE: Below is truncated json, single json document can max contain an array of col1...col30 (30 diff attributes) // within data. 
public final static String jsonDoc_2 = "{" + "\"data\":[{" + "\"col1\": {" + "\"k11\": \"aaa\"," + "\"k12\":true," + "\"k13\": ," + "\"k14\": \"2020-12-31:00:00:00\"" + "}," + "\"col2\":[{" + "\"k21\": \"22\"," + "\"k22\": true" + "}]" + "}]" + "}"; public final static String jsonDoc_3 = "{" + "\"data\":[{" + "\"col1\": {" + "\"k11\": \"bbb\"," + "\"k12\":true," + "\"k13\": ," + "\"k14\": \"2020-12-31:00:00:00\"" + "}," + "\"col2\":[{" + "\"k21\": \"33\"," + "\"k22\": true" + "}]" + "}]" + "}"; //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray ([]) as shown above in jsonDoc_2; public final static String REGION_NAME = "REGION_NAME"; public static void main(String[] args) throws InterruptedException, LuceneQueryException { //create client-cache ClientCache cache = new ClientCacheFactory().addPoolLocator("localhost", 10334).setPdxReadSerialized(true).create(); Region region = cache.createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY) .create(REGION_NAME); //store json document region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); region.put("key3", JSONFormatter.fromJSON(jsonDoc_3)); LuceneServiceImpl service = (LuceneServiceImpl) LuceneServiceProvider.get(cache); LuceneIndexImpl index = (LuceneIndexImpl) service.getIndex("jsonIndex", "REGION_NAME"); if (index != null) { service.waitUntilFlushed("jsonIndex", "REGION_NAME", 6, TimeUnit.MILLISECONDS); } LuceneQuery query = service.createLuceneQueryFactory().create("jsonIndex", "REGION_NAME", "22 OR 33", "data.col2.k21"); System.out.println("Query 22 OR 33"); HashSet results = getResults(query, "REGION_NAME"); LuceneQuery query2 = service.createLuceneQueryFactory().create("jsonIndex", "REGION_NAME", "aaa OR xxx OR yyy", "data.col1.k11"); System.out.println("Query aaa OR xxx OR yyy"); results = getResults(query2, "REGION_NAME"); // server side: // gfsh> start locator // gfsh> start server --name=server50505 --server-port=50505 // gfsh> create lucene index --name=jsonIndex --region=/REGION_NAME 
--field=data.col2.k21,data.col1.k11 // --serializer=org.apache.geode.cache.lucene.FlatFormatSerializer // gfsh> create region --name=REGION_NAME --type=PARTITION --redundant-copies=1 --total-num-buckets=61 // How to query json document like, // 1. select col2.k21, col1, col20 from /REGION_NAME where //data.col2.k21 = '22' OR data.col2.k21 = '33' // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where // data.col1.k11 in ('aaa', 'xxx', 'yyy') } private static HashSet getResults(LuceneQuery query, String regionName) throws LuceneQueryException { if (query == null) { return null; } PageableLuceneQueryResults results = query.findPages(); if (results.size() > 0) { System.out.println("Search found " + results.size() + " results in " + regionName + ", page size is " + query.getPageSize()); } HashSet values = new HashSet<>(); while (results.hasNext()) { results.next().stream() .forEach(struct -> { Object value = struct.getValue(); if (value instanceof PdxInstance) { PdxInstance pdx = (PdxInstance) value; String jsonString = JSONFormatter.toJSON(pdx); List dataList = (LinkedList)pdx.getField("data");
Re: Geode - store and query JSON documents
Hi Anil, Thanks a lot for your guidance. This has really helped me to proceed. My intended queries are working and returning projected data (as a struct). Just wondering is there any api that does a struct --> JSON string conversation.. -Ankit. On Wed, Nov 25, 2020, 12:21 AM Anilkumar Gingade wrote: > Ankit, > > Here is how to query col2. > "SELECT d.col2 FROM /JsonRegion v, v.data d, d.col2 c where c.k21 = > '22'"; > > You can find example on how to query nested collections: > > https://geode.apache.org/docs/guide/18/getting_started/querying_quick_reference.html > > When you want to select a nested collection and inspect its value; you > need to create iterator in the from clause (E.g. d.col2 in the above query) > > You can find other ways to query arrays in the above sample. > > -Anil. > > > > On 11/23/20, 10:02 PM, "ankit Soni" wrote: > > Hi Anil, > > Thanks a lot for your reply. This really helps to proceed. The query > shared > by you worked but I need a slight variation of it, i.e where clause > contains col2 (data.col2.k21 = '22') which is array unlike col1 > (object). > > FYI: value is stored in cache. > PDX[28847624, __GEMFIRE_JSON]{ > data=[PDX[28847624, __GEMFIRE_JSON] { > col1=PDX[28626794, __GEMFIRE_JSON] {k11=aaa, k12=true, k13=, > k14=2020-12-31T00..} > Col2=[PDX[25385544, __GEMFIRE_JSON]{k21=, k22=true}]}]} > Based on OQL querying doc shared, tried few ways but no luck on > querying > based on Col2. > > It will be really helpful if you share updated query. > > Thanks > Ankit. > > On Tue, Nov 24, 2020, 2:42 AM Anilkumar Gingade > wrote: > > > Ankit, > > > > Here is how you can query your JSON object. > > > > String queryStr = "SELECT d.col1 FROM /JsonRegion v, v.data d where > > d.col1.k11 = 'aaa'"; > > > > As replied earlier; the data is stored as PdxInstance type in the > cache. > > In the PdxInstance, the data is stored as top level or nested > collection of > > objects/values based on input JSON object structure. 
> > The query engine queries on the PdxInstance type and returns the > value. > > > > To see, how the PdxInstance data looks like in the cache, you can > print > > the returned value from querying the region values: > > E.g.: > > String queryStr = "SELECT v FROM /JsonRegion v"; > > SelectResults results = (SelectResults) > > QueryService().newQuery(queryStr).execute(); > > Object[] value = results.asList().toArray(); > > System.out.println(" Projected value: " + value[0]); > > > > You can find sample queries on different type of objects > (collections, > > etc) at: > > > > > https://geode.apache.org/docs/guide/18/getting_started/querying_quick_reference.html > > > > Also in order to determine where the time is getting spent, can you > > separate out object creation through JSONFormatter from put > operation. > > E.g.: > > PdxInstance pdxInstance = JSONFormatter.fromJSON(jsonDoc_2); > > // Time taken to format: > > region.put("1", pdxInstance); > > // Time taken to add to cache: > > > > And measure the time separately. It will help to see if the time is > spent > > in getting the PdxInstance or in doing puts. Also, can you measure > the time > > in avg. > > E.g. Say time measured for puts from 1000 to 2000 and avg time for > those > > puts. > > > > -Anil. > > > > > > On 11/23/20, 11:27 AM, "ankit Soni" > wrote: > > > > Hello geode-dev, > > > > I am *evaluating usage of Geode (1.12) with storing JSON > documents and > > querying the same*. I am able to store the json records > successfully in > > geode but seeking guidance on how to query them. 
> > More details on code and sample json is, > > > > > > *Sample client-code* > > > > import org.apache.geode.cache.client.ClientCache; > > import org.apache.geode.cache.client.ClientCacheFactory; > > import org.apache.geode.cache.client.ClientRegionShortcut; > > import org.apache.geode.pdx.JSONFormatter; > > import org.apache.geode.pdx.PdxInstance; > > > > public class MyTest { > > > > *//NOTE: Below is truncated json, single json document can > max > > contain an array of col1...col30 (30 diff attributes) within > data. * > > public final static String jsonDoc_2 = "{" + > > "\"data\":[{" + > > "\"col1\": {" + > > "\"k11\": \"aaa\"," + > > "\"k12\":true," + > > "\"k13\": ," + > > "\"k14\": > \"2020-12-31:00:00:00\"" + > > "}," + > >
Re: Geode - store and query JSON documents
Ankit, Here is how to query col2. "SELECT d.col2 FROM /JsonRegion v, v.data d, d.col2 c where c.k21 = '22'"; You can find example on how to query nested collections: https://geode.apache.org/docs/guide/18/getting_started/querying_quick_reference.html When you want to select a nested collection and inspect its value; you need to create iterator in the from clause (E.g. d.col2 in the above query) You can find other ways to query arrays in the above sample. -Anil. On 11/23/20, 10:02 PM, "ankit Soni" wrote: Hi Anil, Thanks a lot for your reply. This really helps to proceed. The query shared by you worked but I need a slight variation of it, i.e where clause contains col2 (data.col2.k21 = '22') which is array unlike col1 (object). FYI: value is stored in cache. PDX[28847624, __GEMFIRE_JSON]{ data=[PDX[28847624, __GEMFIRE_JSON] { col1=PDX[28626794, __GEMFIRE_JSON] {k11=aaa, k12=true, k13=, k14=2020-12-31T00..} Col2=[PDX[25385544, __GEMFIRE_JSON]{k21=, k22=true}]}]} Based on OQL querying doc shared, tried few ways but no luck on querying based on Col2. It will be really helpful if you share updated query. Thanks Ankit. On Tue, Nov 24, 2020, 2:42 AM Anilkumar Gingade wrote: > Ankit, > > Here is how you can query your JSON object. > > String queryStr = "SELECT d.col1 FROM /JsonRegion v, v.data d where > d.col1.k11 = 'aaa'"; > > As replied earlier; the data is stored as PdxInstance type in the cache. > In the PdxInstance, the data is stored as top level or nested collection of > objects/values based on input JSON object structure. > The query engine queries on the PdxInstance type and returns the value. 
> > To see, how the PdxInstance data looks like in the cache, you can print > the returned value from querying the region values: > E.g.: > String queryStr = "SELECT v FROM /JsonRegion v"; > SelectResults results = (SelectResults) > QueryService().newQuery(queryStr).execute(); > Object[] value = results.asList().toArray(); > System.out.println(" Projected value: " + value[0]); > > You can find sample queries on different type of objects (collections, > etc) at: > > https://geode.apache.org/docs/guide/18/getting_started/querying_quick_reference.html > > Also in order to determine where the time is getting spent, can you > separate out object creation through JSONFormatter from put operation. > E.g.: > PdxInstance pdxInstance = JSONFormatter.fromJSON(jsonDoc_2); > // Time taken to format: > region.put("1", pdxInstance); > // Time taken to add to cache: > > And measure the time separately. It will help to see if the time is spent > in getting the PdxInstance or in doing puts. Also, can you measure the time > in avg. > E.g. Say time measured for puts from 1000 to 2000 and avg time for those > puts. > > -Anil. > > > On 11/23/20, 11:27 AM, "ankit Soni" wrote: > > Hello geode-dev, > > I am *evaluating usage of Geode (1.12) with storing JSON documents and > querying the same*. I am able to store the json records successfully in > geode but seeking guidance on how to query them. > More details on code and sample json is, > > > *Sample client-code* > > import org.apache.geode.cache.client.ClientCache; > import org.apache.geode.cache.client.ClientCacheFactory; > import org.apache.geode.cache.client.ClientRegionShortcut; > import org.apache.geode.pdx.JSONFormatter; > import org.apache.geode.pdx.PdxInstance; > > public class MyTest { > > *//NOTE: Below is truncated json, single json document can max > contain an array of col1...col30 (30 diff attributes) within data. 
* > public final static String jsonDoc_2 = "{" + > "\"data\":[{" + > "\"col1\": {" + > "\"k11\": \"aaa\"," + > "\"k12\":true," + > "\"k13\": ," + > "\"k14\": \"2020-12-31:00:00:00\"" + > "}," + > "\"col2\":[{" + > "\"k21\": \"22\"," + > "\"k22\": true" + > "}]" + > "}]" + > "}"; > > * //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray > ([]) as shown above in jsonDoc_2;* > > public static void main(String[] args){ > > //create client-cache > ClientCache cache = new
Re: Geode - store and query JSON documents
Hi Anil, Thanks a lot for your reply. This really helps to proceed. The query shared by you worked but I need a slight variation of it, i.e where clause contains col2 (data.col2.k21 = '22') which is array unlike col1 (object). FYI: value is stored in cache. PDX[28847624, __GEMFIRE_JSON]{ data=[PDX[28847624, __GEMFIRE_JSON] { col1=PDX[28626794, __GEMFIRE_JSON] {k11=aaa, k12=true, k13=, k14=2020-12-31T00..} Col2=[PDX[25385544, __GEMFIRE_JSON]{k21=, k22=true}]}]} Based on OQL querying doc shared, tried few ways but no luck on querying based on Col2. It will be really helpful if you share updated query. Thanks Ankit. On Tue, Nov 24, 2020, 2:42 AM Anilkumar Gingade wrote: > Ankit, > > Here is how you can query your JSON object. > > String queryStr = "SELECT d.col1 FROM /JsonRegion v, v.data d where > d.col1.k11 = 'aaa'"; > > As replied earlier; the data is stored as PdxInstance type in the cache. > In the PdxInstance, the data is stored as top level or nested collection of > objects/values based on input JSON object structure. > The query engine queries on the PdxInstance type and returns the value. > > To see, how the PdxInstance data looks like in the cache, you can print > the returned value from querying the region values: > E.g.: > String queryStr = "SELECT v FROM /JsonRegion v"; > SelectResults results = (SelectResults) > QueryService().newQuery(queryStr).execute(); > Object[] value = results.asList().toArray(); > System.out.println(" Projected value: " + value[0]); > > You can find sample queries on different type of objects (collections, > etc) at: > > https://geode.apache.org/docs/guide/18/getting_started/querying_quick_reference.html > > Also in order to determine where the time is getting spent, can you > separate out object creation through JSONFormatter from put operation. 
> E.g.: > PdxInstance pdxInstance = JSONFormatter.fromJSON(jsonDoc_2); > // Time taken to format: > region.put("1", pdxInstance); > // Time taken to add to cache: > > And measure the time separately. It will help to see if the time is spent > in getting the PdxInstance or in doing puts. Also, can you measure the time > in avg. > E.g. Say time measured for puts from 1000 to 2000 and avg time for those > puts. > > -Anil. > > > On 11/23/20, 11:27 AM, "ankit Soni" wrote: > > Hello geode-dev, > > I am *evaluating usage of Geode (1.12) with storing JSON documents and > querying the same*. I am able to store the json records successfully in > geode but seeking guidance on how to query them. > More details on code and sample json is, > > > *Sample client-code* > > import org.apache.geode.cache.client.ClientCache; > import org.apache.geode.cache.client.ClientCacheFactory; > import org.apache.geode.cache.client.ClientRegionShortcut; > import org.apache.geode.pdx.JSONFormatter; > import org.apache.geode.pdx.PdxInstance; > > public class MyTest { > > *//NOTE: Below is truncated json, single json document can max > contain an array of col1...col30 (30 diff attributes) within data. * > public final static String jsonDoc_2 = "{" + > "\"data\":[{" + > "\"col1\": {" + > "\"k11\": \"aaa\"," + > "\"k12\":true," + > "\"k13\": ," + > "\"k14\": \"2020-12-31:00:00:00\"" + > "}," + > "\"col2\":[{" + > "\"k21\": \"22\"," + > "\"k22\": true" + > "}]" + > "}]" + > "}"; > > * //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray > ([]) as shown above in jsonDoc_2;* > > public static void main(String[] args){ > > //create client-cache > ClientCache cache = new > ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create(); > Region region = cache. > PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY) > .create(REGION_NAME); > > //store json document > region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); > > //How to query json document like, > > // 1. 
select col2.k21, col1, col20 from /REGION_NAME where > data.col2.k21 = '22' OR data.col2.k21 = '33' > > // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where > data.col1.k11 in ('aaa', 'xxx', 'yyy') > } > } > > *Server: Region-creation* > > gfsh> create region --name=REGION_NAME --type=PARTITION > --redundant-copies=1 --total-num-buckets=61 > > > *Setup: Distributed cluster of 3 nodes > * > > *My Observations/Problems* > - Put operation takes excessive
Re: Geode - store and query JSON documents
Ankit, Here is how you can query your JSON object. String queryStr = "SELECT d.col1 FROM /JsonRegion v, v.data d where d.col1.k11 = 'aaa'"; As replied earlier; the data is stored as PdxInstance type in the cache. In the PdxInstance, the data is stored as top level or nested collection of objects/values based on input JSON object structure. The query engine queries on the PdxInstance type and returns the value. To see, how the PdxInstance data looks like in the cache, you can print the returned value from querying the region values: E.g.: String queryStr = "SELECT v FROM /JsonRegion v"; SelectResults results = (SelectResults) QueryService().newQuery(queryStr).execute(); Object[] value = results.asList().toArray(); System.out.println(" Projected value: " + value[0]); You can find sample queries on different type of objects (collections, etc) at: https://geode.apache.org/docs/guide/18/getting_started/querying_quick_reference.html Also in order to determine where the time is getting spent, can you separate out object creation through JSONFormatter from put operation. E.g.: PdxInstance pdxInstance = JSONFormatter.fromJSON(jsonDoc_2); // Time taken to format: region.put("1", pdxInstance); // Time taken to add to cache: And measure the time separately. It will help to see if the time is spent in getting the PdxInstance or in doing puts. Also, can you measure the time in avg. E.g. Say time measured for puts from 1000 to 2000 and avg time for those puts. -Anil. On 11/23/20, 11:27 AM, "ankit Soni" wrote: Hello geode-dev, I am *evaluating usage of Geode (1.12) with storing JSON documents and querying the same*. I am able to store the json records successfully in geode but seeking guidance on how to query them. 
More details on code and sample json is, *Sample client-code* import org.apache.geode.cache.client.ClientCache; import org.apache.geode.cache.client.ClientCacheFactory; import org.apache.geode.cache.client.ClientRegionShortcut; import org.apache.geode.pdx.JSONFormatter; import org.apache.geode.pdx.PdxInstance; public class MyTest { *//NOTE: Below is truncated json, single json document can max contain an array of col1...col30 (30 diff attributes) within data. * public final static String jsonDoc_2 = "{" + "\"data\":[{" + "\"col1\": {" + "\"k11\": \"aaa\"," + "\"k12\":true," + "\"k13\": ," + "\"k14\": \"2020-12-31:00:00:00\"" + "}," + "\"col2\":[{" + "\"k21\": \"22\"," + "\"k22\": true" + "}]" + "}]" + "}"; * //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray ([]) as shown above in jsonDoc_2;* public static void main(String[] args){ //create client-cache ClientCache cache = new ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create(); Region region = cache.createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY) .create(REGION_NAME); //store json document region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); //How to query json document like, // 1. select col2.k21, col1, col20 from /REGION_NAME where data.col2.k21 = '22' OR data.col2.k21 = '33' // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where data.col1.k11 in ('aaa', 'xxx', 'yyy') } } *Server: Region-creation* gfsh> create region --name=REGION_NAME --type=PARTITION --redundant-copies=1 --total-num-buckets=61 *Setup: Distributed cluster of 3 nodes * *My Observations/Problems* - Put operation takes excessive time: region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); - Fetching a single record from () a file and Storing in geode approx. takes . 3 secs Is there any suggestions/configuration related to JSONFormatter API or other to optimize this...? *Looking forward to guidance on querying this JOSN for above sample queries.* *Thanks* *Ankit.*
Geode - store and query JSON documents
Hello geode-dev, I am *evaluating usage of Geode (1.12) with storing JSON documents and querying the same*. I am able to store the json records successfully in geode but seeking guidance on how to query them. More details on code and sample json is, *Sample client-code* import org.apache.geode.cache.client.ClientCache; import org.apache.geode.cache.client.ClientCacheFactory; import org.apache.geode.cache.client.ClientRegionShortcut; import org.apache.geode.pdx.JSONFormatter; import org.apache.geode.pdx.PdxInstance; public class MyTest { *//NOTE: Below is truncated json, single json document can max contain an array of col1...col30 (30 diff attributes) within data. * public final static String jsonDoc_2 = "{" + "\"data\":[{" + "\"col1\": {" + "\"k11\": \"aaa\"," + "\"k12\":true," + "\"k13\": ," + "\"k14\": \"2020-12-31:00:00:00\"" + "}," + "\"col2\":[{" + "\"k21\": \"22\"," + "\"k22\": true" + "}]" + "}]" + "}"; * //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray ([]) as shown above in jsonDoc_2;* public static void main(String[] args){ //create client-cache ClientCache cache = new ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create(); Region region = cache.createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY) .create(REGION_NAME); //store json document region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); //How to query json document like, // 1. select col2.k21, col1, col20 from /REGION_NAME where data.col2.k21 = '22' OR data.col2.k21 = '33' // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where data.col1.k11 in ('aaa', 'xxx', 'yyy') } } *Server: Region-creation* gfsh> create region --name=REGION_NAME --type=PARTITION --redundant-copies=1 --total-num-buckets=61 *Setup: Distributed cluster of 3 nodes * *My Observations/Problems* - Put operation takes excessive time: region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); - Fetching a single record from () a file and Storing in geode approx. takes . 
3 secs. Are there any suggestions/configurations related to the JSONFormatter API (or otherwise) to optimize this? *Looking forward to guidance on querying this JSON for the above sample queries.* *Thanks* *Ankit.*
Geode - store and query JSON documents
Hello geode-dev, I am *evaluating usage of Geode (1.12) with storing JSON documents and querying the same*. I am able to store the json records successfully in geode but seeking guidance on how to query them. More details on code and sample json is, *Sample client-code* import org.apache.geode.cache.client.ClientCache; import org.apache.geode.cache.client.ClientCacheFactory; import org.apache.geode.cache.client.ClientRegionShortcut; import org.apache.geode.pdx.JSONFormatter; import org.apache.geode.pdx.PdxInstance; public class MyTest { *//NOTE: Below is truncated json, single json document can max contain an array of col1...col30 (30 diff attributes) within data. * public final static String jsonDoc_2 = "{" + "\"data\":[{" + "\"col1\": {" + "\"k11\": \"aaa\"," + "\"k12\":true," + "\"k13\": ," + "\"k14\": \"2020-12-31:00:00:00\"" + "}," + "\"col2\":[{" + "\"k21\": \"22\"," + "\"k22\": true" + "}]" + "}]" + "}"; * //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray ([]) as shown above in jsonDoc_2;* public static void main(String[] args){ //create client-cache ClientCache cache = new ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create(); Region region = cache.createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY) .create(REGION_NAME); //store json document region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); //How to query json document like, // 1. select col2.k21, col1, col20 from /REGION_NAME where data.col2.k21 = '22' OR data.col2.k21 = '33' // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where data.col1.k11 in ('aaa', 'xxx', 'yyy') } } *Server: Region-creation* gfsh> create region --name=REGION_NAME --type=PARTITION --redundant-copies=1 --total-num-buckets=61 *Setup: Distributed cluster of 3 nodes * *My Observations/Problems* - Put operation takes excessive time: region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); - Fetching a single record from () a file and Storing in geode approx. takes . 
3 secs. Are there any suggestions/configurations related to the JSONFormatter API (or otherwise) to optimize this? *Looking forward to guidance on querying this JSON for the above sample queries.* *Thanks* *Ankit.*
Re: Geode - store and query JSON documents
Ankit: Anil can provide you some sample code of OQL query on JSON. I will find some lucene sample code on JSON for you. Regards Xiaojian On 11/23/20, 9:27 AM, "ankit Soni" wrote: Hi I am looking for any means of querying (OQL/Lucene/API etc..?) this stored data. Looking for achieving this functionality first and second, in a performant way. I shared the OQL like syntax, to share my use-case easily and based on some reference found on doc. I am ok if a Lucene query or some other way can fetch the results. It will be of great help if you share the sample query/code fetching this data . Thanks Ankit. On Mon, 23 Nov 2020 at 22:43, Xiaojian Zhou wrote: > Anil: > > The syntax is OQL. But I understand they want to query JSON object base on > the criteria. > > On 11/23/20, 9:08 AM, "Anilkumar Gingade" wrote: > > Gester, Looking at the sample query, I Believe Ankit is asking about > OQL query not Lucene... > > -Anil. > > > On 11/23/20, 9:02 AM, "Xiaojian Zhou" wrote: > > Ankit: > > Geode provided lucene query on json field. Your query can be > supported. > > https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgemfire.docs.pivotal.io%2F910%2Fgeode%2Ftools_modules%2Flucene_integration.htmldata=04%7C01%7Czhouxh%40vmware.com%7Cf39e257a59314869f37108d88fd51348%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C637417492605622263%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=TzvDCdlG6olUERrjYy%2F1L0ZqwbyaPgW6FCzXWoOSLJw%3Dreserved=0 > > However in above document, it did not provided a query example on > JSON object. > > I can give you some sample code to query on JSON. > > Regards > Xiaojian Zhou > > On 11/22/20, 11:53 AM, "ankit Soni" > wrote: > > Hello geode-devs, please provide a guidance on this. > > Ankit. > > On Sat, 21 Nov 2020 at 10:23, ankit Soni < > ankit.soni.ge...@gmail.com> wrote: > > > Hello team, > > > > I am *evaluating usage of Geode (1.12) with storing JSON > documents and > > querying the same*. 
I am able to store the json records > successfully in > > geode but seeking guidance on how to query them. > > More details on code and sample json is, > > > > > > *Sample client-code* > > > > import org.apache.geode.cache.client.ClientCache; > > import org.apache.geode.cache.client.ClientCacheFactory; > > import org.apache.geode.cache.client.ClientRegionShortcut; > > import org.apache.geode.pdx.JSONFormatter; > > import org.apache.geode.pdx.PdxInstance; > > > > public class MyTest { > > > > *//NOTE: Below is truncated json, single json document > can max contain an array of col1...col30 (30 diff attributes) within data. * > > public final static String jsonDoc_2 = "{" + > > "\"data\":[{" + > > "\"col1\": {" + > > "\"k11\": \"aaa\"," + > > "\"k12\":true," + > > "\"k13\": ," + > > "\"k14\": > \"2020-12-31:00:00:00\"" + > > "}," + > > "\"col2\":[{" + > > "\"k21\": \"22\"," + > > "\"k22\": true" + > > "}]" + > > "}]" + > > "}"; > > > > * //NOTE: Col1col30 are mix of JSONObject ({}) and > JSONArray ([]) as shown above in jsonDoc_2;* > > > > public static void main(String[] args){ > > > > //create client-cache > > ClientCache cache = new > ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create(); > > Region region = cache. PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY) > > .create(REGION_NAME); > > > > //store json document > > region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); > > > > //How to
Re: Geode - store and query JSON documents
Hi I am looking for any means of querying (OQL/Lucene/API etc..?) this stored data. Looking for achieving this functionality first and second, in a performant way. I shared the OQL like syntax, to share my use-case easily and based on some reference found on doc. I am ok if a Lucene query or some other way can fetch the results. It will be of great help if you share the sample query/code fetching this data . Thanks Ankit. On Mon, 23 Nov 2020 at 22:43, Xiaojian Zhou wrote: > Anil: > > The syntax is OQL. But I understand they want to query JSON object base on > the criteria. > > On 11/23/20, 9:08 AM, "Anilkumar Gingade" wrote: > > Gester, Looking at the sample query, I Believe Ankit is asking about > OQL query not Lucene... > > -Anil. > > > On 11/23/20, 9:02 AM, "Xiaojian Zhou" wrote: > > Ankit: > > Geode provided lucene query on json field. Your query can be > supported. > > https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgemfire.docs.pivotal.io%2F910%2Fgeode%2Ftools_modules%2Flucene_integration.htmldata=04%7C01%7Czhouxh%40vmware.com%7Ca1c897031e4b481a2f1508d88fd270f6%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C637417481290223899%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=pxnkFepPHN61G0wIyfROqIFx5J9aRdyg1GpGHN%2FCU74%3Dreserved=0 > > However in above document, it did not provided a query example on > JSON object. > > I can give you some sample code to query on JSON. > > Regards > Xiaojian Zhou > > On 11/22/20, 11:53 AM, "ankit Soni" > wrote: > > Hello geode-devs, please provide a guidance on this. > > Ankit. > > On Sat, 21 Nov 2020 at 10:23, ankit Soni < > ankit.soni.ge...@gmail.com> wrote: > > > Hello team, > > > > I am *evaluating usage of Geode (1.12) with storing JSON > documents and > > querying the same*. I am able to store the json records > successfully in > > geode but seeking guidance on how to query them. 
> > More details on code and sample json is, > > > > > > *Sample client-code* > > > > import org.apache.geode.cache.client.ClientCache; > > import org.apache.geode.cache.client.ClientCacheFactory; > > import org.apache.geode.cache.client.ClientRegionShortcut; > > import org.apache.geode.pdx.JSONFormatter; > > import org.apache.geode.pdx.PdxInstance; > > > > public class MyTest { > > > > *//NOTE: Below is truncated json, single json document > can max contain an array of col1...col30 (30 diff attributes) within data. * > > public final static String jsonDoc_2 = "{" + > > "\"data\":[{" + > > "\"col1\": {" + > > "\"k11\": \"aaa\"," + > > "\"k12\":true," + > > "\"k13\": ," + > > "\"k14\": > \"2020-12-31:00:00:00\"" + > > "}," + > > "\"col2\":[{" + > > "\"k21\": \"22\"," + > > "\"k22\": true" + > > "}]" + > > "}]" + > > "}"; > > > > * //NOTE: Col1col30 are mix of JSONObject ({}) and > JSONArray ([]) as shown above in jsonDoc_2;* > > > > public static void main(String[] args){ > > > > //create client-cache > > ClientCache cache = new > ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create(); > > Region region = cache. PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY) > > .create(REGION_NAME); > > > > //store json document > > region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); > > > > //How to query json document like, > > > > // 1. select col2.k21, col1, col20 from /REGION_NAME > where data.col2.k21 = '22' OR data.col2.k21 = '33' > > > > // 2. select col2.k21, col1.k11, col1 from > /REGION_NAME where data.col1.k11 in ('aaa', 'xxx', 'yyy') > > } > > } > > > > *Server: Region-creation* > > > > gfsh> create region --name=REGION_NAME --type=PARTITION > --redundant-copies=1 --total-num-buckets=61 > > > > > >
Re: Geode - store and query JSON documents
Anil: The syntax is OQL. But I understand they want to query JSON object base on the criteria. On 11/23/20, 9:08 AM, "Anilkumar Gingade" wrote: Gester, Looking at the sample query, I Believe Ankit is asking about OQL query not Lucene... -Anil. On 11/23/20, 9:02 AM, "Xiaojian Zhou" wrote: Ankit: Geode provided lucene query on json field. Your query can be supported. https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgemfire.docs.pivotal.io%2F910%2Fgeode%2Ftools_modules%2Flucene_integration.htmldata=04%7C01%7Czhouxh%40vmware.com%7Ca1c897031e4b481a2f1508d88fd270f6%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C637417481290223899%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=pxnkFepPHN61G0wIyfROqIFx5J9aRdyg1GpGHN%2FCU74%3Dreserved=0 However in above document, it did not provided a query example on JSON object. I can give you some sample code to query on JSON. Regards Xiaojian Zhou On 11/22/20, 11:53 AM, "ankit Soni" wrote: Hello geode-devs, please provide a guidance on this. Ankit. On Sat, 21 Nov 2020 at 10:23, ankit Soni wrote: > Hello team, > > I am *evaluating usage of Geode (1.12) with storing JSON documents and > querying the same*. I am able to store the json records successfully in > geode but seeking guidance on how to query them. > More details on code and sample json is, > > > *Sample client-code* > > import org.apache.geode.cache.client.ClientCache; > import org.apache.geode.cache.client.ClientCacheFactory; > import org.apache.geode.cache.client.ClientRegionShortcut; > import org.apache.geode.pdx.JSONFormatter; > import org.apache.geode.pdx.PdxInstance; > > public class MyTest { > > *//NOTE: Below is truncated json, single json document can max contain an array of col1...col30 (30 diff attributes) within data. 
* > public final static String jsonDoc_2 = "{" + > "\"data\":[{" + > "\"col1\": {" + > "\"k11\": \"aaa\"," + > "\"k12\":true," + > "\"k13\": ," + > "\"k14\": \"2020-12-31:00:00:00\"" + > "}," + > "\"col2\":[{" + > "\"k21\": \"22\"," + > "\"k22\": true" + > "}]" + > "}]" + > "}"; > > * //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray ([]) as shown above in jsonDoc_2;* > > public static void main(String[] args){ > > //create client-cache > ClientCache cache = new ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create(); > Region region = cache.createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY) > .create(REGION_NAME); > > //store json document > region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); > > //How to query json document like, > > // 1. select col2.k21, col1, col20 from /REGION_NAME where data.col2.k21 = '22' OR data.col2.k21 = '33' > > // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where data.col1.k11 in ('aaa', 'xxx', 'yyy') > } > } > > *Server: Region-creation* > > gfsh> create region --name=REGION_NAME --type=PARTITION --redundant-copies=1 --total-num-buckets=61 > > > *Setup: Distributed cluster of 3 nodes > * > > *My Observations/Problems* > - Put operation takes excessive time: region.put("key", > JSONFormatter.fromJSON(jsonDoc_2)); - Fetching a single record from () a > file and Storing in geode approx. takes . 3 secs >Is there any suggestions/configuration related to JSONFormatter API or > other to optimize this...? > > *Looking forward to guidance on querying this JOSN for above sample > queries.* > > *Thanks* > *Ankit* >
Re: Geode - store and query JSON documents
Gester, Looking at the sample query, I Believe Ankit is asking about OQL query not Lucene... -Anil. On 11/23/20, 9:02 AM, "Xiaojian Zhou" wrote: Ankit: Geode provided lucene query on json field. Your query can be supported. https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgemfire.docs.pivotal.io%2F910%2Fgeode%2Ftools_modules%2Flucene_integration.htmldata=04%7C01%7Cagingade%40vmware.com%7Cd513ee6b680c483830df08d88fd194f5%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C637417477593275133%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=l4RfUYfWLRnun%2BOYKtIE0pjkC047LsWBBNMdQb3MY2M%3Dreserved=0 However in above document, it did not provided a query example on JSON object. I can give you some sample code to query on JSON. Regards Xiaojian Zhou On 11/22/20, 11:53 AM, "ankit Soni" wrote: Hello geode-devs, please provide a guidance on this. Ankit. On Sat, 21 Nov 2020 at 10:23, ankit Soni wrote: > Hello team, > > I am *evaluating usage of Geode (1.12) with storing JSON documents and > querying the same*. I am able to store the json records successfully in > geode but seeking guidance on how to query them. > More details on code and sample json is, > > > *Sample client-code* > > import org.apache.geode.cache.client.ClientCache; > import org.apache.geode.cache.client.ClientCacheFactory; > import org.apache.geode.cache.client.ClientRegionShortcut; > import org.apache.geode.pdx.JSONFormatter; > import org.apache.geode.pdx.PdxInstance; > > public class MyTest { > > *//NOTE: Below is truncated json, single json document can max contain an array of col1...col30 (30 diff attributes) within data. 
* > public final static String jsonDoc_2 = "{" + > "\"data\":[{" + > "\"col1\": {" + > "\"k11\": \"aaa\"," + > "\"k12\":true," + > "\"k13\": ," + > "\"k14\": \"2020-12-31:00:00:00\"" + > "}," + > "\"col2\":[{" + > "\"k21\": \"22\"," + > "\"k22\": true" + > "}]" + > "}]" + > "}"; > > * //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray ([]) as shown above in jsonDoc_2;* > > public static void main(String[] args){ > > //create client-cache > ClientCache cache = new ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create(); > Region region = cache.createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY) > .create(REGION_NAME); > > //store json document > region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); > > //How to query json document like, > > // 1. select col2.k21, col1, col20 from /REGION_NAME where data.col2.k21 = '22' OR data.col2.k21 = '33' > > // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where data.col1.k11 in ('aaa', 'xxx', 'yyy') > } > } > > *Server: Region-creation* > > gfsh> create region --name=REGION_NAME --type=PARTITION --redundant-copies=1 --total-num-buckets=61 > > > *Setup: Distributed cluster of 3 nodes > * > > *My Observations/Problems* > - Put operation takes excessive time: region.put("key", > JSONFormatter.fromJSON(jsonDoc_2)); - Fetching a single record from () a > file and Storing in geode approx. takes . 3 secs >Is there any suggestions/configuration related to JSONFormatter API or > other to optimize this...? > > *Looking forward to guidance on querying this JOSN for above sample > queries.* > > *Thanks* > *Ankit* >
Re: Geode - store and query JSON documents
Ankit: Geode provides Lucene queries on JSON fields. Your query can be supported. https://gemfire.docs.pivotal.io/910/geode/tools_modules/lucene_integration.html However, the above document does not provide a query example on a JSON object. I can give you some sample code to query on JSON. Regards Xiaojian Zhou On 11/22/20, 11:53 AM, "ankit Soni" wrote: Hello geode-devs, please provide a guidance on this. Ankit. On Sat, 21 Nov 2020 at 10:23, ankit Soni wrote: > Hello team, > > I am *evaluating usage of Geode (1.12) with storing JSON documents and > querying the same*. I am able to store the json records successfully in > geode but seeking guidance on how to query them. > More details on code and sample json is, > > > *Sample client-code* > > import org.apache.geode.cache.client.ClientCache; > import org.apache.geode.cache.client.ClientCacheFactory; > import org.apache.geode.cache.client.ClientRegionShortcut; > import org.apache.geode.pdx.JSONFormatter; > import org.apache.geode.pdx.PdxInstance; > > public class MyTest { > > *//NOTE: Below is truncated json, single json document can max contain an array of col1...col30 (30 diff attributes) within data. * > public final static String jsonDoc_2 = "{" + > "\"data\":[{" + > "\"col1\": {" + > "\"k11\": \"aaa\"," + > "\"k12\":true," + > "\"k13\": ," + > "\"k14\": \"2020-12-31:00:00:00\"" + > "}," + > "\"col2\":[{" + > "\"k21\": \"22\"," + > "\"k22\": true" + > "}]" + > "}]" + > "}"; > > * //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray ([]) as shown above in jsonDoc_2;* > > public static void main(String[] args){ > > //create client-cache > ClientCache cache = new ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create(); > Region region = cache.createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY) > .create(REGION_NAME); > > //store json document > region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); > > //How to query json document like, > > // 1. 
select col2.k21, col1, col20 from /REGION_NAME where data.col2.k21 = '22' OR data.col2.k21 = '33' > > // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where data.col1.k11 in ('aaa', 'xxx', 'yyy') > } > } > > *Server: Region-creation* > > gfsh> create region --name=REGION_NAME --type=PARTITION --redundant-copies=1 --total-num-buckets=61 > > > *Setup: Distributed cluster of 3 nodes > * > > *My Observations/Problems* > - Put operation takes excessive time: region.put("key", > JSONFormatter.fromJSON(jsonDoc_2)); - Fetching a single record from () a > file and Storing in geode approx. takes . 3 secs >Is there any suggestions/configuration related to JSONFormatter API or > other to optimize this...? > > *Looking forward to guidance on querying this JOSN for above sample > queries.* > > *Thanks* > *Ankit* >
Re: Geode - store and query JSON documents
Hi @ankit Soni<mailto:ankit.soni.ge...@gmail.com>, I would say the kind of request you want to execute can't be done (or at least not easily). Let me explain what I mean. JSON objects are encapsulated as PdxInstances, and there are certain restrictions when it comes to querying these types of objects: you can't make queries that iterate over the elements of an array. For example: * SELECT data[*].col1 FROM /REGION_NAME WHERE data[*].col1 * SELECT data[*].col1 FROM /REGION_NAME WHERE data[*].col2[*].k21 = '22' * SELECT * FROM /REGION_NAME WHERE data[*].col2[*].k21 = '22' An Elasticsearch-style query syntax is not available in Geode as far as I am aware. Maybe someone else knows if there is a way to execute these queries with Lucene? Sorry not to be able to help much :S BR, Mario. From: ankit Soni Sent: Sunday, November 22, 2020 8:52 PM To: dev@geode.apache.org Subject: Re: Geode - store and query JSON documents Hello geode-devs, please provide a guidance on this. Ankit. On Sat, 21 Nov 2020 at 10:23, ankit Soni wrote: > Hello team, > > I am *evaluating usage of Geode (1.12) with storing JSON documents and > querying the same*. I am able to store the json records successfully in > geode but seeking guidance on how to query them. > More details on code and sample json is, > > > *Sample client-code* > > import org.apache.geode.cache.client.ClientCache; > import org.apache.geode.cache.client.ClientCacheFactory; > import org.apache.geode.cache.client.ClientRegionShortcut; > import org.apache.geode.pdx.JSONFormatter; > import org.apache.geode.pdx.PdxInstance; > > public class MyTest { > > *//NOTE: Below is truncated json, single json document can max contain an > array of col1...col30 (30 diff attributes) within data. 
* > public final static String jsonDoc_2 = "{" + > "\"data\":[{" + > "\"col1\": {" + > "\"k11\": \"aaa\"," + > "\"k12\":true," + > "\"k13\": ," + > "\"k14\": \"2020-12-31:00:00:00\"" + > "}," + > "\"col2\":[{" + > "\"k21\": \"22\"," + > "\"k22\": true" + > "}]" + > "}]" + > "}"; > > * //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray ([]) as > shown above in jsonDoc_2;* > > public static void main(String[] args){ > > //create client-cache > ClientCache cache = new > ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create(); > Region region = cache. PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY) > .create(REGION_NAME); > > //store json document > region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); > > //How to query json document like, > > // 1. select col2.k21, col1, col20 from /REGION_NAME where > data.col2.k21 = '22' OR data.col2.k21 = '33' > > // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where > data.col1.k11 in ('aaa', 'xxx', 'yyy') > } > } > > *Server: Region-creation* > > gfsh> create region --name=REGION_NAME --type=PARTITION --redundant-copies=1 > --total-num-buckets=61 > > > *Setup: Distributed cluster of 3 nodes > * > > *My Observations/Problems* > - Put operation takes excessive time: region.put("key", > JSONFormatter.fromJSON(jsonDoc_2)); - Fetching a single record from () a > file and Storing in geode approx. takes . 3 secs >Is there any suggestions/configuration related to JSONFormatter API or > other to optimize this...? > > *Looking forward to guidance on querying this JOSN for above sample > queries.* > > *Thanks* > *Ankit* >
Re: Geode - store and query JSON documents
Hello geode-devs, please provide a guidance on this. Ankit. On Sat, 21 Nov 2020 at 10:23, ankit Soni wrote: > Hello team, > > I am *evaluating usage of Geode (1.12) with storing JSON documents and > querying the same*. I am able to store the json records successfully in > geode but seeking guidance on how to query them. > More details on code and sample json is, > > > *Sample client-code* > > import org.apache.geode.cache.client.ClientCache; > import org.apache.geode.cache.client.ClientCacheFactory; > import org.apache.geode.cache.client.ClientRegionShortcut; > import org.apache.geode.pdx.JSONFormatter; > import org.apache.geode.pdx.PdxInstance; > > public class MyTest { > > *//NOTE: Below is truncated json, single json document can max contain an > array of col1...col30 (30 diff attributes) within data. * > public final static String jsonDoc_2 = "{" + > "\"data\":[{" + > "\"col1\": {" + > "\"k11\": \"aaa\"," + > "\"k12\":true," + > "\"k13\": ," + > "\"k14\": \"2020-12-31:00:00:00\"" + > "}," + > "\"col2\":[{" + > "\"k21\": \"22\"," + > "\"k22\": true" + > "}]" + > "}]" + > "}"; > > * //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray ([]) as > shown above in jsonDoc_2;* > > public static void main(String[] args){ > > //create client-cache > ClientCache cache = new > ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create(); > Region region = cache. PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY) > .create(REGION_NAME); > > //store json document > region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); > > //How to query json document like, > > // 1. select col2.k21, col1, col20 from /REGION_NAME where > data.col2.k21 = '22' OR data.col2.k21 = '33' > > // 2. 
select col2.k21, col1.k11, col1 from /REGION_NAME where > data.col1.k11 in ('aaa', 'xxx', 'yyy') > } > } > > *Server: Region-creation* > > gfsh> create region --name=REGION_NAME --type=PARTITION --redundant-copies=1 > --total-num-buckets=61 > > > *Setup: Distributed cluster of 3 nodes > * > > *My Observations/Problems* > - Put operation takes excessive time: region.put("key", > JSONFormatter.fromJSON(jsonDoc_2)); - Fetching a single record from () a > file and Storing in geode approx. takes . 3 secs >Is there any suggestions/configuration related to JSONFormatter API or > other to optimize this...? > > *Looking forward to guidance on querying this JOSN for above sample > queries.* > > *Thanks* > *Ankit* >
Geode - store and query JSON documents
Hello team, I am *evaluating usage of Geode (1.12) with storing JSON documents and querying the same*. I am able to store the json records successfully in geode but seeking guidance on how to query them. More details on code and sample json is, *Sample client-code* import org.apache.geode.cache.client.ClientCache; import org.apache.geode.cache.client.ClientCacheFactory; import org.apache.geode.cache.client.ClientRegionShortcut; import org.apache.geode.pdx.JSONFormatter; import org.apache.geode.pdx.PdxInstance; public class MyTest { *//NOTE: Below is truncated json, single json document can max contain an array of col1...col30 (30 diff attributes) within data. * public final static String jsonDoc_2 = "{" + "\"data\":[{" + "\"col1\": {" + "\"k11\": \"aaa\"," + "\"k12\":true," + "\"k13\": ," + "\"k14\": \"2020-12-31:00:00:00\"" + "}," + "\"col2\":[{" + "\"k21\": \"22\"," + "\"k22\": true" + "}]" + "}]" + "}"; * //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray ([]) as shown above in jsonDoc_2;* public static void main(String[] args){ //create client-cache ClientCache cache = new ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create(); Region region = cache.createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY) .create(REGION_NAME); //store json document region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); //How to query json document like, // 1. select col2.k21, col1, col20 from /REGION_NAME where data.col2.k21 = '22' OR data.col2.k21 = '33' // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where data.col1.k11 in ('aaa', 'xxx', 'yyy') } } *Server: Region-creation* gfsh> create region --name=REGION_NAME --type=PARTITION --redundant-copies=1 --total-num-buckets=61 *Setup: Distributed cluster of 3 nodes * *My Observations/Problems* - Put operation takes excessive time: region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); - Fetching a single record from () a file and Storing in geode approx. takes . 
3 secs Are there any suggestions/configuration related to the JSONFormatter API or otherwise to optimize this...? *Looking forward to guidance on querying this JSON for the above sample queries.* *Thanks* *Ankit*