Re: Geode - store and query JSON documents

2020-12-02 Thread ankit Soni
Thanks a lot Xiaojian Zhou for your clear explanation and detailed reply.
This has helped a lot to proceed with my experiments.

Ankit.

On Fri, Nov 27, 2020, 5:48 AM Xiaojian Zhou  wrote:

> Ankit:
>
> I wrote some lucene sample code using your data and query.
>
> I also provided gfsh commands to create nested query.
>
> Note: I purposely provided 2 data to show the difference of query.
>
> package examples;
>
> import org.apache.geode.cache.Region;
> import org.apache.geode.cache.client.ClientCache;
> import org.apache.geode.cache.client.ClientCacheFactory;
> import org.apache.geode.cache.client.ClientRegionShortcut;
> import org.apache.geode.cache.lucene.LuceneQuery;
> import org.apache.geode.cache.lucene.LuceneQueryException;
> import org.apache.geode.cache.lucene.LuceneServiceProvider;
> import org.apache.geode.cache.lucene.PageableLuceneQueryResults;
> import org.apache.geode.cache.lucene.internal.LuceneIndexImpl;
> import org.apache.geode.cache.lucene.internal.LuceneServiceImpl;
> import org.apache.geode.pdx.JSONFormatter;
> import org.apache.geode.pdx.PdxInstance;
>
> import java.io.IOException;
> import java.util.HashSet;
> import java.util.LinkedList;
> import java.util.List;
> import java.util.concurrent.TimeUnit;
> import java.util.concurrent.atomic.AtomicInteger;
>
> public class JSONTest {
>   //NOTE: Below is truncated json, single json document can max contain an
> array of col1...col30 (30 diff attributes)
>   // within data.
>   public final static String jsonDoc_2 = "{" +
>   "\"data\":[{" +
>   "\"col1\": {" +
>   "\"k11\": \"aaa\"," +
>   "\"k12\":true," +
>   "\"k13\": ," +
>   "\"k14\": \"2020-12-31:00:00:00\"" +
>   "}," +
>   "\"col2\":[{" +
>   "\"k21\": \"22\"," +
>   "\"k22\": true" +
>   "}]" +
>   "}]" +
>   "}";
>   public final static String jsonDoc_3 = "{" +
>   "\"data\":[{" +
>   "\"col1\": {" +
>   "\"k11\": \"bbb\"," +
>   "\"k12\":true," +
>   "\"k13\": ," +
>   "\"k14\": \"2020-12-31:00:00:00\"" +
>   "}," +
>   "\"col2\":[{" +
>   "\"k21\": \"33\"," +
>   "\"k22\": true" +
>   "}]" +
>   "}]" +
>   "}";
>
>   //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray  ([]) as
> shown above in jsonDoc_2;
>
>   public final static String REGION_NAME = "REGION_NAME";
>
>   public static void main(String[] args) throws InterruptedException,
> LuceneQueryException {
>
> //create client-cache
> ClientCache cache = new
> ClientCacheFactory().addPoolLocator("localhost",
> 10334).setPdxReadSerialized(true).create();
> Region region = cache.
> PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
> .create(REGION_NAME);
>
> //store json document
> region.put("key", JSONFormatter.fromJSON(jsonDoc_2));
> region.put("key3", JSONFormatter.fromJSON(jsonDoc_3));
>
> LuceneServiceImpl service = (LuceneServiceImpl)
> LuceneServiceProvider.get(cache);
> LuceneIndexImpl index = (LuceneIndexImpl)
> service.getIndex("jsonIndex", "REGION_NAME");
> if (index != null) {
>   service.waitUntilFlushed("jsonIndex", "REGION_NAME", 6,
> TimeUnit.MILLISECONDS);
> }
>
> LuceneQuery query =
> service.createLuceneQueryFactory().create("jsonIndex", "REGION_NAME",
> "22 OR 33", "data.col2.k21");
> System.out.println("Query 22 OR 33");
> HashSet results = getResults(query, "REGION_NAME");
>
> LuceneQuery query2 =
> service.createLuceneQueryFactory().create("jsonIndex", "REGION_NAME",
> "aaa OR xxx OR yyy", "data.col1.k11");
> System.out.println("Query aaa OR xxx OR yyy");
> results = getResults(query2, "REGION_NAME");
>
> // server side:
> // gfsh> start locator
> // gfsh> start server --name=server50505 --server-port=50505
> // gfsh> create lucene index --name=jsonIndex --region=/REGION_NAME
> --field=data.col2.k21,data.col1.k11
> // --serializer=org.apache.geode.cache.lucene.FlatFormatSerializer
> // gfsh> create region --name=REGION_NAME --type=PARTITION
> --redundant-copies=1 --total-num-buckets=61
>
> // How to query json document like,
>
> // 1. select col2.k21, col1, col20 from /REGION_NAME where
> //data.col2.k21 = '22' OR data.col2.k21 = '33'
>
> // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where
> // data.col1.k11 in ('aaa', 'xxx', 'yyy')
>   }
>
>   private static HashSet getResults(LuceneQuery query, String regionName)
> throws LuceneQueryException {
> if (query == null) {
>   return null;
> }
>
> PageableLuceneQueryResults results = query.findPages();
> if (results.size() > 0) {
>   System.out.println("Search found " + results.size() + " results in "
> + regionName + ", page size is " + query.getPageSize());
> }
>
> 

Re: Geode - store and query JSON documents

2020-11-26 Thread Xiaojian Zhou
Ankit:

I wrote some lucene sample code using your data and query.

I also provided the gfsh commands to create the region and the Lucene index on the nested fields.

Note: I purposely provided 2 documents to show the difference between the queries.

package examples;

import org.apache.geode.cache.Region;
import org.apache.geode.cache.client.ClientCache;
import org.apache.geode.cache.client.ClientCacheFactory;
import org.apache.geode.cache.client.ClientRegionShortcut;
import org.apache.geode.cache.lucene.LuceneQuery;
import org.apache.geode.cache.lucene.LuceneQueryException;
import org.apache.geode.cache.lucene.LuceneServiceProvider;
import org.apache.geode.cache.lucene.PageableLuceneQueryResults;
import org.apache.geode.cache.lucene.internal.LuceneIndexImpl;
import org.apache.geode.cache.lucene.internal.LuceneServiceImpl;
import org.apache.geode.pdx.JSONFormatter;
import org.apache.geode.pdx.PdxInstance;

import java.io.IOException;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

public class JSONTest {
  //NOTE: Below is truncated json, single json document can max contain an 
array of col1...col30 (30 diff attributes)
  // within data.
  public final static String jsonDoc_2 = "{" +
  "\"data\":[{" +
  "\"col1\": {" +
  "\"k11\": \"aaa\"," +
  "\"k12\":true," +
  "\"k13\": ," +
  "\"k14\": \"2020-12-31:00:00:00\"" +
  "}," +
  "\"col2\":[{" +
  "\"k21\": \"22\"," +
  "\"k22\": true" +
  "}]" +
  "}]" +
  "}";
  public final static String jsonDoc_3 = "{" +
  "\"data\":[{" +
  "\"col1\": {" +
  "\"k11\": \"bbb\"," +
  "\"k12\":true," +
  "\"k13\": ," +
  "\"k14\": \"2020-12-31:00:00:00\"" +
  "}," +
  "\"col2\":[{" +
  "\"k21\": \"33\"," +
  "\"k22\": true" +
  "}]" +
  "}]" +
  "}";

  //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray  ([]) as shown 
above in jsonDoc_2;

  public final static String REGION_NAME = "REGION_NAME";

  public static void main(String[] args) throws InterruptedException, 
LuceneQueryException {

//create client-cache
ClientCache cache = new
ClientCacheFactory().addPoolLocator("localhost", 
10334).setPdxReadSerialized(true).create();
Region region = cache.createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
.create(REGION_NAME);

//store json document
region.put("key", JSONFormatter.fromJSON(jsonDoc_2));
region.put("key3", JSONFormatter.fromJSON(jsonDoc_3));

LuceneServiceImpl service = (LuceneServiceImpl) 
LuceneServiceProvider.get(cache);
LuceneIndexImpl index = (LuceneIndexImpl) service.getIndex("jsonIndex", 
"REGION_NAME");
if (index != null) {
  service.waitUntilFlushed("jsonIndex", "REGION_NAME", 6, 
TimeUnit.MILLISECONDS);
}

LuceneQuery query = service.createLuceneQueryFactory().create("jsonIndex", 
"REGION_NAME",
"22 OR 33", "data.col2.k21");
System.out.println("Query 22 OR 33");
HashSet results = getResults(query, "REGION_NAME");

LuceneQuery query2 = service.createLuceneQueryFactory().create("jsonIndex", 
"REGION_NAME",
"aaa OR xxx OR yyy", "data.col1.k11");
System.out.println("Query aaa OR xxx OR yyy");
results = getResults(query2, "REGION_NAME");

// server side:
// gfsh> start locator
// gfsh> start server --name=server50505 --server-port=50505
// gfsh> create lucene index --name=jsonIndex --region=/REGION_NAME 
--field=data.col2.k21,data.col1.k11
// --serializer=org.apache.geode.cache.lucene.FlatFormatSerializer
// gfsh> create region --name=REGION_NAME --type=PARTITION 
--redundant-copies=1 --total-num-buckets=61

// How to query json document like,

// 1. select col2.k21, col1, col20 from /REGION_NAME where
//data.col2.k21 = '22' OR data.col2.k21 = '33'

// 2. select col2.k21, col1.k11, col1 from /REGION_NAME where
// data.col1.k11 in ('aaa', 'xxx', 'yyy')
  }

  private static HashSet getResults(LuceneQuery query, String regionName) 
throws LuceneQueryException {
if (query == null) {
  return null;
}

PageableLuceneQueryResults results = query.findPages();
if (results.size() > 0) {
  System.out.println("Search found " + results.size() + " results in " + 
regionName + ", page size is " + query.getPageSize());
}

HashSet values = new HashSet<>();
while (results.hasNext()) {
  results.next().stream()
  .forEach(struct -> {
Object value = struct.getValue();
if (value instanceof PdxInstance) {
  PdxInstance pdx = (PdxInstance) value;
  String jsonString = JSONFormatter.toJSON(pdx);
  List dataList = 
(LinkedList)pdx.getField("data");
 

Re: Geode - store and query JSON documents

2020-11-25 Thread ankit Soni
Hi Anil, Thanks a lot for your guidance. This has really helped me to
proceed.

My intended queries are working and returning projected data (as a struct).
Just wondering if there is any API that does a struct --> JSON string
conversion.

-Ankit.

On Wed, Nov 25, 2020, 12:21 AM Anilkumar Gingade 
wrote:

> Ankit,
>
> Here is how to query col2.
> "SELECT d.col2 FROM /JsonRegion v, v.data d, d.col2 c where c.k21 =
> '22'";
>
> You can find example on how to query nested collections:
>
> https://geode.apache.org/docs/guide/18/getting_started/querying_quick_reference.html
>
> When you want to select a nested collection and inspect its value; you
> need to create iterator in the from clause (E.g.  d.col2 in the above query)
>
> You can find other ways to query arrays in the above sample.
>
> -Anil.
>
>
>
> On 11/23/20, 10:02 PM, "ankit Soni"  wrote:
>
> Hi Anil,
>
> Thanks a lot for your reply. This really helps to proceed. The query
> shared
> by you worked but I need a slight variation of it, i.e where clause
> contains col2 (data.col2.k21 = '22') which is array unlike col1
> (object).
>
> FYI: value is stored in cache.
> PDX[28847624, __GEMFIRE_JSON]{
> data=[PDX[28847624, __GEMFIRE_JSON] {
> col1=PDX[28626794, __GEMFIRE_JSON] {k11=aaa, k12=true, k13=,
> k14=2020-12-31T00..}
> Col2=[PDX[25385544, __GEMFIRE_JSON]{k21=, k22=true}]}]}
> Based on OQL querying doc shared, tried few ways but no luck on
> querying
> based on Col2.
>
> It will be really helpful if you share updated query.
>
> Thanks
> Ankit.
>
> On Tue, Nov 24, 2020, 2:42 AM Anilkumar Gingade 
> wrote:
>
> > Ankit,
> >
> > Here is how you can query your JSON object.
> >
> > String queryStr = "SELECT d.col1 FROM /JsonRegion v, v.data d where
> > d.col1.k11 = 'aaa'";
> >
> > As replied earlier; the data is stored as PdxInstance type in the
> cache.
> > In the PdxInstance, the data is stored as top level or nested
> collection of
> > objects/values based on input JSON object structure.
> > The query engine queries on the PdxInstance type and returns the
> value.
> >
> > To see, how the PdxInstance data looks like in the cache, you can
> print
> > the returned value from querying the region values:
> > E.g.:
> >  String queryStr = "SELECT v FROM /JsonRegion v";
> >  SelectResults results = (SelectResults)
> > QueryService().newQuery(queryStr).execute();
> >   Object[] value = results.asList().toArray();
> >   System.out.println(" Projected value: " + value[0]);
> >
> > You can find sample queries on different type of objects
> (collections,
> > etc) at:
> >
> >
> https://geode.apache.org/docs/guide/18/getting_started/querying_quick_reference.html
> >
> > Also in order to determine where the time is getting spent, can you
> > separate out object creation through JSONFormatter from put
> operation.
> > E.g.:
> > PdxInstance pdxInstance = JSONFormatter.fromJSON(jsonDoc_2);
> > // Time taken to format:
> > region.put("1", pdxInstance);
> > // Time taken to add to cache:
> >
> > And measure the time separately. It will help to see if the time is
> spent
> > in getting the PdxInstance or in doing puts. Also, can you measure
> the time
> > in avg.
> > E.g. Say time measured for puts from 1000 to 2000 and avg time for
> those
> > puts.
> >
> > -Anil.
> >
> >
> > On 11/23/20, 11:27 AM, "ankit Soni" 
> wrote:
> >
> >  Hello geode-dev,
> >
> > I am *evaluating usage of Geode (1.12) with storing JSON
> documents and
> > querying the same*. I am able to store the json records
> successfully in
> > geode but seeking guidance on how to query them.
> > More details on code and sample json is,
> >
> >
> > *Sample client-code*
> >
> > import org.apache.geode.cache.client.ClientCache;
> > import org.apache.geode.cache.client.ClientCacheFactory;
> > import org.apache.geode.cache.client.ClientRegionShortcut;
> > import org.apache.geode.pdx.JSONFormatter;
> > import org.apache.geode.pdx.PdxInstance;
> >
> > public class MyTest {
> >
> > *//NOTE: Below is truncated json, single json document can
> max
> > contain an array of col1...col30 (30 diff attributes) within
> data. *
> > public final static  String jsonDoc_2 = "{" +
> > "\"data\":[{" +
> > "\"col1\": {" +
> > "\"k11\": \"aaa\"," +
> > "\"k12\":true," +
> > "\"k13\": ," +
> > "\"k14\":
> \"2020-12-31:00:00:00\"" +
> > "}," +
> >   

Re: Geode - store and query JSON documents

2020-11-24 Thread Anilkumar Gingade
Ankit,

Here is how to query col2.
"SELECT d.col2 FROM /JsonRegion v, v.data d, d.col2 c where c.k21 = '22'";

You can find example on how to query nested collections:
https://geode.apache.org/docs/guide/18/getting_started/querying_quick_reference.html

When you want to select a nested collection and inspect its values, you need to 
create an iterator for it in the FROM clause (e.g. "d.col2 c" in the above query).

You can find other ways to query arrays in the above sample.

-Anil.



On 11/23/20, 10:02 PM, "ankit Soni"  wrote:

Hi Anil,

Thanks a lot for your reply. This really helps to proceed. The query shared
by you worked but I need a slight variation of it, i.e where clause
contains col2 (data.col2.k21 = '22') which is array unlike col1
(object).

FYI: value is stored in cache.
PDX[28847624, __GEMFIRE_JSON]{
data=[PDX[28847624, __GEMFIRE_JSON] {
col1=PDX[28626794, __GEMFIRE_JSON] {k11=aaa, k12=true, k13=,
k14=2020-12-31T00..}
Col2=[PDX[25385544, __GEMFIRE_JSON]{k21=, k22=true}]}]}
Based on OQL querying doc shared, tried few ways but no luck on querying
based on Col2.

It will be really helpful if you share updated query.

Thanks
Ankit.

On Tue, Nov 24, 2020, 2:42 AM Anilkumar Gingade  wrote:

> Ankit,
>
> Here is how you can query your JSON object.
>
> String queryStr = "SELECT d.col1 FROM /JsonRegion v, v.data d where
> d.col1.k11 = 'aaa'";
>
> As replied earlier; the data is stored as PdxInstance type in the cache.
> In the PdxInstance, the data is stored as top level or nested collection 
of
> objects/values based on input JSON object structure.
> The query engine queries on the PdxInstance type and returns the value.
>
> To see, how the PdxInstance data looks like in the cache, you can print
> the returned value from querying the region values:
> E.g.:
>  String queryStr = "SELECT v FROM /JsonRegion v";
>  SelectResults results = (SelectResults)
> QueryService().newQuery(queryStr).execute();
>   Object[] value = results.asList().toArray();
>   System.out.println(" Projected value: " + value[0]);
>
> You can find sample queries on different type of objects (collections,
> etc) at:
>
> 
https://geode.apache.org/docs/guide/18/getting_started/querying_quick_reference.html
>
> Also in order to determine where the time is getting spent, can you
> separate out object creation through JSONFormatter from put operation.
> E.g.:
> PdxInstance pdxInstance = JSONFormatter.fromJSON(jsonDoc_2);
> // Time taken to format:
> region.put("1", pdxInstance);
> // Time taken to add to cache:
>
> And measure the time separately. It will help to see if the time is spent
> in getting the PdxInstance or in doing puts. Also, can you measure the 
time
> in avg.
> E.g. Say time measured for puts from 1000 to 2000 and avg time for those
> puts.
>
> -Anil.
>
>
> On 11/23/20, 11:27 AM, "ankit Soni"  wrote:
>
>  Hello geode-dev,
>
> I am *evaluating usage of Geode (1.12) with storing JSON documents and
> querying the same*. I am able to store the json records successfully 
in
> geode but seeking guidance on how to query them.
> More details on code and sample json is,
>
>
> *Sample client-code*
>
> import org.apache.geode.cache.client.ClientCache;
> import org.apache.geode.cache.client.ClientCacheFactory;
> import org.apache.geode.cache.client.ClientRegionShortcut;
> import org.apache.geode.pdx.JSONFormatter;
> import org.apache.geode.pdx.PdxInstance;
>
> public class MyTest {
>
> *//NOTE: Below is truncated json, single json document can max
> contain an array of col1...col30 (30 diff attributes) within data. *
> public final static  String jsonDoc_2 = "{" +
> "\"data\":[{" +
> "\"col1\": {" +
> "\"k11\": \"aaa\"," +
> "\"k12\":true," +
> "\"k13\": ," +
> "\"k14\": \"2020-12-31:00:00:00\"" +
> "}," +
> "\"col2\":[{" +
> "\"k21\": \"22\"," +
> "\"k22\": true" +
> "}]" +
> "}]" +
> "}";
>
> * //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray
> ([]) as shown above in jsonDoc_2;*
>
> public static void main(String[] args){
>
> //create client-cache
> ClientCache cache = new

Re: Geode - store and query JSON documents

2020-11-23 Thread ankit Soni
Hi Anil,

Thanks a lot for your reply. This really helps to proceed. The query shared
by you worked but I need a slight variation of it, i.e where clause
contains col2 (data.col2.k21 = '22') which is array unlike col1
(object).

FYI: value is stored in cache.
PDX[28847624, __GEMFIRE_JSON]{
data=[PDX[28847624, __GEMFIRE_JSON] {
col1=PDX[28626794, __GEMFIRE_JSON] {k11=aaa, k12=true, k13=,
k14=2020-12-31T00..}
Col2=[PDX[25385544, __GEMFIRE_JSON]{k21=, k22=true}]}]}
Based on OQL querying doc shared, tried few ways but no luck on querying
based on Col2.

It will be really helpful if you share updated query.

Thanks
Ankit.

On Tue, Nov 24, 2020, 2:42 AM Anilkumar Gingade  wrote:

> Ankit,
>
> Here is how you can query your JSON object.
>
> String queryStr = "SELECT d.col1 FROM /JsonRegion v, v.data d where
> d.col1.k11 = 'aaa'";
>
> As replied earlier; the data is stored as PdxInstance type in the cache.
> In the PdxInstance, the data is stored as top level or nested collection of
> objects/values based on input JSON object structure.
> The query engine queries on the PdxInstance type and returns the value.
>
> To see, how the PdxInstance data looks like in the cache, you can print
> the returned value from querying the region values:
> E.g.:
>  String queryStr = "SELECT v FROM /JsonRegion v";
>  SelectResults results = (SelectResults)
> QueryService().newQuery(queryStr).execute();
>   Object[] value = results.asList().toArray();
>   System.out.println(" Projected value: " + value[0]);
>
> You can find sample queries on different type of objects (collections,
> etc) at:
>
> https://geode.apache.org/docs/guide/18/getting_started/querying_quick_reference.html
>
> Also in order to determine where the time is getting spent, can you
> separate out object creation through JSONFormatter from put operation.
> E.g.:
> PdxInstance pdxInstance = JSONFormatter.fromJSON(jsonDoc_2);
> // Time taken to format:
> region.put("1", pdxInstance);
> // Time taken to add to cache:
>
> And measure the time separately. It will help to see if the time is spent
> in getting the PdxInstance or in doing puts. Also, can you measure the time
> in avg.
> E.g. Say time measured for puts from 1000 to 2000 and avg time for those
> puts.
>
> -Anil.
>
>
> On 11/23/20, 11:27 AM, "ankit Soni"  wrote:
>
>  Hello geode-dev,
>
> I am *evaluating usage of Geode (1.12) with storing JSON documents and
> querying the same*. I am able to store the json records successfully in
> geode but seeking guidance on how to query them.
> More details on code and sample json is,
>
>
> *Sample client-code*
>
> import org.apache.geode.cache.client.ClientCache;
> import org.apache.geode.cache.client.ClientCacheFactory;
> import org.apache.geode.cache.client.ClientRegionShortcut;
> import org.apache.geode.pdx.JSONFormatter;
> import org.apache.geode.pdx.PdxInstance;
>
> public class MyTest {
>
> *//NOTE: Below is truncated json, single json document can max
> contain an array of col1...col30 (30 diff attributes) within data. *
> public final static  String jsonDoc_2 = "{" +
> "\"data\":[{" +
> "\"col1\": {" +
> "\"k11\": \"aaa\"," +
> "\"k12\":true," +
> "\"k13\": ," +
> "\"k14\": \"2020-12-31:00:00:00\"" +
> "}," +
> "\"col2\":[{" +
> "\"k21\": \"22\"," +
> "\"k22\": true" +
> "}]" +
> "}]" +
> "}";
>
> * //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray
> ([]) as shown above in jsonDoc_2;*
>
> public static void main(String[] args){
>
> //create client-cache
> ClientCache cache = new
> ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create();
> Region region = cache.
> PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
> .create(REGION_NAME);
>
> //store json document
> region.put("key", JSONFormatter.fromJSON(jsonDoc_2));
>
> //How to query json document like,
>
> // 1. select col2.k21, col1, col20 from /REGION_NAME where
> data.col2.k21 = '22' OR data.col2.k21 = '33'
>
> // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where
> data.col1.k11 in ('aaa', 'xxx', 'yyy')
> }
> }
>
> *Server: Region-creation*
>
> gfsh> create region --name=REGION_NAME --type=PARTITION
> --redundant-copies=1 --total-num-buckets=61
>
>
> *Setup: Distributed cluster of 3 nodes
> *
>
> *My Observations/Problems*
> -  Put operation takes excessive 

Re: Geode - store and query JSON documents

2020-11-23 Thread Anilkumar Gingade
Ankit,

Here is how you can query your JSON object.

String queryStr = "SELECT d.col1 FROM /JsonRegion v, v.data d where d.col1.k11 
= 'aaa'";

As replied earlier; the data is stored as PdxInstance type in the cache. In the 
PdxInstance, the data is stored as top level or nested collection of 
objects/values based on input JSON object structure. 
The query engine queries on the PdxInstance type and returns the value.

To see what the PdxInstance data looks like in the cache, you can print the 
returned value from querying the region values:
E.g.:
 String queryStr = "SELECT v FROM /JsonRegion v";
 SelectResults results = (SelectResults) 
QueryService().newQuery(queryStr).execute();
  Object[] value = results.asList().toArray();
  System.out.println(" Projected value: " + value[0]);

You can find sample queries on different type of objects (collections, etc) at:
https://geode.apache.org/docs/guide/18/getting_started/querying_quick_reference.html

Also in order to determine where the time is getting spent, can you separate 
out object creation through JSONFormatter from put operation.
E.g.:
PdxInstance pdxInstance = JSONFormatter.fromJSON(jsonDoc_2);
// Time taken to format:
region.put("1", pdxInstance);
// Time taken to add to cache:

Then measure the two times separately. That will show whether the time is spent in 
creating the PdxInstance or in doing the puts. Also, can you measure the average 
time? 
E.g. measure the total time for puts 1000 to 2000 and compute the average time per put.

-Anil.


On 11/23/20, 11:27 AM, "ankit Soni"  wrote:

 Hello geode-dev,

I am *evaluating usage of Geode (1.12) with storing JSON documents and
querying the same*. I am able to store the json records successfully in
geode but seeking guidance on how to query them.
More details on code and sample json is,


*Sample client-code*

import org.apache.geode.cache.client.ClientCache;
import org.apache.geode.cache.client.ClientCacheFactory;
import org.apache.geode.cache.client.ClientRegionShortcut;
import org.apache.geode.pdx.JSONFormatter;
import org.apache.geode.pdx.PdxInstance;

public class MyTest {

*//NOTE: Below is truncated json, single json document can max
contain an array of col1...col30 (30 diff attributes) within data. *
public final static  String jsonDoc_2 = "{" +
"\"data\":[{" +
"\"col1\": {" +
"\"k11\": \"aaa\"," +
"\"k12\":true," +
"\"k13\": ," +
"\"k14\": \"2020-12-31:00:00:00\"" +
"}," +
"\"col2\":[{" +
"\"k21\": \"22\"," +
"\"k22\": true" +
"}]" +
"}]" +
"}";

* //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray
([]) as shown above in jsonDoc_2;*

public static void main(String[] args){

//create client-cache
ClientCache cache = new
ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create();
Region region = cache.createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
.create(REGION_NAME);

//store json document
region.put("key", JSONFormatter.fromJSON(jsonDoc_2));

//How to query json document like,

// 1. select col2.k21, col1, col20 from /REGION_NAME where
data.col2.k21 = '22' OR data.col2.k21 = '33'

// 2. select col2.k21, col1.k11, col1 from /REGION_NAME where
data.col1.k11 in ('aaa', 'xxx', 'yyy')
}
}

*Server: Region-creation*

gfsh> create region --name=REGION_NAME --type=PARTITION
--redundant-copies=1 --total-num-buckets=61


*Setup: Distributed cluster of 3 nodes
*

*My Observations/Problems*
-  Put operation takes excessive time: region.put("key",
JSONFormatter.fromJSON(jsonDoc_2));  - Fetching a single record from () a
file and Storing in geode approx. takes . 3 secs
   Are there any suggestions/configuration options related to the JSONFormatter API,
or otherwise, to optimize this?

*Looking forward to guidance on querying this JSON for the above sample
queries.*

*Thanks*
*Ankit.*



Geode - store and query JSON documents

2020-11-23 Thread ankit Soni
 Hello geode-dev,

I am *evaluating usage of Geode (1.12) with storing JSON documents and
querying the same*. I am able to store the json records successfully in
geode but seeking guidance on how to query them.
More details on code and sample json is,


*Sample client-code*

import org.apache.geode.cache.client.ClientCache;
import org.apache.geode.cache.client.ClientCacheFactory;
import org.apache.geode.cache.client.ClientRegionShortcut;
import org.apache.geode.pdx.JSONFormatter;
import org.apache.geode.pdx.PdxInstance;

public class MyTest {

*//NOTE: Below is truncated json, single json document can max
contain an array of col1...col30 (30 diff attributes) within data. *
public final static  String jsonDoc_2 = "{" +
"\"data\":[{" +
"\"col1\": {" +
"\"k11\": \"aaa\"," +
"\"k12\":true," +
"\"k13\": ," +
"\"k14\": \"2020-12-31:00:00:00\"" +
"}," +
"\"col2\":[{" +
"\"k21\": \"22\"," +
"\"k22\": true" +
"}]" +
"}]" +
"}";

* //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray
([]) as shown above in jsonDoc_2;*

public static void main(String[] args){

//create client-cache
ClientCache cache = new
ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create();
Region region = cache.createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
.create(REGION_NAME);

//store json document
region.put("key", JSONFormatter.fromJSON(jsonDoc_2));

//How to query json document like,

// 1. select col2.k21, col1, col20 from /REGION_NAME where
data.col2.k21 = '22' OR data.col2.k21 = '33'

// 2. select col2.k21, col1.k11, col1 from /REGION_NAME where
data.col1.k11 in ('aaa', 'xxx', 'yyy')
}
}

*Server: Region-creation*

gfsh> create region --name=REGION_NAME --type=PARTITION
--redundant-copies=1 --total-num-buckets=61


*Setup: Distributed cluster of 3 nodes
*

*My Observations/Problems*
-  Put operation takes excessive time: region.put("key",
JSONFormatter.fromJSON(jsonDoc_2));  - Fetching a single record from () a
file and Storing in geode approx. takes . 3 secs
   Are there any suggestions/configuration options related to the JSONFormatter API,
or otherwise, to optimize this?

*Looking forward to guidance on querying this JSON for the above sample
queries.*

*Thanks*
*Ankit.*


Geode - store and query JSON documents

2020-11-23 Thread ankit Soni
 Hello geode-dev,

I am *evaluating usage of Geode (1.12) with storing JSON documents and
querying the same*. I am able to store the json records successfully in
geode but seeking guidance on how to query them.
More details on code and sample json is,


*Sample client-code*

import org.apache.geode.cache.client.ClientCache;
import org.apache.geode.cache.client.ClientCacheFactory;
import org.apache.geode.cache.client.ClientRegionShortcut;
import org.apache.geode.pdx.JSONFormatter;
import org.apache.geode.pdx.PdxInstance;

public class MyTest {

*//NOTE: Below is truncated json, single json document can max
contain an array of col1...col30 (30 diff attributes) within data. *
public final static  String jsonDoc_2 = "{" +
"\"data\":[{" +
"\"col1\": {" +
"\"k11\": \"aaa\"," +
"\"k12\":true," +
"\"k13\": ," +
"\"k14\": \"2020-12-31:00:00:00\"" +
"}," +
"\"col2\":[{" +
"\"k21\": \"22\"," +
"\"k22\": true" +
"}]" +
"}]" +
"}";

* //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray
([]) as shown above in jsonDoc_2;*

public static void main(String[] args){

//create client-cache
ClientCache cache = new
ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create();
Region region = cache.createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
.create(REGION_NAME);

//store json document
region.put("key", JSONFormatter.fromJSON(jsonDoc_2));

//How to query json document like,

// 1. select col2.k21, col1, col20 from /REGION_NAME where
data.col2.k21 = '22' OR data.col2.k21 = '33'

// 2. select col2.k21, col1.k11, col1 from /REGION_NAME where
data.col1.k11 in ('aaa', 'xxx', 'yyy')
}
}

*Server: Region-creation*

gfsh> create region --name=REGION_NAME --type=PARTITION
--redundant-copies=1 --total-num-buckets=61


*Setup: Distributed cluster of 3 nodes*


*My Observations/Problems*
-  Put operation takes excessive time: region.put("key",
JSONFormatter.fromJSON(jsonDoc_2));  - Fetching a single record from a
file and storing it in Geode takes approx. 3 secs.
   Are there any suggestions/configurations related to the JSONFormatter API,
or otherwise, to optimize this?

*Looking forward to guidance on querying this JSON for the above sample
queries.*

*Thanks*
*Ankit.*


Re: Geode - store and query JSON documents

2020-11-23 Thread Xiaojian Zhou
Ankit:

Anil can provide you with some sample code for OQL queries on JSON.

I will find some Lucene sample code on JSON for you.

Regards
Xiaojian

On 11/23/20, 9:27 AM, "ankit Soni"  wrote:

Hi
I am looking for any means of querying (OQL/Lucene/API etc..?) this stored
data. Looking for achieving this functionality first and second, in a
performant way.

I shared the OQL like syntax, to share my use-case easily and based on some
reference found on doc. I am ok if a Lucene query or some other way can
fetch the results.

It will be of great help if you share the sample query/code fetching this
data .

Thanks
Ankit.


On Mon, 23 Nov 2020 at 22:43, Xiaojian Zhou  wrote:

> Anil:
>
> The syntax is OQL. But I understand they want to query JSON object base on
> the criteria.
>
> On 11/23/20, 9:08 AM, "Anilkumar Gingade"  wrote:
>
> Gester, Looking at the sample query, I Believe Ankit is asking about
> OQL query not Lucene...
>
> -Anil.
>
>
> On 11/23/20, 9:02 AM, "Xiaojian Zhou"  wrote:
>
> Ankit:
>
> Geode provided lucene query on json field. Your query can be
> supported.
>
> 
https://gemfire.docs.pivotal.io/910/geode/tools_modules/lucene_integration.html
>
> However in above document, it did not provided a query example on
> JSON object.
>
> I can give you some sample code to query on JSON.
>
> Regards
> Xiaojian Zhou
>
> On 11/22/20, 11:53 AM, "ankit Soni" 
> wrote:
>
> Hello geode-devs, please provide a guidance on this.
>
> Ankit.
>
> On Sat, 21 Nov 2020 at 10:23, ankit Soni <
> ankit.soni.ge...@gmail.com> wrote:
>
> > Hello team,
> >
> > I am *evaluating usage of Geode (1.12) with storing JSON
> documents and
> > querying the same*. I am able to store the json records
> successfully in
> > geode but seeking guidance on how to query them.
> > More details on code and sample json is,
> >
> >
> > *Sample client-code*
> >
> > import org.apache.geode.cache.client.ClientCache;
> > import org.apache.geode.cache.client.ClientCacheFactory;
> > import org.apache.geode.cache.client.ClientRegionShortcut;
> > import org.apache.geode.pdx.JSONFormatter;
> > import org.apache.geode.pdx.PdxInstance;
> >
> > public class MyTest {
> >
> > *//NOTE: Below is truncated json, single json document
> can max contain an array of col1...col30 (30 diff attributes) within 
data. *
> > public final static  String jsonDoc_2 = "{" +
> > "\"data\":[{" +
> > "\"col1\": {" +
> > "\"k11\": \"aaa\"," +
> > "\"k12\":true," +
> > "\"k13\": ," +
> > "\"k14\":
> \"2020-12-31:00:00:00\"" +
> > "}," +
> > "\"col2\":[{" +
> > "\"k21\": \"22\"," +
> > "\"k22\": true" +
> > "}]" +
> > "}]" +
> > "}";
> >
> > * //NOTE: Col1col30 are mix of JSONObject ({}) and
> JSONArray ([]) as shown above in jsonDoc_2;*
> >
> > public static void main(String[] args){
> >
> > //create client-cache
> > ClientCache cache = new
> ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create();
> > Region region = cache. PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
> > .create(REGION_NAME);
> >
> > //store json document
> > region.put("key", 
JSONFormatter.fromJSON(jsonDoc_2));
> >
> > //How to 

Re: Geode - store and query JSON documents

2020-11-23 Thread ankit Soni
Hi
I am looking for any means of querying (OQL/Lucene/API, etc.) this stored
data. I am looking to achieve this functionality first and, second, to do so
in a performant way.

I shared the OQL-like syntax to convey my use case easily, based on some
references found in the docs. I am OK if a Lucene query or some other way can
fetch the results.

It would be of great help if you could share a sample query/code for fetching
this data.

Thanks
Ankit.


On Mon, 23 Nov 2020 at 22:43, Xiaojian Zhou  wrote:

> Anil:
>
> The syntax is OQL. But I understand they want to query JSON object base on
> the criteria.
>
> On 11/23/20, 9:08 AM, "Anilkumar Gingade"  wrote:
>
> Gester, Looking at the sample query, I Believe Ankit is asking about
> OQL query not Lucene...
>
> -Anil.
>
>
> On 11/23/20, 9:02 AM, "Xiaojian Zhou"  wrote:
>
> Ankit:
>
> Geode provided lucene query on json field. Your query can be
> supported.
>
> https://gemfire.docs.pivotal.io/910/geode/tools_modules/lucene_integration.html
>
> However in above document, it did not provided a query example on
> JSON object.
>
> I can give you some sample code to query on JSON.
>
> Regards
> Xiaojian Zhou
>
> On 11/22/20, 11:53 AM, "ankit Soni" 
> wrote:
>
> Hello geode-devs, please provide a guidance on this.
>
> Ankit.
>
> On Sat, 21 Nov 2020 at 10:23, ankit Soni <
> ankit.soni.ge...@gmail.com> wrote:
>
> > Hello team,
> >
> > I am *evaluating usage of Geode (1.12) with storing JSON
> documents and
> > querying the same*. I am able to store the json records
> successfully in
> > geode but seeking guidance on how to query them.
> > More details on code and sample json is,
> >
> >
> > *Sample client-code*
> >
> > import org.apache.geode.cache.client.ClientCache;
> > import org.apache.geode.cache.client.ClientCacheFactory;
> > import org.apache.geode.cache.client.ClientRegionShortcut;
> > import org.apache.geode.pdx.JSONFormatter;
> > import org.apache.geode.pdx.PdxInstance;
> >
> > public class MyTest {
> >
> > *//NOTE: Below is truncated json, single json document
> can max contain an array of col1...col30 (30 diff attributes) within data. *
> > public final static  String jsonDoc_2 = "{" +
> > "\"data\":[{" +
> > "\"col1\": {" +
> > "\"k11\": \"aaa\"," +
> > "\"k12\":true," +
> > "\"k13\": ," +
> > "\"k14\":
> \"2020-12-31:00:00:00\"" +
> > "}," +
> > "\"col2\":[{" +
> > "\"k21\": \"22\"," +
> > "\"k22\": true" +
> > "}]" +
> > "}]" +
> > "}";
> >
> > * //NOTE: Col1col30 are mix of JSONObject ({}) and
> JSONArray ([]) as shown above in jsonDoc_2;*
> >
> > public static void main(String[] args){
> >
> > //create client-cache
> > ClientCache cache = new
> ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create();
> > Region region = cache. PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
> > .create(REGION_NAME);
> >
> > //store json document
> > region.put("key", JSONFormatter.fromJSON(jsonDoc_2));
> >
> > //How to query json document like,
> >
> > // 1. select col2.k21, col1, col20 from /REGION_NAME
> where data.col2.k21 = '22' OR data.col2.k21 = '33'
> >
> > // 2. select col2.k21, col1.k11, col1 from
> /REGION_NAME where data.col1.k11 in ('aaa', 'xxx', 'yyy')
> > }
> > }
> >
> > *Server: Region-creation*
> >
> > gfsh> create region --name=REGION_NAME --type=PARTITION
> --redundant-copies=1 --total-num-buckets=61
> >
> >
> > 

Re: Geode - store and query JSON documents

2020-11-23 Thread Xiaojian Zhou
Anil:

The syntax is OQL, but I understand they want to query JSON objects based on
the criteria.

On 11/23/20, 9:08 AM, "Anilkumar Gingade"  wrote:

Gester, Looking at the sample query, I Believe Ankit is asking about OQL 
query not Lucene...

-Anil.


On 11/23/20, 9:02 AM, "Xiaojian Zhou"  wrote:

Ankit:

Geode provided lucene query on json field. Your query can be supported. 

https://gemfire.docs.pivotal.io/910/geode/tools_modules/lucene_integration.html

However in above document, it did not provided a query example on JSON 
object. 

I can give you some sample code to query on JSON.

Regards
Xiaojian Zhou

On 11/22/20, 11:53 AM, "ankit Soni"  wrote:

Hello geode-devs, please provide a guidance on this.

Ankit.

On Sat, 21 Nov 2020 at 10:23, ankit Soni 
 wrote:

> Hello team,
>
> I am *evaluating usage of Geode (1.12) with storing JSON 
documents and
> querying the same*. I am able to store the json records 
successfully in
> geode but seeking guidance on how to query them.
> More details on code and sample json is,
>
>
> *Sample client-code*
>
> import org.apache.geode.cache.client.ClientCache;
> import org.apache.geode.cache.client.ClientCacheFactory;
> import org.apache.geode.cache.client.ClientRegionShortcut;
> import org.apache.geode.pdx.JSONFormatter;
> import org.apache.geode.pdx.PdxInstance;
>
> public class MyTest {
>
> *//NOTE: Below is truncated json, single json document can 
max contain an array of col1...col30 (30 diff attributes) within data. *
> public final static  String jsonDoc_2 = "{" +
> "\"data\":[{" +
> "\"col1\": {" +
> "\"k11\": \"aaa\"," +
> "\"k12\":true," +
> "\"k13\": ," +
> "\"k14\": 
\"2020-12-31:00:00:00\"" +
> "}," +
> "\"col2\":[{" +
> "\"k21\": \"22\"," +
> "\"k22\": true" +
> "}]" +
> "}]" +
> "}";
>
> * //NOTE: Col1col30 are mix of JSONObject ({}) and 
JSONArray ([]) as shown above in jsonDoc_2;*
>
> public static void main(String[] args){
>
> //create client-cache
> ClientCache cache = new 
ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create();
> Region region = cache.createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
> .create(REGION_NAME);
>
> //store json document
> region.put("key", JSONFormatter.fromJSON(jsonDoc_2));
>
> //How to query json document like,
>
> // 1. select col2.k21, col1, col20 from /REGION_NAME 
where data.col2.k21 = '22' OR data.col2.k21 = '33'
>
> // 2. select col2.k21, col1.k11, col1 from /REGION_NAME 
where data.col1.k11 in ('aaa', 'xxx', 'yyy')
> }
> }
>
> *Server: Region-creation*
>
> gfsh> create region --name=REGION_NAME --type=PARTITION 
--redundant-copies=1 --total-num-buckets=61
>
>
> *Setup: Distributed cluster of 3 nodes
> *
>
> *My Observations/Problems*
> -  Put operation takes excessive time: region.put("key",
> JSONFormatter.fromJSON(jsonDoc_2));  - Fetching a single record 
from () a
> file and Storing in geode approx. takes . 3 secs
>Is there any suggestions/configuration related to 
JSONFormatter API or
> other to optimize this...?
>
> *Looking forward to guidance on querying this JOSN for above 
sample
> queries.*
>
> *Thanks*
> *Ankit*
>





Re: Geode - store and query JSON documents

2020-11-23 Thread Anilkumar Gingade
Gester, looking at the sample query, I believe Ankit is asking about an OQL
query, not Lucene...

-Anil.


On 11/23/20, 9:02 AM, "Xiaojian Zhou"  wrote:

Ankit:

Geode provided lucene query on json field. Your query can be supported. 

https://gemfire.docs.pivotal.io/910/geode/tools_modules/lucene_integration.html

However in above document, it did not provided a query example on JSON 
object. 

I can give you some sample code to query on JSON.

Regards
Xiaojian Zhou

On 11/22/20, 11:53 AM, "ankit Soni"  wrote:

Hello geode-devs, please provide a guidance on this.

Ankit.

On Sat, 21 Nov 2020 at 10:23, ankit Soni  
wrote:

> Hello team,
>
> I am *evaluating usage of Geode (1.12) with storing JSON documents and
> querying the same*. I am able to store the json records successfully 
in
> geode but seeking guidance on how to query them.
> More details on code and sample json is,
>
>
> *Sample client-code*
>
> import org.apache.geode.cache.client.ClientCache;
> import org.apache.geode.cache.client.ClientCacheFactory;
> import org.apache.geode.cache.client.ClientRegionShortcut;
> import org.apache.geode.pdx.JSONFormatter;
> import org.apache.geode.pdx.PdxInstance;
>
> public class MyTest {
>
> *//NOTE: Below is truncated json, single json document can max 
contain an array of col1...col30 (30 diff attributes) within data. *
> public final static  String jsonDoc_2 = "{" +
> "\"data\":[{" +
> "\"col1\": {" +
> "\"k11\": \"aaa\"," +
> "\"k12\":true," +
> "\"k13\": ," +
> "\"k14\": \"2020-12-31:00:00:00\"" +
> "}," +
> "\"col2\":[{" +
> "\"k21\": \"22\"," +
> "\"k22\": true" +
> "}]" +
> "}]" +
> "}";
>
> * //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray 
([]) as shown above in jsonDoc_2;*
>
> public static void main(String[] args){
>
> //create client-cache
> ClientCache cache = new 
ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create();
> Region region = cache.createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
> .create(REGION_NAME);
>
> //store json document
> region.put("key", JSONFormatter.fromJSON(jsonDoc_2));
>
> //How to query json document like,
>
> // 1. select col2.k21, col1, col20 from /REGION_NAME where 
data.col2.k21 = '22' OR data.col2.k21 = '33'
>
> // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where 
data.col1.k11 in ('aaa', 'xxx', 'yyy')
> }
> }
>
> *Server: Region-creation*
>
> gfsh> create region --name=REGION_NAME --type=PARTITION 
--redundant-copies=1 --total-num-buckets=61
>
>
> *Setup: Distributed cluster of 3 nodes
> *
>
> *My Observations/Problems*
> -  Put operation takes excessive time: region.put("key",
> JSONFormatter.fromJSON(jsonDoc_2));  - Fetching a single record from 
() a
> file and Storing in geode approx. takes . 3 secs
>Is there any suggestions/configuration related to JSONFormatter 
API or
> other to optimize this...?
>
> *Looking forward to guidance on querying this JOSN for above sample
> queries.*
>
> *Thanks*
> *Ankit*
>




Re: Geode - store and query JSON documents

2020-11-23 Thread Xiaojian Zhou
Ankit:
 
Geode provides Lucene queries on JSON fields. Your query can be supported.
https://gemfire.docs.pivotal.io/910/geode/tools_modules/lucene_integration.html

However, the above document does not provide a query example on JSON objects.

I can give you some sample code to query on JSON.

Regards
Xiaojian Zhou

On 11/22/20, 11:53 AM, "ankit Soni"  wrote:

Hello geode-devs, please provide a guidance on this.

Ankit.

On Sat, 21 Nov 2020 at 10:23, ankit Soni  wrote:

> Hello team,
>
> I am *evaluating usage of Geode (1.12) with storing JSON documents and
> querying the same*. I am able to store the json records successfully in
> geode but seeking guidance on how to query them.
> More details on code and sample json is,
>
>
> *Sample client-code*
>
> import org.apache.geode.cache.client.ClientCache;
> import org.apache.geode.cache.client.ClientCacheFactory;
> import org.apache.geode.cache.client.ClientRegionShortcut;
> import org.apache.geode.pdx.JSONFormatter;
> import org.apache.geode.pdx.PdxInstance;
>
> public class MyTest {
>
> *//NOTE: Below is truncated json, single json document can max 
contain an array of col1...col30 (30 diff attributes) within data. *
> public final static  String jsonDoc_2 = "{" +
> "\"data\":[{" +
> "\"col1\": {" +
> "\"k11\": \"aaa\"," +
> "\"k12\":true," +
> "\"k13\": ," +
> "\"k14\": \"2020-12-31:00:00:00\"" +
> "}," +
> "\"col2\":[{" +
> "\"k21\": \"22\"," +
> "\"k22\": true" +
> "}]" +
> "}]" +
> "}";
>
> * //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray ([]) 
as shown above in jsonDoc_2;*
>
> public static void main(String[] args){
>
> //create client-cache
> ClientCache cache = new 
ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create();
> Region region = cache.createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
> .create(REGION_NAME);
>
> //store json document
> region.put("key", JSONFormatter.fromJSON(jsonDoc_2));
>
> //How to query json document like,
>
> // 1. select col2.k21, col1, col20 from /REGION_NAME where 
data.col2.k21 = '22' OR data.col2.k21 = '33'
>
> // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where 
data.col1.k11 in ('aaa', 'xxx', 'yyy')
> }
> }
>
> *Server: Region-creation*
>
> gfsh> create region --name=REGION_NAME --type=PARTITION 
--redundant-copies=1 --total-num-buckets=61
>
>
> *Setup: Distributed cluster of 3 nodes
> *
>
> *My Observations/Problems*
> -  Put operation takes excessive time: region.put("key",
> JSONFormatter.fromJSON(jsonDoc_2));  - Fetching a single record from () a
> file and Storing in geode approx. takes . 3 secs
>Is there any suggestions/configuration related to JSONFormatter API or
> other to optimize this...?
>
> *Looking forward to guidance on querying this JOSN for above sample
> queries.*
>
> *Thanks*
> *Ankit*
>



Re: Geode - store and query JSON documents

2020-11-22 Thread Mario Salazar de Torres
Hi @ankit Soni<mailto:ankit.soni.ge...@gmail.com>,

I would say the kind of request you want to execute can't be done (or at least
not easily). Let me explain what I mean.
JSON objects are encapsulated as PdxInstances, and there are
certain restrictions when it comes to querying these types of objects:

You can't make queries that iterate over the elements of an array, e.g.:

  *   SELECT data[*].col1 FROM /REGION_NAME WHERE data[*].col1
  *   SELECT data[*].col1 FROM /REGION_NAME WHERE data[*].col2[*].k21 = '22'
  *   SELECT * FROM /REGION_NAME WHERE data[*].col2[*].k21 = '22'

Elasticsearch-style query syntax is not available in Geode, as far as I am
aware.
Maybe someone else knows if there is a way to execute these queries with Lucene?

Sorry not to be able to help too much :S
BR,
Mario.

From: ankit Soni 
Sent: Sunday, November 22, 2020 8:52 PM
To: dev@geode.apache.org 
Subject: Re: Geode - store and query JSON documents

Hello geode-devs, please provide a guidance on this.

Ankit.

On Sat, 21 Nov 2020 at 10:23, ankit Soni  wrote:

> Hello team,
>
> I am *evaluating usage of Geode (1.12) with storing JSON documents and
> querying the same*. I am able to store the json records successfully in
> geode but seeking guidance on how to query them.
> More details on code and sample json is,
>
>
> *Sample client-code*
>
> import org.apache.geode.cache.client.ClientCache;
> import org.apache.geode.cache.client.ClientCacheFactory;
> import org.apache.geode.cache.client.ClientRegionShortcut;
> import org.apache.geode.pdx.JSONFormatter;
> import org.apache.geode.pdx.PdxInstance;
>
> public class MyTest {
>
> *//NOTE: Below is truncated json, single json document can max contain an 
> array of col1...col30 (30 diff attributes) within data. *
> public final static  String jsonDoc_2 = "{" +
> "\"data\":[{" +
> "\"col1\": {" +
> "\"k11\": \"aaa\"," +
> "\"k12\":true," +
> "\"k13\": ," +
> "\"k14\": \"2020-12-31:00:00:00\"" +
> "}," +
> "\"col2\":[{" +
> "\"k21\": \"22\"," +
> "\"k22\": true" +
> "}]" +
> "}]" +
> "}";
>
> * //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray ([]) as 
> shown above in jsonDoc_2;*
>
> public static void main(String[] args){
>
> //create client-cache
> ClientCache cache = new 
> ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create();
> Region region = cache. PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
> .create(REGION_NAME);
>
> //store json document
> region.put("key", JSONFormatter.fromJSON(jsonDoc_2));
>
> //How to query json document like,
>
> // 1. select col2.k21, col1, col20 from /REGION_NAME where 
> data.col2.k21 = '22' OR data.col2.k21 = '33'
>
> // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where 
> data.col1.k11 in ('aaa', 'xxx', 'yyy')
> }
> }
>
> *Server: Region-creation*
>
> gfsh> create region --name=REGION_NAME --type=PARTITION --redundant-copies=1 
> --total-num-buckets=61
>
>
> *Setup: Distributed cluster of 3 nodes
> *
>
> *My Observations/Problems*
> -  Put operation takes excessive time: region.put("key",
> JSONFormatter.fromJSON(jsonDoc_2));  - Fetching a single record from () a
> file and Storing in geode approx. takes . 3 secs
>Is there any suggestions/configuration related to JSONFormatter API or
> other to optimize this...?
>
> *Looking forward to guidance on querying this JOSN for above sample
> queries.*
>
> *Thanks*
> *Ankit*
>


Re: Geode - store and query JSON documents

2020-11-22 Thread ankit Soni
Hello geode-devs, please provide guidance on this.

Ankit.

On Sat, 21 Nov 2020 at 10:23, ankit Soni  wrote:

> Hello team,
>
> I am *evaluating usage of Geode (1.12) with storing JSON documents and
> querying the same*. I am able to store the json records successfully in
> geode but seeking guidance on how to query them.
> More details on code and sample json is,
>
>
> *Sample client-code*
>
> import org.apache.geode.cache.client.ClientCache;
> import org.apache.geode.cache.client.ClientCacheFactory;
> import org.apache.geode.cache.client.ClientRegionShortcut;
> import org.apache.geode.pdx.JSONFormatter;
> import org.apache.geode.pdx.PdxInstance;
>
> public class MyTest {
>
> *//NOTE: Below is truncated json, single json document can max contain an 
> array of col1...col30 (30 diff attributes) within data. *
> public final static  String jsonDoc_2 = "{" +
> "\"data\":[{" +
> "\"col1\": {" +
> "\"k11\": \"aaa\"," +
> "\"k12\":true," +
> "\"k13\": ," +
> "\"k14\": \"2020-12-31:00:00:00\"" +
> "}," +
> "\"col2\":[{" +
> "\"k21\": \"22\"," +
> "\"k22\": true" +
> "}]" +
> "}]" +
> "}";
>
> * //NOTE: Col1col30 are mix of JSONObject ({}) and JSONArray ([]) as 
> shown above in jsonDoc_2;*
>
> public static void main(String[] args){
>
> //create client-cache
> ClientCache cache = new 
> ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create();
> Region region = cache. PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
> .create(REGION_NAME);
>
> //store json document
> region.put("key", JSONFormatter.fromJSON(jsonDoc_2));
>
> //How to query json document like,
>
> // 1. select col2.k21, col1, col20 from /REGION_NAME where 
> data.col2.k21 = '22' OR data.col2.k21 = '33'
>
> // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where 
> data.col1.k11 in ('aaa', 'xxx', 'yyy')
> }
> }
>
> *Server: Region-creation*
>
> gfsh> create region --name=REGION_NAME --type=PARTITION --redundant-copies=1 
> --total-num-buckets=61
>
>
> *Setup: Distributed cluster of 3 nodes
> *
>
> *My Observations/Problems*
> -  Put operation takes excessive time: region.put("key",
> JSONFormatter.fromJSON(jsonDoc_2));  - Fetching a single record from () a
> file and Storing in geode approx. takes . 3 secs
>Is there any suggestions/configuration related to JSONFormatter API or
> other to optimize this...?
>
> *Looking forward to guidance on querying this JOSN for above sample
> queries.*
>
> *Thanks*
> *Ankit*
>


Geode - store and query JSON documents

2020-11-20 Thread ankit Soni
 Hello team,

I am *evaluating the use of Geode (1.12) for storing JSON documents and
querying them*. I am able to store the JSON records successfully in
Geode but am seeking guidance on how to query them.
More details on the code and a sample JSON document follow:


*Sample client-code*

import org.apache.geode.cache.client.ClientCache;
import org.apache.geode.cache.client.ClientCacheFactory;
import org.apache.geode.cache.client.ClientRegionShortcut;
import org.apache.geode.pdx.JSONFormatter;
import org.apache.geode.pdx.PdxInstance;

public class MyTest {

*//NOTE: Below is truncated json, single json document can max
contain an array of col1...col30 (30 diff attributes) within data. *
public final static  String jsonDoc_2 = "{" +
"\"data\":[{" +
"\"col1\": {" +
"\"k11\": \"aaa\"," +
"\"k12\":true," +
"\"k13\": ," +
"\"k14\": \"2020-12-31:00:00:00\"" +
"}," +
"\"col2\":[{" +
"\"k21\": \"22\"," +
"\"k22\": true" +
"}]" +
"}]" +
"}";

* //NOTE: col1...col30 are a mix of JSONObject ({}) and JSONArray
([]) as shown above in jsonDoc_2;*

public static void main(String[] args){

//create client-cache
ClientCache cache = new
ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create();
Region region = cache.createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
.create(REGION_NAME);

//store json document
region.put("key", JSONFormatter.fromJSON(jsonDoc_2));

//How to query json document like,

// 1. select col2.k21, col1, col20 from /REGION_NAME where
data.col2.k21 = '22' OR data.col2.k21 = '33'

// 2. select col2.k21, col1.k11, col1 from /REGION_NAME where
data.col1.k11 in ('aaa', 'xxx', 'yyy')
}
}

*Server: Region-creation*

gfsh> create region --name=REGION_NAME --type=PARTITION
--redundant-copies=1 --total-num-buckets=61


*Setup: Distributed cluster of 3 nodes*


*My Observations/Problems*
-  Put operation takes excessive time: region.put("key",
JSONFormatter.fromJSON(jsonDoc_2));  - Fetching a single record from a
file and storing it in Geode takes approx. 3 secs.
   Are there any suggestions/configurations related to the JSONFormatter API,
or otherwise, to optimize this?

*Looking forward to guidance on querying this JSON for the above sample
queries.*

*Thanks*
*Ankit*