Changeset: 1bef4498b2fd for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=1bef4498b2fd
Modified Files:
        monetdb5/extras/rdf/rdf.h
        monetdb5/extras/rdf/rdfalgebra.c
        monetdb5/extras/rdf/rdfscan.c
        monetdb5/extras/rdf/rdfscan.h
        monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:

Make persistent bat for tknzr-converted_oid mapping


diffs (truncated from 365 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdf.h b/monetdb5/extras/rdf/rdf.h
--- a/monetdb5/extras/rdf/rdf.h
+++ b/monetdb5/extras/rdf/rdf.h
@@ -58,6 +58,12 @@ TKNZRrdf2str (bat *res, bat *bid, bat *m
 rdf_export str
 RDFpartialjoin (bat *res, bat *lmap, bat *rmap, bat *input); 
 
+rdf_export str
+RDFtriplesubsort(BAT **sbat, BAT **pbat, BAT **obat); 
+
+rdf_export str
+RDFbisubsort(BAT **lbat, BAT **rbat); 
+
 #define RDF_MIN_LITERAL (((oid) 1) << ((sizeof(oid)==8)?59:27))
 
 
diff --git a/monetdb5/extras/rdf/rdfalgebra.c b/monetdb5/extras/rdf/rdfalgebra.c
--- a/monetdb5/extras/rdf/rdfalgebra.c
+++ b/monetdb5/extras/rdf/rdfalgebra.c
@@ -167,6 +167,70 @@ RDFpartialjoin(bat *retid, bat *lid, bat
        return MAL_SUCCEED; 
 }
 
+/* Sort S, then sub-sort P on S's order/groups and O on P's (S, P, O order).
+ * sbat, pbat, obat are BATsubsort's output slots, so they may be overwritten. */
+str RDFtriplesubsort(BAT **sbat, BAT **pbat, BAT **obat){
+       BAT *o1,*o2,*o3;
+       BAT *g1,*g2,*g3;
+       BAT *S = NULL, *P = NULL, *O = NULL;
+
+       S = *sbat;
+       P = *pbat;
+       O = *obat;
+       if (BATsubsort(sbat, &o1, &g1, S, NULL, NULL, 0, 0) == GDK_FAIL){
+               if (S != NULL) BBPreclaim(S);
+               throw(RDF, "rdf.triplesubsort", "Fail in sorting for S");
+       }
+       if (BATsubsort(pbat, &o2, &g2, P, o1, g1, 0, 0) == GDK_FAIL){
+               BBPreclaim(S);
+               if (P != NULL) BBPreclaim(P);
+               throw(RDF, "rdf.triplesubsort", "Fail in sub-sorting for P");
+       }
+       if (BATsubsort(obat, &o3, &g3, O, o2, g2, 0, 0) == GDK_FAIL){
+               BBPreclaim(S);
+               BBPreclaim(P);
+               if (O != NULL) BBPreclaim(O);
+               throw(RDF, "rdf.triplesubsort", "Fail in sub-sorting for O");
+       }       
+       /* Release all helper order/group BATs (o1/g1 too, as RDFbisubsort does). */
+       BBPunfix(o1->batCacheid);
+       BBPunfix(g1->batCacheid);
+       BBPunfix(o2->batCacheid);
+       BBPunfix(g2->batCacheid);
+       BBPunfix(o3->batCacheid);
+       BBPunfix(g3->batCacheid);
+       return MAL_SUCCEED; 
+}
+
+/*
+ * Sort left bat and re-order right bat according to the left bat.
+ * lbat/rbat are passed to BATsubsort as output slots, so they may be overwritten.
+ * Returns MAL_SUCCEED, or the error raised by throw(RDF, ...) if a sort fails.
+ * */
+str RDFbisubsort(BAT **lbat, BAT **rbat){
+       BAT *o1,*o2;
+       BAT *g1,*g2;
+       BAT *L = NULL, *R = NULL;
+
+       L = *lbat;
+       R = *rbat;
+       if (BATsubsort(lbat, &o1, &g1, L, NULL, NULL, 0, 0) == GDK_FAIL){
+               if (L != NULL) BBPreclaim(L);
+               throw(RDF, "rdf.bisubsort", "Fail in sorting for L");
+       }
+
+       if (BATsubsort(rbat, &o2, &g2, R, o1, g1, 0, 0) == GDK_FAIL){
+               BBPreclaim(L);
+               if (R != NULL) BBPreclaim(R);
+               throw(RDF, "rdf.bisubsort", "Fail in sub-sorting for R");
+       }
+
+       BBPunfix(o1->batCacheid);
+       BBPunfix(g1->batCacheid);
+       BBPunfix(o2->batCacheid);
+       BBPunfix(g2->batCacheid);
+       return MAL_SUCCEED; 
+}
 
 str
 TKNZRrdf2str(bat *res, bat *bid, bat *map)
diff --git a/monetdb5/extras/rdf/rdfscan.c b/monetdb5/extras/rdf/rdfscan.c
--- a/monetdb5/extras/rdf/rdfscan.c
+++ b/monetdb5/extras/rdf/rdfscan.c
@@ -37,18 +37,21 @@
 #define MAX_PARAMS_NO  20
 
 static 
-void queryParser(RdfScanParams *rsParam, str query, str schema){
+str queryParser(RdfScanParams *rsParam, str query, str schema){
 
        int paramNo; 
        str parts[MAX_PARAMS_NO]; 
        int i = 0, j; 
-       int numRP = 0; 
-       int numOP = 0; 
-       int opIdx;
-       int rpIdx;
+       int numRP = 0, numOP = 0; 
+       int opIdx, rpIdx;
+       int ret; 
        
        (void) schema; 
 
+       if (TKNZRopen (NULL, &schema) != MAL_SUCCEED) {
+               throw(RDF, "rdf.rdfschema", "could not open the tokenizer\n");
+       }
+       
        paramNo = TKNZRtokenize(query, parts, '|');
        
        printf("Number of params from query %s is: %d \n", query, paramNo);
@@ -68,19 +71,19 @@ void queryParser(RdfScanParams *rsParam,
        printf("Number of RPs is %d \n", numRP);
 
        rsParam->numRP = numRP; 
-       rsParam->lstRPs = (char **)GDKmalloc(sizeof(char*) * rsParam->numRP); 
+       rsParam->lstRPstr = (char **)GDKmalloc(sizeof(char*) * rsParam->numRP); 
        rsParam->lstLow = (char **)GDKmalloc(sizeof(char*) * rsParam->numRP); 
        rsParam->lstHi = (char **)GDKmalloc(sizeof(char*) * rsParam->numRP); 
 
        rsParam->numOP = numOP;
-       rsParam->lstOPs = (char **)GDKmalloc(sizeof(char*) * rsParam->numOP); 
+       rsParam->lstOPstr = (char **)GDKmalloc(sizeof(char*) * rsParam->numOP); 
        
        opIdx = 0;
        rpIdx = 0; 
        for (i = 0; i < paramNo; i++){
                if (parts[i][0] == '?'){        //optional param
                        parts[i]++;                     
-                       rsParam->lstOPs[opIdx] = GDKstrdup(parts[i]);
+                       rsParam->lstOPstr[opIdx] = GDKstrdup(parts[i]);
                        opIdx++;
                }
                else{
@@ -113,11 +116,16 @@ void queryParser(RdfScanParams *rsParam,
                                }
                        }
                        
-                       rsParam->lstRPs[rpIdx] = GDKstrdup(parts[i]);
+                       rsParam->lstRPstr[rpIdx] = GDKstrdup(parts[i]);
 
                        rpIdx++;
                }
        }
+
+
+       TKNZRclose(&ret);
+
+       return MAL_SUCCEED; 
 }
 
 static
@@ -125,12 +133,12 @@ void printParams(RdfScanParams *rsParam)
        int i; 
 
        for (i = 0; i < rsParam->numRP; i++){
-               printf("RP[%d] = %s\n", i, rsParam->lstRPs[i]); 
+               printf("RP[%d] = %s\n", i, rsParam->lstRPstr[i]); 
                 if (rsParam->lstLow[i] != NULL) printf("   Low %s\n",rsParam->lstLow[i]);
                 if (rsParam->lstHi[i] != NULL) printf("   Hi %s\n",rsParam->lstHi[i]);
        }
        for (i = 0; i < rsParam->numOP; i++){
-               printf("OP[%d] = %s\n", i, rsParam->lstOPs[i]);
+               printf("OP[%d] = %s\n", i, rsParam->lstOPstr[i]);
        }       
        
 }
@@ -139,18 +147,18 @@ static
 void freeParams(RdfScanParams *rsParam){
        int i; 
        for (i = 0; i < rsParam->numRP; i++){
-               GDKfree(rsParam->lstRPs[i]);
+               GDKfree(rsParam->lstRPstr[i]);
                if (rsParam->lstLow[i] != NULL) GDKfree(rsParam->lstLow[i]);
                if (rsParam->lstHi[i] != NULL) GDKfree(rsParam->lstHi[i]);
        }
-       GDKfree(rsParam->lstRPs);
+       GDKfree(rsParam->lstRPstr);
        GDKfree(rsParam->lstLow);
        GDKfree(rsParam->lstHi); 
 
        for (i = 0; i < rsParam->numOP; i++){
-               GDKfree(rsParam->lstOPs[i]);
+               GDKfree(rsParam->lstOPstr[i]);
        }
-       GDKfree(rsParam->lstOPs);
+       GDKfree(rsParam->lstOPstr);
 
        GDKfree(rsParam); 
 
diff --git a/monetdb5/extras/rdf/rdfscan.h b/monetdb5/extras/rdf/rdfscan.h
--- a/monetdb5/extras/rdf/rdfscan.h
+++ b/monetdb5/extras/rdf/rdfscan.h
@@ -31,11 +31,13 @@
 
 typedef struct RdfScanParams {
        char    schema[20]; 
-       char**  lstRPs;         /* List of required params */   
-       char**  lstLow; /* Low bound of RP*/
-       char**  lstHi;  /* High bound of RP*/
+       char    **lstRPstr;     /* List of required params (string format), numRP entries */   
+       oid     *lstRP;         /* List of required params' oids */
+       char    **lstLow;       /* Low bound of each RP; an entry may be NULL */
+       char    **lstHi;        /* High bound of each RP; an entry may be NULL */
        int     numRP;  /* Number of required params */
-       char**  lstOPs; /* List of optional params */
+       char    **lstOPstr;     /* List of optional params (string format), numOP entries */
+       oid     *lstOP;         /* List of optional params' oids */
        int     numOP;
        
 } RdfScanParams; 
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -10091,41 +10091,6 @@ str getOrigPbt(oid *pbt, oid *origPbt, B
        return MAL_SUCCEED; 
 }
 
-static
-str triplesubsort(BAT **sbat, BAT **pbat, BAT **obat){
-
-       BAT *o1,*o2,*o3;
-       BAT *g1,*g2,*g3;
-       BAT *S = NULL, *P = NULL, *O = NULL;
-
-       S = *sbat;
-       P = *pbat;
-       O = *obat;
-       /* order SPO/SOP */
-       if (BATsubsort(sbat, &o1, &g1, S, NULL, NULL, 0, 0) == GDK_FAIL){
-               if (S != NULL) BBPreclaim(S);
-               throw(RDF, "rdf.triplesubsort", "Fail in sorting for S");
-       }
-
-       if (BATsubsort(pbat, &o2, &g2, P, o1, g1, 0, 0) == GDK_FAIL){
-               BBPreclaim(S);
-               if (P != NULL) BBPreclaim(P);
-               throw(RDF, "rdf.triplesubsort", "Fail in sub-sorting for P");
-       }
-       if (BATsubsort(obat, &o3, &g3, O, o2, g2, 0, 0) == GDK_FAIL){
-               BBPreclaim(S);
-               BBPreclaim(P);
-               if (O != NULL) BBPreclaim(O);
-               throw(RDF, "rdf.triplesubsort", "Fail in sub-sorting for O");
-       }       
-
-       BBPunfix(o2->batCacheid);
-       BBPunfix(g2->batCacheid);
-       BBPunfix(o3->batCacheid);
-       BBPunfix(g3->batCacheid);
-
-       return MAL_SUCCEED; 
-}
 
 static 
 BAT* BATnewPropSet(int ht, int tt, BUN cap){
@@ -11254,6 +11219,9 @@ str RDFdistTriplesToCSs(int *ret, bat *s
  * 
  * Input String --> Original TKNZR oid --> New Oid (TKNRZ_to_new_MapBAT)
  * New oid --> Original TKNZR oid --> Input String (New_to_TKNZR_MapBat)
+ * 
+ * To convert from mapId to tknz Id, we use the lmap, rmap bats. 
+ *
  * */
 static 
 str buildTKNZRMappingBat(BAT *lmap, BAT *rmap){
@@ -11263,9 +11231,30 @@ str buildTKNZRMappingBat(BAT *lmap, BAT 
        int     ret; 
        int     num = 0; 
        bat     mapBatId; 
-       BAT     *mapBat;        
-       str     batname = NULL; 
-
+       BAT     *tmpmapBat = NULL, *pMapBat = NULL;     
+       str     bname = NULL, bnamelBat = NULL, bnamerBat = NULL; 
+       bat     *lstCommits = NULL; 
+
+       /* Check if the bat has already been built */
+       bname = (str) GDKmalloc(50 * sizeof(char));
+       snprintf(bname, 50, "tknzr_to_map");
+
+       bnamelBat = (str) GDKmalloc(50 * sizeof(char));
+       snprintf(bnamelBat, 50, "map_to_tknz_left");
+
+       bnamerBat = (str) GDKmalloc(50 * sizeof(char));
+       snprintf(bnamerBat, 50, "map_to_tknz_right");
+
+
+       mapBatId = BBPindex(bname); 
+       if (mapBatId != 0){
+               printf("The tokenizer-mapping-bat %s has been built \n", bname); 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to