Changeset: 1bef4498b2fd for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=1bef4498b2fd Modified Files: monetdb5/extras/rdf/rdf.h monetdb5/extras/rdf/rdfalgebra.c monetdb5/extras/rdf/rdfscan.c monetdb5/extras/rdf/rdfscan.h monetdb5/extras/rdf/rdfschema.c Branch: rdf Log Message:
Make persistent bat for tknzr-converted_oid mapping diffs (truncated from 365 to 300 lines): diff --git a/monetdb5/extras/rdf/rdf.h b/monetdb5/extras/rdf/rdf.h --- a/monetdb5/extras/rdf/rdf.h +++ b/monetdb5/extras/rdf/rdf.h @@ -58,6 +58,12 @@ TKNZRrdf2str (bat *res, bat *bid, bat *m rdf_export str RDFpartialjoin (bat *res, bat *lmap, bat *rmap, bat *input); +rdf_export str +RDFtriplesubsort(BAT **sbat, BAT **pbat, BAT **obat); + +rdf_export str +RDFbisubsort(BAT **lbat, BAT **rbat); + #define RDF_MIN_LITERAL (((oid) 1) << ((sizeof(oid)==8)?59:27)) diff --git a/monetdb5/extras/rdf/rdfalgebra.c b/monetdb5/extras/rdf/rdfalgebra.c --- a/monetdb5/extras/rdf/rdfalgebra.c +++ b/monetdb5/extras/rdf/rdfalgebra.c @@ -167,6 +167,70 @@ RDFpartialjoin(bat *retid, bat *lid, bat return MAL_SUCCEED; } +str RDFtriplesubsort(BAT **sbat, BAT **pbat, BAT **obat){ + + BAT *o1,*o2,*o3; + BAT *g1,*g2,*g3; + BAT *S = NULL, *P = NULL, *O = NULL; + + S = *sbat; + P = *pbat; + O = *obat; + /* order SPO/SOP */ + if (BATsubsort(sbat, &o1, &g1, S, NULL, NULL, 0, 0) == GDK_FAIL){ + if (S != NULL) BBPreclaim(S); + throw(RDF, "rdf.triplesubsort", "Fail in sorting for S"); + } + + if (BATsubsort(pbat, &o2, &g2, P, o1, g1, 0, 0) == GDK_FAIL){ + BBPreclaim(S); + if (P != NULL) BBPreclaim(P); + throw(RDF, "rdf.triplesubsort", "Fail in sub-sorting for P"); + } + if (BATsubsort(obat, &o3, &g3, O, o2, g2, 0, 0) == GDK_FAIL){ + BBPreclaim(S); + BBPreclaim(P); + if (O != NULL) BBPreclaim(O); + throw(RDF, "rdf.triplesubsort", "Fail in sub-sorting for O"); + } + + BBPunfix(o2->batCacheid); + BBPunfix(g2->batCacheid); + BBPunfix(o3->batCacheid); + BBPunfix(g3->batCacheid); + + return MAL_SUCCEED; +} + +/* + * Sort left bat and re-order right bat according to the lef bat + * */ +str RDFbisubsort(BAT **lbat, BAT **rbat){ + + BAT *o1,*o2; + BAT *g1,*g2; + BAT *L = NULL, *R = NULL; + + L = *lbat; + R = *rbat; + if (BATsubsort(lbat, &o1, &g1, L, NULL, NULL, 0, 0) == GDK_FAIL){ + if (L != NULL) BBPreclaim(L); + 
throw(RDF, "rdf.triplesubsort", "Fail in sorting for L"); + } + + if (BATsubsort(rbat, &o2, &g2, R, o1, g1, 0, 0) == GDK_FAIL){ + BBPreclaim(L); + if (R != NULL) BBPreclaim(R); + throw(RDF, "rdf.triplesubsort", "Fail in sub-sorting for R"); + } + + BBPunfix(o1->batCacheid); + BBPunfix(g1->batCacheid); + BBPunfix(o2->batCacheid); + BBPunfix(g2->batCacheid); + + return MAL_SUCCEED; +} str TKNZRrdf2str(bat *res, bat *bid, bat *map) diff --git a/monetdb5/extras/rdf/rdfscan.c b/monetdb5/extras/rdf/rdfscan.c --- a/monetdb5/extras/rdf/rdfscan.c +++ b/monetdb5/extras/rdf/rdfscan.c @@ -37,18 +37,21 @@ #define MAX_PARAMS_NO 20 static -void queryParser(RdfScanParams *rsParam, str query, str schema){ +str queryParser(RdfScanParams *rsParam, str query, str schema){ int paramNo; str parts[MAX_PARAMS_NO]; int i = 0, j; - int numRP = 0; - int numOP = 0; - int opIdx; - int rpIdx; + int numRP = 0, numOP = 0; + int opIdx, rpIdx; + int ret; (void) schema; + if (TKNZRopen (NULL, &schema) != MAL_SUCCEED) { + throw(RDF, "rdf.rdfschema", "could not open the tokenizer\n"); + } + paramNo = TKNZRtokenize(query, parts, '|'); printf("Number of params from query %s is: %d \n", query, paramNo); @@ -68,19 +71,19 @@ void queryParser(RdfScanParams *rsParam, printf("Number of RPs is %d \n", numRP); rsParam->numRP = numRP; - rsParam->lstRPs = (char **)GDKmalloc(sizeof(char*) * rsParam->numRP); + rsParam->lstRPstr = (char **)GDKmalloc(sizeof(char*) * rsParam->numRP); rsParam->lstLow = (char **)GDKmalloc(sizeof(char*) * rsParam->numRP); rsParam->lstHi = (char **)GDKmalloc(sizeof(char*) * rsParam->numRP); rsParam->numOP = numOP; - rsParam->lstOPs = (char **)GDKmalloc(sizeof(char*) * rsParam->numOP); + rsParam->lstOPstr = (char **)GDKmalloc(sizeof(char*) * rsParam->numOP); opIdx = 0; rpIdx = 0; for (i = 0; i < paramNo; i++){ if (parts[i][0] == '?'){ //optional param parts[i]++; - rsParam->lstOPs[opIdx] = GDKstrdup(parts[i]); + rsParam->lstOPstr[opIdx] = GDKstrdup(parts[i]); opIdx++; } else{ @@ -113,11 
+116,16 @@ void queryParser(RdfScanParams *rsParam, } } - rsParam->lstRPs[rpIdx] = GDKstrdup(parts[i]); + rsParam->lstRPstr[rpIdx] = GDKstrdup(parts[i]); rpIdx++; } } + + + TKNZRclose(&ret); + + return MAL_SUCCEED; } static @@ -125,12 +133,12 @@ void printParams(RdfScanParams *rsParam) int i; for (i = 0; i < rsParam->numRP; i++){ - printf("RP[%d] = %s\n", i, rsParam->lstRPs[i]); + printf("RP[%d] = %s\n", i, rsParam->lstRPstr[i]); if (rsParam->lstLow[i] != NULL) printf(" Low %s\n",rsParam->lstLow[i]); if (rsParam->lstHi[i] != NULL) printf(" Hi %s\n",rsParam->lstHi[i]); } for (i = 0; i < rsParam->numOP; i++){ - printf("OP[%d] = %s\n", i, rsParam->lstOPs[i]); + printf("OP[%d] = %s\n", i, rsParam->lstOPstr[i]); } } @@ -139,18 +147,18 @@ static void freeParams(RdfScanParams *rsParam){ int i; for (i = 0; i < rsParam->numRP; i++){ - GDKfree(rsParam->lstRPs[i]); + GDKfree(rsParam->lstRPstr[i]); if (rsParam->lstLow[i] != NULL) GDKfree(rsParam->lstLow[i]); if (rsParam->lstHi[i] != NULL) GDKfree(rsParam->lstHi[i]); } - GDKfree(rsParam->lstRPs); + GDKfree(rsParam->lstRPstr); GDKfree(rsParam->lstLow); GDKfree(rsParam->lstHi); for (i = 0; i < rsParam->numOP; i++){ - GDKfree(rsParam->lstOPs[i]); + GDKfree(rsParam->lstOPstr[i]); } - GDKfree(rsParam->lstOPs); + GDKfree(rsParam->lstOPstr); GDKfree(rsParam); diff --git a/monetdb5/extras/rdf/rdfscan.h b/monetdb5/extras/rdf/rdfscan.h --- a/monetdb5/extras/rdf/rdfscan.h +++ b/monetdb5/extras/rdf/rdfscan.h @@ -31,11 +31,13 @@ typedef struct RdfScanParams { char schema[20]; - char** lstRPs; /* List of required params */ - char** lstLow; /* Low bound of RP*/ - char** lstHi; /* High bound of RP*/ + char **lstRPstr; /* List of required params (string format) */ + oid *lstRP; /* List of required params' oids */ + char **lstLow; /* Low bound of RP*/ + char **lstHi; /* High bound of RP*/ int numRP; /* Number of required params */ - char** lstOPs; /* List of optional params */ + char **lstOPstr; /* List of optional params (string format) */ + 
oid *lstOP; /* List of optional params' oids */ int numOP; } RdfScanParams; diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -10091,41 +10091,6 @@ str getOrigPbt(oid *pbt, oid *origPbt, B return MAL_SUCCEED; } -static -str triplesubsort(BAT **sbat, BAT **pbat, BAT **obat){ - - BAT *o1,*o2,*o3; - BAT *g1,*g2,*g3; - BAT *S = NULL, *P = NULL, *O = NULL; - - S = *sbat; - P = *pbat; - O = *obat; - /* order SPO/SOP */ - if (BATsubsort(sbat, &o1, &g1, S, NULL, NULL, 0, 0) == GDK_FAIL){ - if (S != NULL) BBPreclaim(S); - throw(RDF, "rdf.triplesubsort", "Fail in sorting for S"); - } - - if (BATsubsort(pbat, &o2, &g2, P, o1, g1, 0, 0) == GDK_FAIL){ - BBPreclaim(S); - if (P != NULL) BBPreclaim(P); - throw(RDF, "rdf.triplesubsort", "Fail in sub-sorting for P"); - } - if (BATsubsort(obat, &o3, &g3, O, o2, g2, 0, 0) == GDK_FAIL){ - BBPreclaim(S); - BBPreclaim(P); - if (O != NULL) BBPreclaim(O); - throw(RDF, "rdf.triplesubsort", "Fail in sub-sorting for O"); - } - - BBPunfix(o2->batCacheid); - BBPunfix(g2->batCacheid); - BBPunfix(o3->batCacheid); - BBPunfix(g3->batCacheid); - - return MAL_SUCCEED; -} static BAT* BATnewPropSet(int ht, int tt, BUN cap){ @@ -11254,6 +11219,9 @@ str RDFdistTriplesToCSs(int *ret, bat *s * * Input String --> Original TKNZR oid --> New Oid (TKNRZ_to_new_MapBAT) * New oid --> Original TKNZR oid --> Input String (New_to_TKNZR_MapBat) + * + * To convert from mapId to tknz Id, we use the lmap, rmap bats. 
+ * * */ static str buildTKNZRMappingBat(BAT *lmap, BAT *rmap){ @@ -11263,9 +11231,30 @@ str buildTKNZRMappingBat(BAT *lmap, BAT int ret; int num = 0; bat mapBatId; - BAT *mapBat; - str batname = NULL; - + BAT *tmpmapBat = NULL, *pMapBat = NULL; + str bname = NULL, bnamelBat = NULL, bnamerBat = NULL; + bat *lstCommits = NULL; + + /* Check if the bat has already been built */ + bname = (str) GDKmalloc(50 * sizeof(char)); + snprintf(bname, 50, "tknzr_to_map"); + + bnamelBat = (str) GDKmalloc(50 * sizeof(char)); + snprintf(bnamelBat, 50, "map_to_tknz_left"); + + bnamerBat = (str) GDKmalloc(50 * sizeof(char)); + snprintf(bnamerBat, 50, "map_to_tknz_right"); + + + mapBatId = BBPindex(bname); + if (mapBatId != 0){ + printf("The tokenizer-mapping-bat %s has been built \n", bname); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list