Changeset: d63ce66b83cd for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d63ce66b83cd
Modified Files:
	monetdb5/extras/rdf/rdfschema.c
	monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Get the table/column in relational representation for each property (considering its type). diffs (200 lines): diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -704,11 +704,17 @@ CSPropTypes* initCSPropTypes(CSset* freq csPropTypes[id].lstPropTypes = (PropTypes*) GDKmalloc(sizeof(PropTypes) * csPropTypes[id].numProp); for (j = 0; j < csPropTypes[id].numProp; j++){ csPropTypes[id].lstPropTypes[j].prop = freqCSset->items[i].lstProp[j]; + csPropTypes[id].lstPropTypes[j].propFreq = 0; csPropTypes[id].lstPropTypes[j].numType = MULTIVALUES + 1; csPropTypes[id].lstPropTypes[j].lstTypes = (char*)GDKmalloc(sizeof(char) * csPropTypes[id].lstPropTypes[j].numType); csPropTypes[id].lstPropTypes[j].lstFreq = (int*)GDKmalloc(sizeof(int) * csPropTypes[id].lstPropTypes[j].numType); + csPropTypes[id].lstPropTypes[j].colIdxes = (int*)GDKmalloc(sizeof(int) * csPropTypes[id].lstPropTypes[j].numType); + csPropTypes[id].lstPropTypes[j].isMainTypes = (char*)GDKmalloc(sizeof(char) * csPropTypes[id].lstPropTypes[j].numType); + for (k = 0; k < csPropTypes[id].lstPropTypes[j].numType; k++){ csPropTypes[id].lstPropTypes[j].lstFreq[k] = 0; + csPropTypes[id].lstPropTypes[j].isMainTypes[k] = 0; + csPropTypes[id].lstPropTypes[j].colIdxes[k] = -1; } } @@ -723,9 +729,47 @@ CSPropTypes* initCSPropTypes(CSset* freq } static -void printCSPropTypes(CSPropTypes* csPropTypes, int numMergedCS, CSset* freqCSset){ +void genCSPropTypesColIdx(CSPropTypes* csPropTypes, int numMergedCS, CSset* freqCSset){ int i, j, k; - + int tmpMaxFreq; + int defaultIdx; /* Index of the default type for a property */ + int curTypeColIdx = 0; + + (void) freqCSset; + + for (i = 0; i < numMergedCS; i++){ + curTypeColIdx = 0; + for(j = 0; j < csPropTypes[i].numProp; j++){ + tmpMaxFreq = csPropTypes[i].lstPropTypes[j].lstFreq[0]; + defaultIdx = 0; + for (k = 0; k < csPropTypes[i].lstPropTypes[j].numType; k++){ + if 
(csPropTypes[i].lstPropTypes[j].lstFreq[k] > tmpMaxFreq){ + tmpMaxFreq = csPropTypes[i].lstPropTypes[j].lstFreq[k]; + defaultIdx = k; + } + if (csPropTypes[i].lstPropTypes[j].lstFreq[k] < csPropTypes[i].lstPropTypes[j].propFreq * 0.1){ + //non-frequent type goes to PSO + csPropTypes[i].lstPropTypes[j].isMainTypes[k] = PSOTBL; + } + else + csPropTypes[i].lstPropTypes[j].isMainTypes[k] =TYPETBL; + } + /* One type is set to be the default type (in the main table) */ + csPropTypes[i].lstPropTypes[j].isMainTypes[defaultIdx] = MAINTBL; + csPropTypes[i].lstPropTypes[j].colIdxes[defaultIdx] = j; + + /* Count the number of column needed */ + for (k = 0; k < csPropTypes[i].lstPropTypes[j].numType; k++){ + if (csPropTypes[i].lstPropTypes[j].isMainTypes[k] == TYPETBL){ + csPropTypes[i].lstPropTypes[j].colIdxes[k] = curTypeColIdx; + curTypeColIdx++; + } + } + } + } + + /* Print cspropTypes */ + /* for (i = 0; i < numMergedCS; i++){ printf("MergedCS %d (Freq: %d): \n", i, freqCSset->items[csPropTypes[i].freqCSId].support); for(j = 0; j < csPropTypes[i].numProp; j++){ @@ -734,8 +778,14 @@ void printCSPropTypes(CSPropTypes* csPro printf(" Type %d (%d) | ", k, csPropTypes[i].lstPropTypes[j].lstFreq[k]); } printf("\n"); + printf(" "); + for (k = 0; k < csPropTypes[i].lstPropTypes[j].numType; k++){ + printf(" Tbl %d (cl%d) | ", csPropTypes[i].lstPropTypes[j].isMainTypes[k], csPropTypes[i].lstPropTypes[j].colIdxes[k]); + } + printf("\n"); } } + */ } /* * Add types of properties @@ -757,6 +807,7 @@ void addPropTypes(char *buffTypes, oid* j++; } //j is position of the property buffP[i] in csPropTypes[tblId] + csPropTypes[tblId].lstPropTypes[j].propFreq++; csPropTypes[tblId].lstPropTypes[j].lstFreq[(int)buffTypes[i]]++; } @@ -773,6 +824,8 @@ void freeCSPropTypes(CSPropTypes* csProp for (j = 0; j < csPropTypes[i].numProp; j++){ free(csPropTypes[i].lstPropTypes[j].lstTypes); free(csPropTypes[i].lstPropTypes[j].lstFreq); + free(csPropTypes[i].lstPropTypes[j].colIdxes); + 
free(csPropTypes[i].lstPropTypes[j].isMainTypes); } free(csPropTypes[i].lstPropTypes); } @@ -3668,7 +3721,7 @@ void initCStables(CStableStat* cstablest static -void initCSTableIdxMapping(CSset* freqCSset, int* csTblIdxMapping, int* mfreqIdxTblIdxMapping, int* mTblIdxFreqIdxMapping){ +void initCSTableIdxMapping(CSset* freqCSset, int* csTblIdxMapping, int* mfreqIdxTblIdxMapping, int* mTblIdxFreqIdxMapping, int *numTables){ int i, k; CS cs; @@ -3683,6 +3736,8 @@ void initCSTableIdxMapping(CSset* freqCS } } + *numTables = k; + // Mapping the csid directly to the index of the table ==> csTblIndxMapping for (i = 0; i < freqCSset->numOrigFreqCS; i++){ @@ -4206,6 +4261,7 @@ RDFreorganize(int *ret, CStableStat *cst int *csTblIdxMapping; /* Store the mapping from a CS id to an index of a maxCS or mergeCS in freqCSset. */ int *mfreqIdxTblIdxMapping; /* Store the mapping from the idx of a max/merge freqCS to the table Idx */ int *mTblIdxFreqIdxMapping; /* Invert of mfreqIdxTblIdxMapping */ + int numTables = 0; PropStat *propStat; int numdistinctMCS = 0; int maxNumPwithDup = 0; @@ -4233,10 +4289,8 @@ RDFreorganize(int *ret, CStableStat *cst initIntArray(mTblIdxFreqIdxMapping , freqCSset->numCSadded, -1); //Mapping from from CSId to TableIdx - initCSTableIdxMapping(freqCSset, csTblIdxMapping, mfreqIdxTblIdxMapping, mTblIdxFreqIdxMapping); - - // Init CStableStat - initCStables(cstablestat, freqCSset); + initCSTableIdxMapping(freqCSset, csTblIdxMapping, mfreqIdxTblIdxMapping, mTblIdxFreqIdxMapping, &numTables); + if ((sbat = BATdescriptor(*sbatid)) == NULL) { throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING); @@ -4258,9 +4312,12 @@ RDFreorganize(int *ret, CStableStat *cst oi = bat_iterator(obat); /* Get possible types of each property in a table (i.e., mergedCS) */ - csPropTypes = initCSPropTypes(freqCSset, cstablestat->numTables); + csPropTypes = initCSPropTypes(freqCSset, numTables); RDFExtractCSPropTypes(ret, sbat, si, pi, oi, subjCSMap, csTblIdxMapping, csPropTypes, 
maxNumPwithDup); - printCSPropTypes(csPropTypes,cstablestat->numTables, freqCSset); + genCSPropTypesColIdx(csPropTypes, numTables, freqCSset); + + // Init CStableStat + initCStables(cstablestat, freqCSset); if (*mode == EXPLOREONLY){ printf("Only explore the schema information \n"); diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h --- a/monetdb5/extras/rdf/rdfschema.h +++ b/monetdb5/extras/rdf/rdfschema.h @@ -39,6 +39,11 @@ typedef enum{ REORGANIZE } ExpMode; +typedef enum{ + MAINTBL, + TYPETBL, + PSOTBL +} TableType; typedef enum { NORMALCS, @@ -189,10 +194,11 @@ typedef struct CSmergeRel{ typedef struct CStable { - BAT** colBats; - BAT** mvBats; /* One bat for one Muti-values property */ - int numCol; - oid* lstProp; + BAT** colBats; + ObjectType *colTypes; + BAT** mvBats; /* One bat for one Muti-values property */ + int numCol; + oid* lstProp; } CStable; @@ -217,8 +223,11 @@ typedef struct CStableStat { typedef struct PropTypes{ oid prop; int numType; + int propFreq; /* without considering type */ char* lstTypes; int* lstFreq; + int* colIdxes; + char* isMainTypes; } PropTypes; typedef struct CSPropTypes { _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list