Changeset: d63ce66b83cd for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d63ce66b83cd
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Determine which table and column of the relational representation each 
property is stored in, taking the property's type into account.


diffs (200 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -704,11 +704,17 @@ CSPropTypes* initCSPropTypes(CSset* freq
                        csPropTypes[id].lstPropTypes = (PropTypes*) 
GDKmalloc(sizeof(PropTypes) * csPropTypes[id].numProp);
                        for (j = 0; j < csPropTypes[id].numProp; j++){
                                csPropTypes[id].lstPropTypes[j].prop = 
freqCSset->items[i].lstProp[j]; 
+                               csPropTypes[id].lstPropTypes[j].propFreq = 0; 
                                csPropTypes[id].lstPropTypes[j].numType = 
MULTIVALUES + 1;
                                csPropTypes[id].lstPropTypes[j].lstTypes = 
(char*)GDKmalloc(sizeof(char) * csPropTypes[id].lstPropTypes[j].numType);
                                csPropTypes[id].lstPropTypes[j].lstFreq = 
(int*)GDKmalloc(sizeof(int) * csPropTypes[id].lstPropTypes[j].numType);
+                               csPropTypes[id].lstPropTypes[j].colIdxes = 
(int*)GDKmalloc(sizeof(int) * csPropTypes[id].lstPropTypes[j].numType);
+                               csPropTypes[id].lstPropTypes[j].isMainTypes = 
(char*)GDKmalloc(sizeof(char) * csPropTypes[id].lstPropTypes[j].numType);
+
                                for (k = 0; k < 
csPropTypes[id].lstPropTypes[j].numType; k++){
                                        
csPropTypes[id].lstPropTypes[j].lstFreq[k] = 0; 
+                                       
csPropTypes[id].lstPropTypes[j].isMainTypes[k] = 0; 
+                                       
csPropTypes[id].lstPropTypes[j].colIdxes[k] = -1; 
                                }
 
                        }
@@ -723,9 +729,47 @@ CSPropTypes* initCSPropTypes(CSset* freq
 }
 
 static 
-void printCSPropTypes(CSPropTypes* csPropTypes, int numMergedCS, CSset* 
freqCSset){
+void genCSPropTypesColIdx(CSPropTypes* csPropTypes, int numMergedCS, CSset* 
freqCSset){
        int i, j, k; 
-
+       int tmpMaxFreq;  
+       int defaultIdx;  /* Index of the default type for a property */
+       int curTypeColIdx = 0;
+
+       (void) freqCSset;
+
+       for (i = 0; i < numMergedCS; i++){
+               curTypeColIdx = 0; 
+               for(j = 0; j < csPropTypes[i].numProp; j++){
+                       tmpMaxFreq = csPropTypes[i].lstPropTypes[j].lstFreq[0];
+                       defaultIdx = 0; 
+                       for (k = 0; k < csPropTypes[i].lstPropTypes[j].numType; 
k++){
+                               if (csPropTypes[i].lstPropTypes[j].lstFreq[k] > 
tmpMaxFreq){
+                                       tmpMaxFreq =  
csPropTypes[i].lstPropTypes[j].lstFreq[k];
+                                       defaultIdx = k;         
+                               }
+                               if (csPropTypes[i].lstPropTypes[j].lstFreq[k] < 
csPropTypes[i].lstPropTypes[j].propFreq * 0.1){
+                                       //non-frequent type goes to PSO
+                                       
csPropTypes[i].lstPropTypes[j].isMainTypes[k] = PSOTBL; 
+                               }
+                               else
+                                       
csPropTypes[i].lstPropTypes[j].isMainTypes[k] =TYPETBL;
+                       }
+                       /* One type is set to be the default type (in the main 
table) */
+                       csPropTypes[i].lstPropTypes[j].isMainTypes[defaultIdx] 
= MAINTBL; 
+                       csPropTypes[i].lstPropTypes[j].colIdxes[defaultIdx] = j;
+
+                       /* Count the number of column needed */
+                       for (k = 0; k < csPropTypes[i].lstPropTypes[j].numType; 
k++){
+                               if 
(csPropTypes[i].lstPropTypes[j].isMainTypes[k] == TYPETBL){
+                                       
csPropTypes[i].lstPropTypes[j].colIdxes[k] = curTypeColIdx; 
+                                       curTypeColIdx++;
+                               }       
+                       }
+               }
+       }
+
+       /* Print cspropTypes */
+       /*
        for (i = 0; i < numMergedCS; i++){
                printf("MergedCS %d (Freq: %d): \n", i, 
freqCSset->items[csPropTypes[i].freqCSId].support);
                for(j = 0; j < csPropTypes[i].numProp; j++){
@@ -734,8 +778,14 @@ void printCSPropTypes(CSPropTypes* csPro
                                printf(" Type %d (%d)  | ", k, 
csPropTypes[i].lstPropTypes[j].lstFreq[k]);
                        }
                        printf("\n");
+                       printf("         ");
+                       for (k = 0; k < csPropTypes[i].lstPropTypes[j].numType; 
k++){
+                               printf(" Tbl %d (cl%d) | ", 
csPropTypes[i].lstPropTypes[j].isMainTypes[k], 
csPropTypes[i].lstPropTypes[j].colIdxes[k]);
+                       }
+                       printf("\n");
                }
        }
+       */
 }
 /*
  * Add types of properties 
@@ -757,6 +807,7 @@ void addPropTypes(char *buffTypes, oid* 
                                j++;
                        }       
                        //j is position of the property buffP[i] in 
csPropTypes[tblId]
+                       csPropTypes[tblId].lstPropTypes[j].propFreq++;
                        
csPropTypes[tblId].lstPropTypes[j].lstFreq[(int)buffTypes[i]]++; 
                        
                }
@@ -773,6 +824,8 @@ void freeCSPropTypes(CSPropTypes* csProp
                        for (j = 0; j < csPropTypes[i].numProp; j++){
                                free(csPropTypes[i].lstPropTypes[j].lstTypes); 
                                free(csPropTypes[i].lstPropTypes[j].lstFreq);
+                               free(csPropTypes[i].lstPropTypes[j].colIdxes);
+                               
free(csPropTypes[i].lstPropTypes[j].isMainTypes);
                        }
                        free(csPropTypes[i].lstPropTypes); 
                }
@@ -3668,7 +3721,7 @@ void initCStables(CStableStat* cstablest
 
 
 static
-void initCSTableIdxMapping(CSset* freqCSset, int* csTblIdxMapping, int* 
mfreqIdxTblIdxMapping, int* mTblIdxFreqIdxMapping){
+void initCSTableIdxMapping(CSset* freqCSset, int* csTblIdxMapping, int* 
mfreqIdxTblIdxMapping, int* mTblIdxFreqIdxMapping, int *numTables){
 
        int             i, k; 
        CS              cs;
@@ -3683,6 +3736,8 @@ void initCSTableIdxMapping(CSset* freqCS
                }
        }
        
+       *numTables = k; 
+
        // Mapping the csid directly to the index of the table ==> 
csTblIndxMapping
        
        for (i = 0; i < freqCSset->numOrigFreqCS; i++){
@@ -4206,6 +4261,7 @@ RDFreorganize(int *ret, CStableStat *cst
        int             *csTblIdxMapping;       /* Store the mapping from a CS 
id to an index of a maxCS or mergeCS in freqCSset. */
        int             *mfreqIdxTblIdxMapping;  /* Store the mapping from the 
idx of a max/merge freqCS to the table Idx */
        int             *mTblIdxFreqIdxMapping;  /* Invert of 
mfreqIdxTblIdxMapping */
+       int             numTables = 0; 
        PropStat        *propStat; 
        int             numdistinctMCS = 0; 
        int             maxNumPwithDup = 0;
@@ -4233,10 +4289,8 @@ RDFreorganize(int *ret, CStableStat *cst
        initIntArray(mTblIdxFreqIdxMapping , freqCSset->numCSadded, -1);
 
        //Mapping from from CSId to TableIdx 
-       initCSTableIdxMapping(freqCSset, csTblIdxMapping, 
mfreqIdxTblIdxMapping, mTblIdxFreqIdxMapping);
-
-       // Init CStableStat
-       initCStables(cstablestat, freqCSset);
+       initCSTableIdxMapping(freqCSset, csTblIdxMapping, 
mfreqIdxTblIdxMapping, mTblIdxFreqIdxMapping, &numTables);
+
 
        if ((sbat = BATdescriptor(*sbatid)) == NULL) {
                throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
@@ -4258,9 +4312,12 @@ RDFreorganize(int *ret, CStableStat *cst
        oi = bat_iterator(obat); 
 
        /* Get possible types of each property in a table (i.e., mergedCS) */
-       csPropTypes = initCSPropTypes(freqCSset, cstablestat->numTables);
+       csPropTypes = initCSPropTypes(freqCSset, numTables);
        RDFExtractCSPropTypes(ret, sbat, si, pi, oi, subjCSMap, 
csTblIdxMapping, csPropTypes, maxNumPwithDup);
-       printCSPropTypes(csPropTypes,cstablestat->numTables, freqCSset);
+       genCSPropTypesColIdx(csPropTypes, numTables, freqCSset);
+
+       // Init CStableStat
+       initCStables(cstablestat, freqCSset);
 
        if (*mode == EXPLOREONLY){
                printf("Only explore the schema information \n");
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -39,6 +39,11 @@ typedef enum{
        REORGANIZE
 } ExpMode; 
 
+typedef enum{
+       MAINTBL, 
+       TYPETBL,
+       PSOTBL
+} TableType;           
 
 typedef enum {
        NORMALCS, 
@@ -189,10 +194,11 @@ typedef struct CSmergeRel{
 
 
 typedef struct CStable {
-       BAT**   colBats; 
-       BAT**   mvBats;         /* One bat for one Muti-values property */
-       int     numCol; 
-       oid*    lstProp;
+       BAT**           colBats; 
+       ObjectType      *colTypes; 
+       BAT**           mvBats;         /* One bat for one Muti-values property 
*/
+       int             numCol; 
+       oid*            lstProp;
 } CStable; 
 
 
@@ -217,8 +223,11 @@ typedef struct CStableStat {
 typedef struct PropTypes{
        oid     prop;
        int     numType; 
+       int     propFreq;       /* without considering type */
        char*   lstTypes; 
        int*    lstFreq; 
+       int*    colIdxes; 
+       char*   isMainTypes;
 } PropTypes; 
 
 typedef struct CSPropTypes {
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to