Changeset: cc4754ecf9c8 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=cc4754ecf9c8
Added Files:
        monetdb5/extras/rdf/ontmetadata/ontAttribute.bsbm.csv
        monetdb5/extras/rdf/ontmetadata/ontMetadata.bsbm.csv
Modified Files:
        monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh
        monetdb5/extras/rdf/rdflabels.c
        monetdb5/extras/rdf/rdflabels.h
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Modify to get the exact schema for BSBM

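- Add ontology metadata and attribute files for BSBM (ontMetadata.bsbm.csv, ontAttribute.bsbm.csv) and load them in loadOntologyToMonetDB.sh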

- When merging two dimension CSs, the merged CS is also marked as a dimension CS (otherwise it is a MERGECS).


diffs (truncated from 326 to 300 lines):

diff --git a/monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh 
b/monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh
--- a/monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh
+++ b/monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh
@@ -77,6 +77,19 @@ sed -i "s:AttFile:${PWD}/ontAttribute.fo
 
 mclient < loadtmp.sql
 
+#bsbm
+NUMMETADATA=`cat ontMetadata.bsbm.csv | wc -l`
+NUMATTRIBUTES=`cat ontAttribute.bsbm.csv | wc -l`
+
+cp loadOntologySAMPLE.sql loadtmp.sql
+sed -i "s:NUMMETADATA:$NUMMETADATA:g" loadtmp.sql
+sed -i "s:NUMATTRIBUTES:$NUMATTRIBUTES:g" loadtmp.sql
+sed -i "s:MetaFile:${PWD}/ontMetadata.bsbm.csv:g" loadtmp.sql
+sed -i "s:AttFile:${PWD}/ontAttribute.bsbm.csv:g" loadtmp.sql
+
+
+mclient < loadtmp.sql
+
 #List of possible ontologies
 NUMONT=`cat ontList.csv | wc -l`
 
diff --git a/monetdb5/extras/rdf/ontmetadata/ontAttribute.bsbm.csv 
b/monetdb5/extras/rdf/ontmetadata/ontAttribute.bsbm.csv
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/rdf/ontmetadata/ontAttribute.bsbm.csv
@@ -0,0 +1,10 @@
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/ProductFeature|NULL
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/ProductType|NULL
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Producer|NULL
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product|NULL
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/ProductTypeProduct|NULL
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/ProductFeatureProduct|NULL
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Vendor|NULL
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Offer|NULL
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Person|NULL
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Review|NULL
diff --git a/monetdb5/extras/rdf/ontmetadata/ontMetadata.bsbm.csv 
b/monetdb5/extras/rdf/ontmetadata/ontMetadata.bsbm.csv
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/rdf/ontmetadata/ontMetadata.bsbm.csv
@@ -0,0 +1,10 @@
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/ProductFeature|ProductFeature|http://www.w3.org/2002/07/owl#Thing
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/ProductType|ProductType|http://www.w3.org/2002/07/owl#Thing
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Producer|Producer|http://www.w3.org/2002/07/owl#Thing
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product|Product|http://www.w3.org/2002/07/owl#Thing
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/ProductTypeProduct|ProductTypeProduct|http://www.w3.org/2002/07/owl#Thing
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/ProductFeatureProduct|ProductFeatureProduct|http://www.w3.org/2002/07/owl#Thing
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Vendor|Vendor|http://www.w3.org/2002/07/owl#Thing
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Offer|Offer|http://www.w3.org/2002/07/owl#Thing
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Person|Person|http://www.w3.org/2002/07/owl#Thing
+http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Review|Review|http://www.w3.org/2002/07/owl#Thing
diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -856,11 +856,13 @@ static
 void insertValuesIntoTypeAttributesHistogram(oid* typeList, int 
typeListLength, TypeAttributesFreq*** typeAttributesHistogram, int** 
typeAttributesHistogramCount, int csFreqIdx, int type, BAT *ontmetaBat) {
        int             i, j;
        int             fit;
+       (void) ontmetaBat;
 
        for (i = 0; i < typeListLength; ++i) {
+               #if ONLY_USE_ONTOLOGYBASED_TYPE
                BUN pos = BUNfnd(BATmirror(ontmetaBat), &typeList[i]);
                if (pos == BUN_NONE) continue; // no ontology information, 
ignore
-
+               #endif
                // add to histogram
                fit = 0;
                for (j = 0; j < typeAttributesHistogramCount[csFreqIdx][type]; 
++j) {
@@ -2079,7 +2081,11 @@ void getTableName(CSlabel* label, int cs
        oid             maxDepthOid;
        int             maxFreq;
 
-
+       //for choosing the right type values
+       BUN             ontClassPos;
+       oid             typeOid;
+       int             depth, maxDepth;
+       int             freq;
        (void) ontmetaBat;
 
 
@@ -2087,6 +2093,7 @@ void getTableName(CSlabel* label, int cs
        // get most frequent type value per type attribute
        tmpList = NULL;
        tmpListCount = 0;
+
        for (i = 0; i < typeAttributesCount; ++i) {
                if (typeAttributesHistogramCount[csIdx][i] == 0) continue;
                /*   //TODO: Uncomment this path
@@ -2109,6 +2116,7 @@ void getTableName(CSlabel* label, int cs
                        }
                }
                */
+               
                if (typeAttributesHistogram[csIdx][i][0].percent < 
TYPE_FREQ_THRESHOLD) continue; // sorted
                tmpList = (oid *) realloc(tmpList, sizeof(oid) * (tmpListCount 
+ 1));
                if (!tmpList) fprintf(stderr, "ERROR: Couldn't realloc 
memory!\n");
@@ -2116,23 +2124,40 @@ void getTableName(CSlabel* label, int cs
                // of all values that are >= TYPE_FREQ_THRESHOLD, choose the 
value with the highest hierarchy level ("deepest" value)
                maxDepthOid = typeAttributesHistogram[csIdx][i][0].value;
                maxFreq = typeAttributesHistogram[csIdx][i][0].freq;
+               ontClassPos = BUNfnd(BATmirror(ontmetaBat), &maxDepthOid);
+               if ( ontClassPos != BUN_NONE){
+                       maxDepth = ontclassSet[ontClassPos].hierDepth;
+               }       
+               else{
+                       maxDepth = -1;
+               }
+
+
                for (j = 1; j < typeAttributesHistogramCount[csIdx][i]; ++j) {
-                       int depth, maxDepth;
-                       int freq;
+
                        if (typeAttributesHistogram[csIdx][i][j].percent < 
TYPE_FREQ_THRESHOLD) break;
-                       depth = ontclassSet[BUNfnd(BATmirror(ontmetaBat), 
&typeAttributesHistogram[csIdx][i][j].value)].hierDepth;
-                       maxDepth = ontclassSet[BUNfnd(BATmirror(ontmetaBat), 
&maxDepthOid)].hierDepth;
-                       freq = typeAttributesHistogram[csIdx][i][j].freq;
-                       if (depth > maxDepth) {
-                               // choose value with higher hierarchy level
-                               maxDepthOid = 
typeAttributesHistogram[csIdx][i][j].value;
-                               maxFreq = freq;
-                       } else if (depth == maxDepth && freq > maxFreq) {
-                               // if both values are on the same level, choose 
the value with higher frequency
-                               maxDepthOid = 
typeAttributesHistogram[csIdx][i][j].value;
-                               maxFreq = freq;
+                       
+                       typeOid = typeAttributesHistogram[csIdx][i][j].value;
+                       ontClassPos = BUNfnd(BATmirror(ontmetaBat), &typeOid);
+                       if (ontClassPos != BUN_NONE){
+                               depth = ontclassSet[ontClassPos].hierDepth;
+                               freq = 
typeAttributesHistogram[csIdx][i][j].freq;
+
+                               if (depth > maxDepth) {
+                                       // choose value with higher hierarchy 
level
+                                       maxDepthOid = 
typeAttributesHistogram[csIdx][i][j].value;
+                                       maxFreq = freq;
+                                       maxDepth = depth;
+                               } else if (depth == maxDepth && freq > maxFreq) 
{
+                                       // if both values are on the same 
level, choose the value with higher frequency
+                                       maxDepthOid = 
typeAttributesHistogram[csIdx][i][j].value;
+                                       maxFreq = freq;
+                               }
                        }
                }
+
+               //
+
                tmpList[tmpListCount] = maxDepthOid;
                tmpListCount += 1;
        }
@@ -2185,8 +2210,7 @@ void getTableName(CSlabel* label, int cs
                        }
                }
        }
-
-
+               
        // --- ONTOLOGY ---
        // add all ontology candidates to list of candidates
        if (resultCount[csIdx] >= 1) {
diff --git a/monetdb5/extras/rdf/rdflabels.h b/monetdb5/extras/rdf/rdflabels.h
--- a/monetdb5/extras/rdf/rdflabels.h
+++ b/monetdb5/extras/rdf/rdflabels.h
@@ -101,6 +101,7 @@ enum {
 #define USE_ONTOLOGY_NAMES 1           // use ontology classes for labeling
 #define USE_TABLE_NAME 1               // calculate and store the final labels
 #define SHOW_CANDIDATES 0              // inserts a row in UML diagrams to 
show all candidate names
+#define        ONLY_USE_ONTOLOGYBASED_TYPE 0
 
 rdf_export void
 getPropNameShort(char** name, char* propStr);
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -1478,7 +1478,11 @@ CS* mergeTwoCSs(CS cs1, CS cs2, int freq
        int numCombineP = 0; 
 
        CS *mergecs = (CS*) malloc (sizeof (CS)); 
-       mergecs->type = (char)MERGECS; 
+       if (cs1.type == DIMENSIONCS && cs2.type == DIMENSIONCS)
+                mergecs->type = DIMENSIONCS; 
+       else
+               mergecs->type = (char)MERGECS; 
+
        mergecs->numConsistsOf = 2; 
        mergecs->lstConsistsOf = (int*) malloc(sizeof(int) * 2);
 
@@ -2527,7 +2531,7 @@ oid putaCStoHash(CSBats *csBats, oid* ke
                csId = *csoid; 
                addNewCS(csBats, fullPropStat, &csKey, key, csoid, num, 
numTriples, numTypeValues, rdftypeOntologyValues);
 
-               //if (csId == 73){
+               //if (csId == 2){
                //      printf("Extra info for cs 73 is: ");
                //      printTKNZStringFromOid(rdftypeOntologyValues[0]);
                //}
@@ -3471,6 +3475,10 @@ str mergeMaxFreqCSByS1(CSset *freqCSset,
        (void) name; 
        (void) ontmetadata;
        (void) ontmetadataCount;
+       #if     !NOT_MERGE_DIMENSIONCS_IN_S1
+       (void) cs1;
+       (void) cs2;
+       #endif
        labelStat = initLabelStat(); 
        buildLabelStat(labelStat, (*labels), freqCSset, TOPK);
        printf("Num FreqCSadded before using S1 = %d \n", 
freqCSset->numCSadded);
@@ -3502,7 +3510,7 @@ str mergeMaxFreqCSByS1(CSset *freqCSset,
                                freqId1 = labelStat->freqIdList[i][k];
                                if ((*labels)[freqId1].isOntology == 1) {
                                        cs1 = &(freqCSset->items[freqId1]);
-                                       #if     NOT_MERGE_DIMENSIONCS
+                                       #if     NOT_MERGE_DIMENSIONCS_IN_S1
                                        if (cs1->type == DIMENSIONCS) continue;
                                        #endif
                                        tmpCount++;
@@ -3512,7 +3520,7 @@ str mergeMaxFreqCSByS1(CSset *freqCSset,
                        for (j = k+1; j < labelStat->lstCount[i]; j++){
                                freqId2 = labelStat->freqIdList[i][j];
                                cs2 = &(freqCSset->items[freqId2]);
-                               #if     NOT_MERGE_DIMENSIONCS
+                               #if     NOT_MERGE_DIMENSIONCS_IN_S1
                                if (cs2->type == DIMENSIONCS) 
                                        continue; 
                                #endif
@@ -3533,7 +3541,7 @@ str mergeMaxFreqCSByS1(CSset *freqCSset,
                                freqId1 = labelStat->freqIdList[i][k];
                                if ((*labels)[freqId1].isType == 1) {
                                        cs1 = &(freqCSset->items[freqId1]);
-                                       #if     NOT_MERGE_DIMENSIONCS
+                                       #if     NOT_MERGE_DIMENSIONCS_IN_S1
                                        if (cs1->type == DIMENSIONCS) continue;
                                        #endif
                                        tmpCount++;
@@ -3543,7 +3551,7 @@ str mergeMaxFreqCSByS1(CSset *freqCSset,
                        for (j = k+1; j < labelStat->lstCount[i]; j++){
                                freqId2 = labelStat->freqIdList[i][j];
                                cs2 = &(freqCSset->items[freqId2]);
-                               #if     NOT_MERGE_DIMENSIONCS
+                               #if     NOT_MERGE_DIMENSIONCS_IN_S1
                                if (cs2->type == DIMENSIONCS) continue; 
                                #endif
                                if ((*labels)[freqId2].isType == 1){
@@ -3563,7 +3571,7 @@ str mergeMaxFreqCSByS1(CSset *freqCSset,
                                freqId1 = labelStat->freqIdList[i][k];
                                if ((*labels)[freqId1].isFK == 1) {
                                        cs1 = &(freqCSset->items[freqId1]);
-                                       #if     NOT_MERGE_DIMENSIONCS
+                                       #if     NOT_MERGE_DIMENSIONCS_IN_S1
                                        if (cs1->type == DIMENSIONCS) continue;
                                        #endif
                                        tmpCount++;
@@ -3573,7 +3581,7 @@ str mergeMaxFreqCSByS1(CSset *freqCSset,
                        for (j = k+1; j < labelStat->lstCount[i]; j++){
                                freqId2 = labelStat->freqIdList[i][j];
                                cs2 = &(freqCSset->items[freqId2]);
-                               #if     NOT_MERGE_DIMENSIONCS
+                               #if     NOT_MERGE_DIMENSIONCS_IN_S1
                                if (cs2->type == DIMENSIONCS) continue; 
                                #endif
                                if ((*labels)[freqId2].isFK == 1){
@@ -3594,8 +3602,8 @@ str mergeMaxFreqCSByS1(CSset *freqCSset,
                        for (k = 0; k < labelStat->lstCount[i]; k++){
                                freqId1 = labelStat->freqIdList[i][k];
                                cs1 = &(freqCSset->items[freqId1]);
-                               #if     NOT_MERGE_DIMENSIONCS
-                               if (0) if (cs1->type == DIMENSIONCS) continue;
+                               #if     NOT_MERGE_DIMENSIONCS_IN_S1
+                               if (cs1->type == DIMENSIONCS) continue;
                                #endif
                                tmpCount++;
                                break; 
@@ -3603,8 +3611,8 @@ str mergeMaxFreqCSByS1(CSset *freqCSset,
                        for (j = k+1; j < labelStat->lstCount[i]; j++){
                                freqId2 = labelStat->freqIdList[i][j];
                                cs2 = &(freqCSset->items[freqId2]);
-                               #if     NOT_MERGE_DIMENSIONCS
-                               if (0) if (cs2->type == DIMENSIONCS) continue; 
+                               #if     NOT_MERGE_DIMENSIONCS_IN_S1
+                               if (cs2->type == DIMENSIONCS) continue; 
                                #endif
                                doMerge(freqCSset, S1, freqId1, freqId2, 
mergecsId, labels, ontmetadata, ontmetadataCount, *name);
                                tmpCount++;
@@ -4175,11 +4183,22 @@ void mergeCSByS4(CSset *freqCSset, CSlab
                        if (simscore > SIM_THRESHOLD) {
                        #endif  
                                /*
-                               takeOid((*labels)[freqId1].name, &freqCSname1); 
-                               takeOid((*labels)[freqId2].name, &freqCSname2); 
-                               printf("Merge %d (%s) and %d (%s) with simscore 
= %f \n",freqId1, freqCSname1, freqId2, freqCSname2, simscore);
-                               GDKfree(freqCSname1);
-                               GDKfree(freqCSname2);
+                               if ((*labels)[freqId1].name != BUN_NONE){
+                                       takeOid((*labels)[freqId1].name, 
&freqCSname1);
+                                       printf("Merge %d (%s) and ",freqId1, 
freqCSname1);
+                                       GDKfree(freqCSname1);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to