Changeset: 548ec8e2e127 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=548ec8e2e127
Modified Files:
        monetdb5/extras/rdf/rdfontologyload.c
        monetdb5/extras/rdf/rdfontologyload.h
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
        monetdb5/extras/rdf/rdfschema.mal
Branch: rdf
Log Message:

Use the ontology labels if available for the table name

TODO: Use the ontology property labels if available for the column names in the 
final schema


diffs (truncated from 376 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdfontologyload.c 
b/monetdb5/extras/rdf/rdfontologyload.c
--- a/monetdb5/extras/rdf/rdfontologyload.c
+++ b/monetdb5/extras/rdf/rdfontologyload.c
@@ -261,7 +261,7 @@ int compareProp (const void * a, const v
 }
 
 static 
-str buildOntologyClassesInfo(oid **ontmetadat, int ontmetadataCount, oid 
**ontattributes, int ontattributesCount){
+str buildOntologyClassesInfo(oid **ontmetadat, int ontmetadataCount, oid 
**ontattributes, int ontattributesCount, str *tmpOntLabels){
 
        int     i; 
        oid     classOid; //The class Oid comes from 
@@ -281,7 +281,7 @@ str buildOntologyClassesInfo(oid **ontme
        int     tmpNumProp = 0; 
        oid*    buffProps = NULL;
        int     maxNumPropPerOntology = 1000; 
-
+       str     *tmpLabelsShortlist = NULL; 
 
        //Read all ontmetadata and store them in the ontmetaBat
        
@@ -291,6 +291,11 @@ str buildOntologyClassesInfo(oid **ontme
        if (!(ontmetaBat->T->hash)){
                throw(RDF, "buildOntologyClassesInfo", "Cannot allocate the 
hash for Bat");
        }
+       
+       tmpLabelsShortlist = (str *) malloc(sizeof(str) * ontmetadataCount);
+       for (i = 0; i < ontmetadataCount; i++) tmpLabelsShortlist[i] = NULL; 
+
+       classIdx = 0;
        for (i = 0; i < ontmetadataCount; i++){
                classOid = ontmetadat[0][i];
                assert(classOid != BUN_NONE); 
@@ -299,8 +304,20 @@ str buildOntologyClassesInfo(oid **ontme
                if (tmpBun == BUN_NONE){        //If it is a new class
                        if (BUNappend(ontmetaBat,&classOid, TRUE) == NULL)    
                                throw(RDF, "buildOntologyClassesInfo", "Cannot 
insert to ontmetaBat");
+
+                       if (tmpOntLabels[i] != NULL){
+                               tmpLabelsShortlist[classIdx] = 
GDKstrdup(tmpOntLabels[i]);
+                       }
+                       else
+                               tmpLabelsShortlist[classIdx] = NULL;    
+
+                       classIdx++;
                } 
+       
+       }
 
+       //Also add super class to list of ontology classes
+       for (i = 0; i < ontmetadataCount; i++){
                scOid = ontmetadat[1][i];
 
                if (scOid != BUN_NONE){ //The superClass oid is there
@@ -308,6 +325,9 @@ str buildOntologyClassesInfo(oid **ontme
                        if (tmpBun == BUN_NONE){        //If it is a new class
                                if (BUNappend(ontmetaBat, &scOid, TRUE) == 
NULL)    
                                        throw(RDF, "buildOntologyClassesInfo", 
"Cannot insert to ontmetaBat");
+                               
+                               tmpLabelsShortlist[classIdx] = NULL;
+                               classIdx++;
                        } 
                }
        }
@@ -332,6 +352,11 @@ str buildOntologyClassesInfo(oid **ontme
                
                tmpontclassSet[i].cOid = *tmpOid;
 
+               if (tmpLabelsShortlist[i] != NULL)
+                       tmpontclassSet[i].label = 
GDKstrdup(tmpLabelsShortlist[i]);
+               else
+                       tmpontclassSet[i].label = NULL; 
+
                //Init other info
                tmpontclassSet[i].scIdxes = (int *) malloc(sizeof(int) * 
NUMSC_PER_ONTCLASS);
                tmpontclassSet[i].numsc = 0;
@@ -343,6 +368,13 @@ str buildOntologyClassesInfo(oid **ontme
                i++;
        }
        
+       //Free the temporary label shortlist (each copied label, then the array itself)
+       for (i = 0; i < numClass; i++){
+               if (tmpLabelsShortlist[i] != NULL) 
+                       GDKfree(tmpLabelsShortlist[i]);
+       }
+       free(tmpLabelsShortlist);
+
 
        //Add sc
        for (i = 0; i < ontmetadataCount; i++){
@@ -425,14 +457,15 @@ str buildOntologyClassesInfo(oid **ontme
 }
 
 str
-RDFloadsqlontologies(int *ret, bat *auriid, bat *aattrid, bat *muriid, bat 
*msuperid){
+RDFloadsqlontologies(int *ret, bat *auriid, bat *aattrid, bat *muriid, bat 
*msuperid, bat *mlabelid){
        BUN                     p, q;
-       BAT                     *auri = NULL, *aattr = NULL, *muri = NULL, 
*msuper = NULL;
-       BATiter                 aurii, aattri, murii, msuperi;
-       BUN                     bun, bun2, bun3, bun4;
+       BAT                     *auri = NULL, *aattr = NULL, *muri = NULL, 
*msuper = NULL, *mlabel = NULL;
+       BATiter                 aurii, aattri, murii, msuperi, mlabeli;
+       BUN                     bun, bun2, bun3, bun4, bun5;
        BUN                     auriCount, muriCount;
        int                     i;
        str                     schema = "rdf";
+       str                     *tmpOntLabels = NULL;   //Set of ontology labels
 
        TKNZRopen (NULL, &schema);
 
@@ -462,10 +495,19 @@ RDFloadsqlontologies(int *ret, bat *auri
                throw(MAL, "rdf.RDFloadsqlontologies", RUNTIME_OBJECT_MISSING);
        }
 
+       if ((mlabel = BATdescriptor(*mlabelid)) == NULL) {
+               BBPreleaseref(auri->batCacheid);
+               BBPreleaseref(aattr->batCacheid);
+               BBPreleaseref(muri->batCacheid);
+               BBPreleaseref(msuper->batCacheid);
+               throw(MAL, "rdf.RDFloadsqlontologies", RUNTIME_OBJECT_MISSING);
+       }
+
        aurii = bat_iterator(auri);
        aattri = bat_iterator(aattr);
        murii = bat_iterator(muri);
        msuperi = bat_iterator(msuper);
+       mlabeli = bat_iterator(mlabel); 
 
        // load ontattributes
        i = 0;
@@ -517,18 +559,23 @@ RDFloadsqlontologies(int *ret, bat *auri
        i = 0;
        bun3 = BUNfirst(muri);
        bun4 = BUNfirst(msuper);
-
+       bun5 = BUNfirst(mlabel);
+       
        muriCount = BATcount(muri);
 
        ontmetadata = (oid**) malloc(sizeof(oid *) * 2);
        if (!ontmetadata) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
-       ontmetadata[0] = malloc(sizeof(str) * muriCount); // uri
-       ontmetadata[1] = malloc(sizeof(str) * muriCount); // superclass
+       ontmetadata[0] = malloc(sizeof(oid) * muriCount); // uri
+       ontmetadata[1] = malloc(sizeof(oid) * muriCount); // superclass
        if (!ontmetadata[0] || !ontmetadata[1]) fprintf(stderr, "ERROR: 
Couldn't malloc memory!\n");
 
+       tmpOntLabels = (str*)malloc(sizeof(str) * muriCount); //labels of 
ontology classes  
+       if (!tmpOntLabels) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
+
        BATloop(muri, p, q){
                str muristr = (str) BUNtail(murii, bun3 + i);
                str msuperstr = (str) BUNtail(msuperi, bun4 + i);
+               str mlabelstr = (str) BUNtail(mlabeli, bun5 + i);
 
                oid murioid, msuperoid;
 
@@ -554,6 +601,14 @@ RDFloadsqlontologies(int *ret, bat *auri
                } else {
                        ontmetadata[1][ontmetadataCount] = msuperoid;
                }
+
+               if (strcmp(mlabelstr, "\x80") == 0) {
+                       tmpOntLabels[ontmetadataCount] = NULL;
+               } else {
+                       tmpOntLabels[ontmetadataCount] = GDKstrdup(mlabelstr);
+               }
+                
+
                ontmetadataCount += 1;
 
                ++i;
@@ -562,8 +617,14 @@ RDFloadsqlontologies(int *ret, bat *auri
                GDKfree(msuperstr2);
        }
 
-       buildOntologyClassesInfo(ontmetadata, ontmetadataCount, ontattributes, 
ontattributesCount);
+       buildOntologyClassesInfo(ontmetadata, ontmetadataCount, ontattributes, 
ontattributesCount, tmpOntLabels);
 
+       for (i = 0; i < ontmetadataCount; i++){
+               if (tmpOntLabels[i] != NULL) 
+                       GDKfree(tmpOntLabels[i]);
+       }
+       free(tmpOntLabels);
+       
        BBPreclaim(auri);
        BBPreclaim(aattr);
        BBPreclaim(muri);
diff --git a/monetdb5/extras/rdf/rdfontologyload.h 
b/monetdb5/extras/rdf/rdfontologyload.h
--- a/monetdb5/extras/rdf/rdfontologyload.h
+++ b/monetdb5/extras/rdf/rdfontologyload.h
@@ -24,6 +24,7 @@
 
 typedef struct OntClass {
        oid     cOid;           /*class Oid*/
+       char*   label;          /*ontology label (if available) */
        int*    scIdxes;        /*Idx of super classes*/
        int     numsc;          /*Number of super classes*/
        int     numAllocation;
@@ -36,7 +37,7 @@ rdf_export str
 RDFOntologyParser(int *ret, str *location, str *schema);
 
 rdf_export str
-RDFloadsqlontologies(int *ret, bat *auri, bat *aattr, bat *muri, bat *msuper);
+RDFloadsqlontologies(int *ret, bat *auri, bat *aattr, bat *muri, bat *msuper, 
bat *mlabel);
 
 /*
 rdf_export str
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -158,6 +158,15 @@ str printTKNZStringFromOid(oid id){
        return MAL_SUCCEED; 
 }
 
+
+static 
+char isOntologyName(oid valueOid, BUN *ontClassPos){ /* Returns 1 if valueOid is found in ontmetaBat, 0 otherwise; the lookup result is stored in *ontClassPos. */
+       *ontClassPos = BUN_NONE; /* default value; overwritten by the lookup on the next line */
+       *ontClassPos = BUNfnd(BATmirror(ontmetaBat), &valueOid); /* BUN_NONE signals that valueOid was not found */
+       if (*ontClassPos == BUN_NONE) return 0; 
+       else return 1; 
+}
+
 //Get the string for 
 static
 char getStringName(oid objOid, str *objStr, BATiter mapi, BAT *mapbat, char 
isTblName){
@@ -166,38 +175,57 @@ char getStringName(oid objOid, str *objS
        oid     realObjOid; 
        BUN     bun;
        int     i = 0;
-
-       if (objType == URI || objType == BLANKNODE){
-               realObjOid = objOid - ((oid)objType << (sizeof(BUN)*8 - 4));
-               takeOid(realObjOid, objStr); 
-       }
-       else{
-               str tmpObjStr;
-               str s;
-               int len; 
-               realObjOid = objOid - (objType*2 + 1) *  RDF_MIN_LITERAL;   /* 
Get the real objOid from Map or Tokenizer */ 
-               bun = BUNfirst(mapbat);
-               tmpObjStr = (str) BUNtail(mapi, bun + realObjOid); 
-               
-               *objStr = GDKstrdup(tmpObjStr);
+       char    hasOntologyLabel = 0; 
+
+       #if USE_ONTLABEL_FOR_NAME
+       if (isTblName){
+               char    isOntName = 0; 
+               BUN     tmpontClassPos = BUN_NONE; 
+
+               isOntName = isOntologyName(objOid, &tmpontClassPos);    
+
+               if (isOntName == 1){
+                       //Check if label is available 
+                       if (ontclassSet[tmpontClassPos].label != NULL){ //Use 
this label
+                               *objStr =  
GDKstrdup(ontclassSet[tmpontClassPos].label);
+                               hasOntologyLabel = 1; 
+                       } 
+               }
+       }
+       #endif
+
+       if (hasOntologyLabel == 0){
+               if (objType == URI || objType == BLANKNODE){
+                       realObjOid = objOid - ((oid)objType << (sizeof(BUN)*8 - 
4));
+                       takeOid(realObjOid, objStr); 
+               }
+               else{
+                       str tmpObjStr;
+                       str s;
+                       int len; 
+                       realObjOid = objOid - (objType*2 + 1) *  
RDF_MIN_LITERAL;   /* Get the real objOid from Map or Tokenizer */ 
+                       bun = BUNfirst(mapbat);
+                       tmpObjStr = (str) BUNtail(mapi, bun + realObjOid); 
                        
-               if (isTblName){
-                       s = *objStr;
-                       len = strlen(s);
-                       //Replace all non-alphabet character by ___
-                       for (i = 0; i < len; i++)
-                       {       
-                               //printf("i = %d: %c \n",i, s[i]);
-                               if (!isalpha(*s)){
-                                       *s = '_';
-                               }
-                               s++;
-                               
-                       }
-               }
-
-       }
-
+                       *objStr = GDKstrdup(tmpObjStr);
+                                       
+                       if (isTblName){
+                               s = *objStr;
+                               len = strlen(s);
+                               //Replace all non-alphabet character by ___
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to