Changeset: 8407c931a2e1 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=8407c931a2e1
Modified Files:
        monetdb5/extras/rdf/rdfparams.c
        monetdb5/extras/rdf/rdfparams.h
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Compute average precision


diffs (122 lines):

diff --git a/monetdb5/extras/rdf/rdfparams.c b/monetdb5/extras/rdf/rdfparams.c
--- a/monetdb5/extras/rdf/rdfparams.c
+++ b/monetdb5/extras/rdf/rdfparams.c
@@ -29,6 +29,8 @@
 
 int dimensionFactor; 
 float ontologySimThreshold; 
+int upperboundNumTables;
+float generalityThreshold; 
 
 void createDefaultParamsFile(void){
        
@@ -37,7 +39,8 @@ void createDefaultParamsFile(void){
        paramFile = fopen("params.ini", "wt");
        
        fprintf(paramFile, "dimensionFactor 3\n");
-       fprintf(paramFile, "ontologySimThreshold 0.8\n");
+       fprintf(paramFile, "ontologySimThreshold 0.75\n");
+       fprintf(paramFile, "upperboundNumTables 1000");
 
        fclose(paramFile); 
 }
@@ -64,7 +67,21 @@ void readParamsInput(void){
                                ontologySimThreshold = atof(value);
                                printf("ontologySimThreshold = %f\n",ontologySimThreshold);
                        }
+                       else if (strcmp(variable, "upperboundNumTables") == 0){
+                               upperboundNumTables = atoi(value);
+                               printf("upperboundNumTables = %d\n", upperboundNumTables);
+                       }
                }
        }
 
+       
+       if (upperboundNumTables != 0){
+               generalityThreshold = (float) 1 / (float)upperboundNumTables; 
+               printf("generalityThreshold = %f\n",generalityThreshold);
+       }
+       else{ //default
+               generalityThreshold = 0.001; 
+       }
+
+
 }
diff --git a/monetdb5/extras/rdf/rdfparams.h b/monetdb5/extras/rdf/rdfparams.h
--- a/monetdb5/extras/rdf/rdfparams.h
+++ b/monetdb5/extras/rdf/rdfparams.h
@@ -33,6 +33,8 @@
 
 extern int dimensionFactor; 
 extern float ontologySimThreshold;
+extern int upperboundNumTables; 
+extern float generalityThreshold;
 
 rdf_export void
 createDefaultParamsFile(void);
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -3088,7 +3088,7 @@ void updateParentIdxAll(CSset *freqCSset
 
 #if USE_LABEL_FINDING_MAXCS
 /*
- *  * Return 1 if there is semantic evidence against merging the two CS's, this is the case iff the two CS's have a hierarchy and their common ancestor is too generic (support above IMPORTANCE_THRESHOLD).
+ *  * Return 1 if there is semantic evidence against merging the two CS's, this is the case iff the two CS's have a hierarchy and their common ancestor is too generic (support above generalityThreshold).
  *   */
 static
 char isEvidenceAgainstMerging(int freqId1, int freqId2, CSlabel* labels, OntoUsageNode *tree) {
@@ -3132,7 +3132,7 @@ char isEvidenceAgainstMerging(int freqId
                level++;
        }
 
-       if (tmpNode->percentage >= IMPORTANCE_THRESHOLD) {
+       if (tmpNode->percentage >= generalityThreshold) {
                // have common ancestor but it is too generic --> there is semantic evidence against merging the two CS's
                return 1;
        } else {
@@ -4494,7 +4494,7 @@ char isSemanticSimilar(int freqId1, int 
                */
                
 
-               if (tmpNode->percentage < IMPORTANCE_THRESHOLD) {
+               if (tmpNode->percentage < generalityThreshold) {
                        //printf("Merge two CS's %d (Label: "BUNFMT") and %d (Label: "BUNFMT") using the common ancestor ("BUNFMT") at level %d (score: %f)\n",
                        //              freqId1, labels[freqId1].name, freqId2, labels[freqId2].name,tmpNode->uri, i,tmpNode->percentage);
                        oid classOid;
@@ -8972,6 +8972,7 @@ void computeMetricsQ(CSset *freqCSset){
        int tblIdx = -1;
        CS cs;  
        int     totalCov = 0; 
+       float   totalPrecision = 0.0; 
        float   Q = 0.0;
        int     i;
        int curNumMergeCS = countNumberMergeCS(freqCSset);
@@ -8990,11 +8991,14 @@ void computeMetricsQ(CSset *freqCSset){
                        weight[tblIdx] = (float) cs.coverage * ( fillRatio[tblIdx] + refRatio[tblIdx]); 
                        //weight[tblIdx] = (float) cs.coverage * ( fillRatio[tblIdx]);  //If do not consider reference ratio
                        totalCov += cs.coverage;
+                       totalPrecision += fillRatio[tblIdx];
                        
                        Q += weight[tblIdx];
                }
        }
        printf("Performance metric Q = (weighting %f)/(totalCov %d * numTbl %d) \n", Q,totalCov, curNumMergeCS);
+       printf("Average precision = %f\n",(float)totalPrecision/curNumMergeCS);
+       //printf("Average precision = %f\n",(float)totalPrecision/totalCov);
 
        Q = Q/((float)totalCov * curNumMergeCS);
 
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -235,7 +235,8 @@ typedef struct SubCSSet{
 #define INIT_NUM_CS 1000 
 #define SIM_THRESHOLD 0.6
 #define SIM_TFIDF_THRESHOLD 0.75
-#define IMPORTANCE_THRESHOLD 0.001 //This is used when merging CS's by common ancestor
+//#define IMPORTANCE_THRESHOLD 0.001 //This is used when merging CS's by common ancestor
+                                       // Replace by generalityThreshold = 1/(upperboundNumTables)
 #define COMMON_ANCESTOR_LOWEST_SPECIFIC_LEVEL 2 
 
 //#define MIN_PERCETAGE_S5 5   // Merge all CS refered by more than 1/MIN_PERCETAGE_S6 percent of a CS via one property
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to