Changeset: 536a87adb3d5 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=536a87adb3d5
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Compute stats with the removal of small tables


diffs (123 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -8904,10 +8904,17 @@ Pscore computeMetricsQ(CSset *freqCSset)
        float* weight;
        int tblIdx = -1;
        CS cs;  
+
        int     totalCov = 0; 
        float   totalPrecision = 0.0; 
-       long    overalFill = 0; 
-       long    overalMaxFill = 0;
+       lng     overalFill = 0; 
+       lng     overalMaxFill = 0;
+
+       int     totalExpFinalCov = 0; 
+       float   totalExpFinalPrecision = 0.0; 
+       lng     overalExpFinalFill = 0; 
+       lng     overalExpFinalMaxFill = 0;
+       float   expFinalQ = 0.0; 
 
        float   Q = 0.0;
        int     i;
@@ -8937,24 +8944,37 @@ Pscore computeMetricsQ(CSset *freqCSset)
                        //if ((cs.numProp *  cs.support) > 1000000) printf("FreqCS %d has %d prop and support %d (Fill Ratio %f )\n",i,cs.numProp,cs.support,fillRatio[tblIdx]);
                        
                        Q += weight[tblIdx];
-               }
-               if (isCSTable(freqCSset->items[i], 1)) numExpFinalTbl++;
+
+                       if (isCSTable(freqCSset->items[i], 1)){ 
+                               totalExpFinalCov += cs.coverage;
+                               totalExpFinalPrecision += fillRatio[tblIdx];
+                               overalExpFinalFill += cs.numFill;
+                               overalExpFinalMaxFill += cs.numProp *  cs.support;
+                               expFinalQ += weight[tblIdx];
+                               numExpFinalTbl++;
+                       }
+               }
        }
        printf("Performance metric Q = (weighting %f)/(totalCov %d * numTbl %d) \n", Q,totalCov, curNumMergeCS);
        printf("Average precision = %f\n",(float)totalPrecision/curNumMergeCS);
-       printf("Overall precision = %f (overfill %ld / overalMaxFill %ld)\n", (float) overalFill/overalMaxFill, overalFill, overalMaxFill);
+       printf("Overall precision = %f (overfill %lld / overalMaxFill %lld)\n", (float) overalFill/overalMaxFill, overalFill, overalMaxFill);
        //printf("Average precision = %f\n",(float)totalPrecision/totalCov);
 
        Q = Q/((float)totalCov * curNumMergeCS);
 
        printf("==> Performance metric Q = %f \n", Q);
 
+       expFinalQ = expFinalQ/((float)totalExpFinalCov * numExpFinalTbl);
+
        pscore.avgPrec = (float)totalPrecision/curNumMergeCS; 
        pscore.overallPrec = (float) overalFill/overalMaxFill;
        pscore.Qscore = Q;
        //pscore.Cscore = 
        pscore.nTable = curNumMergeCS;
        pscore.nFinalTable = numExpFinalTbl;
+       pscore.avgPrecFinal = (float)totalExpFinalPrecision/numExpFinalTbl;
+       pscore.overallPrecFinal = (float) overalExpFinalFill/overalExpFinalMaxFill;
+       pscore.QscoreFinal = expFinalQ;
 
        free(fillRatio); 
        free(refRatio); 
@@ -8992,8 +9012,8 @@ void computeMetricsQForRefinedTable(CSse
        #endif
 
        float   totalPrecision = 0.0; 
-       long    overalFill = 0; 
-       long    overalMaxFill = 0;
+       lng     overalFill = 0; 
+       lng     overalMaxFill = 0;
 
        fillRatio = (float*)malloc(sizeof(float) * numTables);
        refRatio = (float*)malloc(sizeof(float) * numTables);
@@ -9130,7 +9150,7 @@ void computeMetricsQForRefinedTable(CSse
        }
        printf("Refined Table: Performance metric Q = (weighting %f)/(totalCov %d * numTbl %d) \n", Q,totalCov, numTables);
        printf("Average precision = %f\n",(float)totalPrecision/numTables);
-       printf("Overall precision = %f (overfill %ld / overalMaxFill %ld)\n", (float) overalFill/overalMaxFill, overalFill, overalMaxFill);
+       printf("Overall precision = %f (overfill %lld / overalMaxFill %lld)\n", (float) overalFill/overalMaxFill, overalFill, overalMaxFill);
 
        Q = Q/((float)totalCov * numTables);
 
@@ -9475,18 +9495,21 @@ void setFinalsimTfidfThreshold(Pscore *p
        float totalgap; 
 
 
-       printf("SimThreshold|avgPrecision|OvrallPrecision|Qscore|numTable|FinalTable|precRatio|finalTblRatio|tblRatio\n");
+       printf("SimThreshold|avgPrecision|OvrallPrecision|Qscore|numTable|avgPrecisionFinal|OvrallPrecisionFinal|QscoreFinal|FinalTable|precRatio|tblRatio|precFinalRatio|finalTblRatio\n");
        for ( i = 0; i < numRun; i++){
                float numFinTblRatio = 1.0;
                float numTblRatio = 1.0;
                float precRatio = 1.0; 
+               float precRatioFinal = 1.0;
                if (i > 0 && i < (numRun - 1)){
                        numFinTblRatio = (float)(pscores[i+1].nFinalTable - pscores[i].nFinalTable)/(pscores[i].nFinalTable - pscores[i-1].nFinalTable);
                        numTblRatio  = (float)(pscores[i+1].nTable - pscores[i].nTable)/(pscores[i].nTable - pscores[i-1].nTable);
                        precRatio = (float)(pscores[i].overallPrec - pscores[i-1].overallPrec)/(pscores[i+1].overallPrec - pscores[i].overallPrec);
-               }
-               printf("%f|%f|%f|%f|%d|%d|%f|%f|%f\n",0.5 + i * 0.05,pscores[i].avgPrec, 
-                               pscores[i].overallPrec, pscores[i].Qscore, pscores[i].nTable,pscores[i].nFinalTable,precRatio,numFinTblRatio,numTblRatio);
+                       precRatioFinal = (float)(pscores[i].overallPrecFinal - pscores[i-1].overallPrecFinal)/(pscores[i+1].overallPrecFinal - pscores[i].overallPrecFinal);
+               }
+               printf("%f|%f|%f|%f|%d|%f|%f|%f|%d|%f|%f|%f|%f\n",0.5 + i * 0.05,pscores[i].avgPrec, pscores[i].overallPrec, pscores[i].Qscore, pscores[i].nTable,
+                                                                    pscores[i].avgPrecFinal, pscores[i].overallPrecFinal, pscores[i].QscoreFinal, pscores[i].nFinalTable,
+                                                                    precRatio,numTblRatio, precRatioFinal, numFinTblRatio);
        }
        
        totalgap = pscores[numRun-1].overallPrec - pscores[0].overallPrec;
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -445,6 +445,9 @@ typedef struct Pscore{              //Performance sc
        float Qscore;           //metric score Q
        float Cscore;           //metric score C
        int   nTable;           //number of tables
+       float avgPrecFinal;     //Avg precision of expected final tables (after removing small size table)
+       float overallPrecFinal; //of expected final tables (after removing small size table)
+       float QscoreFinal;      //of expected final tables (after removing small size table)
        int   nFinalTable;      //Expected number of final table after removing e.g., small size table  
 } Pscore; 
 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to