Changeset: 72b6716bcfd7 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=72b6716bcfd7
Modified Files:
        monetdb5/extras/rdf/rdflabels.c
        monetdb5/extras/rdf/rdflabels.h
        monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:

create labels for freqCS, not maxCS/mergeCS


diffs (truncated from 394 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -221,7 +221,6 @@ int** initRelationMetadataCount(CSset* f
        if (!relationMetadataCount) fprintf(stderr, "ERROR: Couldn't malloc 
memory!\n");
        for (i = 0; i < freqCSset->numCSadded; ++i) {
                relationMetadataCount[i] = NULL;
-               if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
                relationMetadataCount[i] = (int *) malloc(sizeof(int) * 
freqCSset->items[i].numProp);
                if (!relationMetadataCount[i]) fprintf(stderr, "ERROR: Couldn't 
malloc memory!\n");
                for (j = 0; j < freqCSset->items[i].numProp; ++j) {
@@ -234,7 +233,7 @@ int** initRelationMetadataCount(CSset* f
 
 /* Calculate frequency per foreign key relationship. */
 static
-Relation*** initRelationMetadata(int** relationMetadataCount, CSmergeRel* 
csRelBetweenMergeFreqSet, CSset* freqCSset) {
+Relation*** initRelationMetadata(int** relationMetadataCount, CSrel* csrelSet, 
int num, CSset* freqCSset, int* csIdFreqIdxMap) {
        int             i, j, k;
        Relation***     relationMetadata;
 
@@ -245,49 +244,51 @@ Relation*** initRelationMetadata(int** r
 
        relationMetadata = (Relation ***) malloc(sizeof(Relation **) * 
freqCSset->numCSadded);
        if (!relationMetadata) fprintf(stderr, "ERROR: Couldn't malloc 
memory!\n");
-       for (i = 0; i < freqCSset->numCSadded; ++i) { // CS
-               CS cs = (CS) freqCSset->items[i];
-               if (cs.parentFreqIdx != -1) continue; // ignore
-               relationMetadata[i] = (Relation **) malloc (sizeof(Relation *) 
* cs.numProp);
-               if (!relationMetadata[i]) fprintf(stderr, "ERROR: Couldn't 
malloc memory!\n");
+       for (i = 0; i < num; ++i) { // CS
+               int csId = csIdFreqIdxMap[i];
+               CS cs = (CS) freqCSset->items[csId];
+               if (csId == -1) continue; // ignore
+               relationMetadata[csId] = (Relation **) malloc (sizeof(Relation 
*) * cs.numProp);
+               if (!relationMetadata[csId]) fprintf(stderr, "ERROR: Couldn't 
malloc memory!\n");
                for (j = 0; j < cs.numProp; ++j) { // propNo in CS order
                        int sum = 0;
-                       relationMetadataCount[i][j] = 0;
-                       relationMetadata[i][j] = NULL;
-                       for (k = 0; k < csRelBetweenMergeFreqSet[i].numRef; 
++k) { // propNo in CSrel
+                       relationMetadataCount[csId][j] = 0;
+                       relationMetadata[csId][j] = NULL;
+                       for (k = 0; k < csrelSet[i].numRef; ++k) { // propNo in 
CSrel
 
-                               if (csRelBetweenMergeFreqSet[i].lstPropId[k] == 
cs.lstProp[j]) {
-                                       int toId = 
csRelBetweenMergeFreqSet[i].lstRefFreqIdx[k];
-                                       relationMetadataCount[i][j] += 1;
+                               if (csrelSet[i].lstPropId[k] == cs.lstProp[j]) {
+                                       int toId = csIdFreqIdxMap[ 
csrelSet[i].lstRefCSoid[k] ];
+                                       if (toId == -1) continue; // ignore
+                                       relationMetadataCount[csId][j] += 1;
 
                                        // alloc/realloc
-                                       if (relationMetadataCount[i][j] == 1) {
+                                       if (relationMetadataCount[csId][j] == 
1) {
                                                // alloc
-                                               relationMetadata[i][j] = 
(Relation *) malloc (sizeof(Relation));
-                                               if (!relationMetadata[i][j]) 
fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
-                                               relationMetadata[i][j][0].to = 
toId;
-                                               relationMetadata[i][j][0].from 
= i;
-                                               relationMetadata[i][j][0].freq 
= csRelBetweenMergeFreqSet[i].lstCnt[k];
-                                               
relationMetadata[i][j][0].percent = -1;
+                                               relationMetadata[csId][j] = 
(Relation *) malloc (sizeof(Relation));
+                                               if (!relationMetadata[csId][j]) 
fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
+                                               relationMetadata[csId][j][0].to 
= toId;
+                                               
relationMetadata[csId][j][0].from = csId;
+                                               
relationMetadata[csId][j][0].freq = csrelSet[i].lstCnt[k];
+                                               
relationMetadata[csId][j][0].percent = -1;
                                        } else {
                                                // realloc
-                                               relationMetadata[i][j] = 
(Relation *) realloc(relationMetadata[i][j], sizeof(Relation) * 
relationMetadataCount[i][j]);
-                                               if (!relationMetadata[i][j]) 
fprintf(stderr, "ERROR: Couldn't realloc memory!\n");
-                                               
relationMetadata[i][j][relationMetadataCount[i][j] - 1].to = toId;
-                                               
relationMetadata[i][j][relationMetadataCount[i][j] - 1].from = i;
-                                               
relationMetadata[i][j][relationMetadataCount[i][j] - 1].freq = 
csRelBetweenMergeFreqSet[i].lstCnt[k];
-                                               
relationMetadata[i][j][relationMetadataCount[i][j] - 1].percent = -1;
+                                               relationMetadata[csId][j] = 
(Relation *) realloc(relationMetadata[csId][j], sizeof(Relation) * 
relationMetadataCount[csId][j]);
+                                               if (!relationMetadata[csId][j]) 
fprintf(stderr, "ERROR: Couldn't realloc memory!\n");
+                                               
relationMetadata[csId][j][relationMetadataCount[csId][j] - 1].to = toId;
+                                               
relationMetadata[csId][j][relationMetadataCount[csId][j] - 1].from = csId;
+                                               
relationMetadata[csId][j][relationMetadataCount[csId][j] - 1].freq = 
csrelSet[i].lstCnt[k];
+                                               
relationMetadata[csId][j][relationMetadataCount[csId][j] - 1].percent = -1;
                                        }
                                }
                        }
 
                        // get total count of values
-                       for (k = 0; k < relationMetadataCount[i][j]; ++k) {
-                               sum += relationMetadata[i][j][k].freq;
+                       for (k = 0; k < relationMetadataCount[csId][j]; ++k) {
+                               sum += relationMetadata[csId][j][k].freq;
                        }
                        // assign percentage values for every value
-                       for (k = 0; k < relationMetadataCount[i][j]; ++k) {
-                               relationMetadata[i][j][k].percent = (int) 
(100.0 * relationMetadata[i][j][k].freq / sum + 0.5);
+                       for (k = 0; k < relationMetadataCount[csId][j]; ++k) {
+                               relationMetadata[csId][j][k].percent = (int) 
(100.0 * relationMetadata[csId][j][k].freq / sum + 0.5);
                        }
                }
        }
@@ -387,7 +388,6 @@ void convertToSQL(CSset *freqCSset, Rela
        // create statement for every table
        for (i = 0; i < freqCSset->numCSadded; ++i) {
                char *temp;
-               if ( freqCSset->items[i].parentFreqIdx != -1) continue; // 
ignore
                temp = (char *) malloc(sizeof(char) * (strlen(labels[i].name) + 
1));
                if (!temp) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
                strcpy(temp, labels[i].name);
@@ -414,7 +414,6 @@ void convertToSQL(CSset *freqCSset, Rela
 
        // add foreign key columns and add foreign keys
        for (i = 0; i < freqCSset->numCSadded; ++i) {
-               if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
                for (j = 0; j < labels[i].numProp; ++j) {
                        char *temp2;
                        int refCounter = 0;
@@ -453,7 +452,7 @@ void convertToSQL(CSset *freqCSset, Rela
 }
 
 static
-void createSQLMetadata(CSset* freqCSset, CSmergeRel* csRelBetweenMergeFreqSet, 
Labels* labels) {
+void createSQLMetadata(CSset* freqCSset, CSrel* csrelSet, int num, Labels* 
labels, int* csIdFreqIdxMap) {
        int     **matrix = NULL; // matrix[from][to] frequency
        int     i, j, k;
        FILE    *fout;
@@ -472,24 +471,27 @@ void createSQLMetadata(CSset* freqCSset,
        }
 
        // set values
-       for (i = 0; i < freqCSset->numCSadded; ++i) {
-               if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
+       for (i = 0; i < num; ++i) {
+               int csId = csIdFreqIdxMap[i];
+               CS cs = (CS) freqCSset->items[csId];
+               if (csId == -1) continue; // ignore
 
-               for (j = 0; j < freqCSset->items[i].numProp; ++j) { // propNo 
in CS order
+               for (j = 0; j < cs.numProp; ++j) { // propNo in CS order
                        // check foreign key frequency
                        int sum = 0;
-                       for (k = 0; k < csRelBetweenMergeFreqSet[i].numRef; 
++k) {
-                               if (csRelBetweenMergeFreqSet[i].lstPropId[k] == 
freqCSset->items[i].lstProp[j]) {
-                                       sum += 
csRelBetweenMergeFreqSet[i].lstCnt[k];
+                       for (k = 0; k < csrelSet[i].numRef; ++k) {
+                               if (csrelSet[i].lstPropId[k] == cs.lstProp[j]) {
+                                       sum += csrelSet[i].lstCnt[k];
                                }
                        }
 
-                       for (k = 0; k < csRelBetweenMergeFreqSet[i].numRef; 
++k) { // propNo in CSrel
-                               if (csRelBetweenMergeFreqSet[i].lstPropId[k] == 
freqCSset->items[i].lstProp[j]) {
-                                       int to = 
csRelBetweenMergeFreqSet[i].lstRefFreqIdx[k];
-                                       if (i == to) continue; // ignore self 
references
-                                       if ((int) (100.0 * 
csRelBetweenMergeFreqSet[i].lstCnt[k] / sum + 0.5) < FK_FREQ_THRESHOLD) 
continue; // foreign key is not frequent enough
-                                       matrix[i][to] += 
csRelBetweenMergeFreqSet[i].lstCnt[k]; // multiple links from 'i' to 'to'? add 
the frequencies
+                       for (k = 0; k < csrelSet[i].numRef; ++k) { // propNo in 
CSrel
+                               if (csrelSet[i].lstPropId[k] == cs.lstProp[j]) {
+                                       int toId = csIdFreqIdxMap[ 
csrelSet[i].lstRefCSoid[k] ];
+                                       if (toId == -1) continue; // ignore
+                                       if (i == toId) continue; // ignore self 
references
+                                       if ((int) (100.0 * 
csrelSet[i].lstCnt[k] / sum + 0.5) < FK_FREQ_THRESHOLD) continue; // foreign 
key is not frequent enough
+                                       matrix[csId][toId] += 
csrelSet[i].lstCnt[k]; // multiple links from 'i' to 'toId'? add the frequencies
                                }
                        }
                }
@@ -510,7 +512,6 @@ void createSQLMetadata(CSset* freqCSset,
        fout = fopen("tableIdFreq.csv", "wt");
        for (i = 0; i < freqCSset->numCSadded; ++i) {
                char *temp;
-               if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
                temp = (char *) malloc(sizeof(char) * (strlen(labels[i].name) + 
1));
                if (!temp) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
                strcpy(temp, labels[i].name);
@@ -542,7 +543,6 @@ void printTxt(CSset* freqCSset, Labels* 
 
        fout = fopen(filename, "wt");
        for (i = 0; i < freqCSset->numCSadded; ++i) {
-               if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
                fprintf(fout, "%s (CS "BUNFMT"): ", labels[i].name, 
freqCSset->items[i].csId);
                for (j = 0; j < labels[i].numProp; ++j) {
                        if (j + 1 < labels[i].numProp) fprintf(fout, "%s, ", 
labels[i].lstProp[j]);
@@ -611,12 +611,7 @@ void createTypeAttributesHistogram(BAT *
                for (i = 0; i < typeAttributesCount; ++i) {
                        if (strstr(propStr, typeAttributes[i]) != NULL) {
                                // prop is a type!
-
-                               // lookup maxCS/mergeCS
                                csFreqIdx = csIdFreqIdxMap[subjCSMap[*sbt]];
-                               while 
(freqCSset->items[csFreqIdx].parentFreqIdx != -1) {
-                                       csFreqIdx = 
freqCSset->items[csFreqIdx].parentFreqIdx;
-                               }
 
                                // get object
                                obt = (oid *) BUNtloc(oi, p);
@@ -679,7 +674,6 @@ void createTypeAttributesHistogram(BAT *
 
        // sort descending by frequency
        for (i = 0; i < freqCSset->numCSadded; ++i) {
-               if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
                for (j = 0; j < typeAttributesCount; ++j) {
                        qsort(typeAttributesHistogram[i][j], 
typeAttributesHistogramCount[i][j], sizeof(TypeAttributesFreq), 
compareTypeAttributesFreqs);
                }
@@ -1041,7 +1035,6 @@ void createPropStatistics(PropStat* prop
 
        for (i = 0; i < freqCSset->numCSadded; ++i) {
                CS cs = (CS)freqCSset->items[i];
-               if (cs.parentFreqIdx != -1) continue; // ignore
                for (j = 0; j < cs.numProp; ++j) {
                        // add prop to propStat
                        BUN     bun = BUNfnd(BATmirror(propStat->pBat), (ptr) 
&cs.lstProp[j]);
@@ -1089,13 +1082,9 @@ static
 void createOntologyLookupResult(str** result, CSset* freqCSset, int* 
resultCount, str** ontattributes, int ontattributesCount, str** ontmetadata, 
int ontmetadataCount) {
        int             i, j;
        PropStat        *propStat;
-       int             numCS = 0;
 
-       for (i = 0; i < freqCSset->numCSadded; ++i) {
-               if (freqCSset->items[i].parentFreqIdx == -1) numCS += 1;
-       }
        propStat = initPropStat();
-       createPropStatistics(propStat, numCS, freqCSset);
+       createPropStatistics(propStat, freqCSset->numCSadded, freqCSset);
 
        for (i = 0; i < freqCSset->numCSadded; ++i) {
                CS              cs;
@@ -1104,7 +1093,6 @@ void createOntologyLookupResult(str** re
                int             *propOntologiesCount = NULL;
 
                cs = (CS) freqCSset->items[i];
-               if (cs.parentFreqIdx != -1) continue; // ignore
 
                // order properties by ontologies
                propOntologiesCount = (int *) malloc(sizeof(int) * 
ontologyCount);
@@ -1170,7 +1158,6 @@ void printUML(CSset *freqCSset, int type
 
        for (i = 0; i < freqCSset->numCSadded; ++i) {
                CS cs = (CS) freqCSset->items[i];
-               if (cs.parentFreqIdx != -1) continue; // ignore
 
 #if SHOW_CANDIDATES
                /* DATA SOURCES */
@@ -1370,14 +1357,12 @@ void printUML(CSset *freqCSset, int type
                        getPropNameShort(&propStrShort, propStr);
 #endif
 
-                       if (cs.parentFreqIdx == -1) {
-                               // if it is a type, include top-3 values
+                       // if it is a type, include top-3 values
 #if USE_SHORT_NAMES
-                               fprintf(fout, "<TR><TD 
PORT=\"%s\">%s</TD></TR>\n", propStrEscaped, propStrShort);
+                       fprintf(fout, "<TR><TD PORT=\"%s\">%s</TD></TR>\n", 
propStrEscaped, propStrShort);
 #else
-                               fprintf(fout, "<TR><TD 
PORT=\"%s\">%s</TD></TR>\n", propStrEscaped, propStr);
+                       fprintf(fout, "<TR><TD PORT=\"%s\">%s</TD></TR>\n", 
propStrEscaped, propStr);
 #endif
-                       }
                        free(propStrEscaped);
 
                }
@@ -1387,7 +1372,6 @@ void printUML(CSset *freqCSset, int type
 
        for (i = 0; i < freqCSset->numCSadded; ++i) {
                CS cs = (CS) freqCSset->items[i];
-               if (cs.parentFreqIdx != -1) continue; // ignore
                for (j = 0; j < cs.numProp; ++j) {
                        char    *propStrEscaped = NULL;
 #if USE_SHORT_NAMES
@@ -1588,7 +1572,6 @@ void getAllLabels(Labels* labels, CSset*
        for (i = 0; i < freqCSset->numCSadded; ++i) {
                CS cs = (CS) freqCSset->items[i];
                char *temp = NULL;
-               if (cs.parentFreqIdx != -1) continue; // ignore
 
                // get table name
                getTableName(&temp, i,  typeAttributesCount, 
typeAttributesHistogram, typeAttributesHistogramCount, typeStat, typeStatCount, 
result, resultCount, links);
@@ -1649,7 +1632,6 @@ void createLinks(CSset* freqCSset, Relat
 
        for (i = 0; i < freqCSset->numCSadded; ++i) {
                CS cs = (CS) freqCSset->items[i];
-               if (cs.parentFreqIdx != -1) continue; // ignore
                for (j = 0; j < cs.numProp; ++j) {
                        for (k = 0; k < relationMetadataCount[i][j]; ++k) {
                                int to;
@@ -1725,7 +1707,6 @@ void freeRelationMetadata(Relation*** re
 
        for (i = 0; i < freqCSset->numCSadded; ++i) { // CS
                CS cs = (CS) freqCSset->items[i];
-               if (cs.parentFreqIdx != -1) continue; // ignore
                for (j = 0; j < cs.numProp; ++j) {
                        if (relationMetadata[i][j])
                                free(relationMetadata[i][j]);
@@ -1773,7 +1754,7 @@ void freeOntologyLookupResult(str** onto
 }
 
 /* Creates labels for all CS (without a parent). */
-Labels* createLabels(CSset* freqCSset, CSmergeRel* csRelBetweenMergeFreqSet, 
BAT *sbat, BATiter si, BATiter pi, BATiter oi, oid *subjCSMap, BAT* mbat, int 
*csIdFreqIdxMap, int freqThreshold, str** ontattributes, int 
ontattributesCount, str** ontmetadata, int ontmetadataCount) {
+Labels* createLabels(CSset* freqCSset, CSrel* csrelSet, int num, BAT *sbat, 
BATiter si, BATiter pi, BATiter oi, oid *subjCSMap, BAT* mbat, int 
*csIdFreqIdxMap, int freqThreshold, str** ontattributes, int 
ontattributesCount, str** ontmetadata, int ontmetadataCount) {
 #if USE_TYPE_NAMES
        char*           typeAttributes[] = {
                                "http://ogp.me/ns#type",
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to