MonetDB: transaction-replication - Merge with default branch

2014-08-04 Thread Dimitar Nedev
Changeset: 9ed89e343077 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9ed89e343077
Added Files:
sql/test/BugTracker-2014/Tests/stringfloatshtcompare.Bug-3512.stable.err
sql/test/BugTracker-2014/Tests/stringfloatshtcompare.Bug-3512.stable.out
Modified Files:
NT/installer32/cv64-32.sh
NT/installer64/cv32-64.sh
clients/mapiclient/stethoscope.c
clients/mapiclient/tomograph.c
clients/odbc/setup/Makefile.ag
clients/odbc/winsetup/Makefile.ag
gdk/gdk_atoms.c
monetdb5/mal/mal.h
monetdb5/mal/mal_dataflow.c
monetdb5/mal/mal_interpreter.c
monetdb5/mal/mal_profiler.c
monetdb5/mal/mal_profiler.h
sql/backends/monet5/sql_scenario.c
sql/benchmarks/tpch/Tests/01-explain.stable.out
sql/benchmarks/tpch/Tests/03-explain.stable.out
sql/common/sql_types.c
sql/server/rel_optimizer.c
sql/server/sql_mvc.c
sql/server/sql_parser.y
sql/server/sql_scan.c
sql/server/sql_scan.h
sql/storage/store.c

sql/test/BugTracker-2009/Tests/old_views_incorrectly_reused.SF-2863804.view1.sql

sql/test/BugTracker-2009/Tests/old_views_incorrectly_reused.SF-2863804.view2.sql
sql/test/BugTracker-2014/Tests/All
sql/test/BugTracker-2014/Tests/stringfloatshtcompare.Bug-3512.sql
sql/test/BugTracker/Tests/between_with_column.SF-1959410.stable.err
sql/test/pg_regress/Tests/date.sql
sql/test/pg_regress/Tests/date.stable.err
sql/test/pg_regress/Tests/date.stable.out
sql/test/pg_regress/Tests/load.SQL.sh
sql/test/pg_regress/Tests/time.sql
sql/test/pg_regress/Tests/time.stable.err
sql/test/pg_regress/Tests/time.stable.out
sql/test/pg_regress/Tests/timestamp.sql
sql/test/pg_regress/Tests/timestamp.stable.err
sql/test/pg_regress/Tests/timestamp.stable.out
sql/test/pg_regress/Tests/timestamptz.sql
sql/test/pg_regress/Tests/timestamptz.stable.err
sql/test/pg_regress/Tests/timestamptz.stable.out
sql/test/pg_regress/Tests/timetz.sql
sql/test/pg_regress/Tests/timetz.stable.err
sql/test/pg_regress/Tests/timetz.stable.out
sql/test/pg_regress/postgresql2sql99.sh
testing/quicktest
Branch: transaction-replication
Log Message:

Merge with default branch


diffs (truncated from 8055 to 300 lines):

diff --git a/NT/installer32/cv64-32.sh b/NT/installer32/cv64-32.sh
--- a/NT/installer32/cv64-32.sh
+++ b/NT/installer32/cv64-32.sh
@@ -19,13 +19,13 @@
 # this script is mostly to document what I did, not for regular use.
 
 cp ../installer64/*.{vdproj,sln} .
-sed -i -e '/TargetPlatform/s/3:1/3:0/' \
-   -e 's/win64/win32/' \
-   -e 's/\[ProgramFiles64Folder\]/[ProgramFilesFolder]/' \
-   -e 's/C:Program Files.*Common FilesMerge 
ModulesMicrosoft_VC100_CRT_x64.msm/C:Program FilesCommon 
FilesMerge ModulesMicrosoft_VC100_CRT_x86.msm/' \
-   -e 's/"ProductCode" = "8:{.*/"ProductCode" = 
"8:{66BABD32-D69D-4A89-A7F3-2655D4CD0641}"/' \
-   -e 's/"PackageCode" = "8:{.*/"PackageCode" = 
"8:{D9B2D386-1461-43BC-9A63-93F1BA0D7921}"/' \
-   -e 's/"UpgradeCode" = 
"8:{95ACBC8C-BC4B-4901-AF70-48B54A5C20F7}"/"UpgradeCode" = 
"8:{C1F69378-3F5C-4120-8224-32F07D3458F3}"/' \
-   -e 's/"UpgradeCode" = 
"8:{8E6CDFDE-39B9-43D9-97B3-2440C012845C}"/"UpgradeCode" = 
"8:{92C89C36-0E86-45E1-B3D8-0D6C91108F30}"/' \
-   -e 's/"UpgradeCode" = 
"8:{839D3C90-B578-41E2-A004-431440F9E899}"/"UpgradeCode" = 
"8:{730C595B-DBA6-48D7-94B8-A98780AC92B6}"/' \
+sed -i~ -e '/TargetPlatform/s/3:1/3:0/' \
+   -e 's/win64/win32/' \
+   -e 's/\[ProgramFiles64Folder\]/[ProgramFilesFolder]/' \
+   -e 's/C:Program Files.*Common FilesMerge 
ModulesMicrosoft_VC100_CRT_x64.msm/C:Program FilesCommon 
FilesMerge ModulesMicrosoft_VC100_CRT_x86.msm/' \
+   -e 's/"ProductCode" = "8:{.*/"ProductCode" = 
"8:{66BABD32-D69D-4A89-A7F3-2655D4CD0641}"/' \
+   -e 's/"PackageCode" = "8:{.*/"PackageCode" = 
"8:{D9B2D386-1461-43BC-9A63-93F1BA0D7921}"/' \
+   -e 's/"UpgradeCode" = 
"8:{95ACBC8C-BC4B-4901-AF70-48B54A5C20F7}"/"UpgradeCode" = 
"8:{C1F69378-3F5C-4120-8224-32F07D3458F3}"/' \
+   -e 's/"UpgradeCode" = 
"8:{8E6CDFDE-39B9-43D9-97B3-2440C012845C}"/"UpgradeCode" = 
"8:{92C89C36-0E86-45E1-B3D8-0D6C91108F30}"/' \
+   -e 's/"UpgradeCode" = 
"8:{839D3C90-B578-41E2-A004-431440F9E899}"/"UpgradeCode" = 
"8:{730C595B-DBA6-48D7-94B8-A98780AC92B6}"/' \
 *.vdproj
diff --git a/NT/installer64/cv32-64.sh b/NT/installer64/cv32-64.sh
--- a/NT/installer64/cv32-64.sh
+++ b/NT/installer64/cv32-64.sh
@@ -19,13 +19,13 @@
 # this script is mostly to document what I did, not for regular use.
 
 cp ../installer32/*.{vdproj,sln} .
-sed -i -e '/TargetPlatform/s/3:0/3:1/' \
-   -e 's/win32/win64/' \
-   -e 's/\[ProgramFilesFolder\]/[ProgramFil

MonetDB: rdf - Redefine threshold for detecting dimension table.

2014-08-04 Thread Minh-Duc Pham
Changeset: b0c7844f3b85 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b0c7844f3b85
Modified Files:
monetdb5/extras/rdf/Makefile.ag
monetdb5/extras/rdf/rdf_shredder.c
monetdb5/extras/rdf/rdflabels.c
monetdb5/extras/rdf/rdflabels.h
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Redefine threshold for detecting dimension table.

Also, merge the two tf-idf similarity thresholds in rdflabels and rdfschema into one.


diffs (168 lines):

diff --git a/monetdb5/extras/rdf/Makefile.ag b/monetdb5/extras/rdf/Makefile.ag
--- a/monetdb5/extras/rdf/Makefile.ag
+++ b/monetdb5/extras/rdf/Makefile.ag
@@ -32,7 +32,7 @@ lib__rdf = {
#MODULE
NOINST
#DIR = libdir/monetdb5
-   SOURCES = rdf.h rdftypes.h rdfschema.h rdfgraph.h rdfgraph.c 
rdfminheap.h rdfminheap.c rdflabels.h rdfretrieval.h rdfparser.h rdftypes.c 
rdfparser.c rdfontologyload.h rdfontologyload.c rdf_shredder.c rdfalgebra.c 
rdfschema.c rdflabels.c rdfretrieval.c  
+   SOURCES = rdf.h rdftypes.h rdfparams.h rdfparams.c rdfschema.h 
rdfgraph.h rdfgraph.c rdfminheap.h rdfminheap.c rdflabels.h rdfretrieval.h 
rdfparser.h rdftypes.c rdfparser.c rdfontologyload.h rdfontologyload.c 
rdf_shredder.c rdfalgebra.c rdfschema.c rdflabels.c rdfretrieval.c  
 
#SEP = _
# LIBS =  ./hashmap/librdfhash  
diff --git a/monetdb5/extras/rdf/rdf_shredder.c 
b/monetdb5/extras/rdf/rdf_shredder.c
--- a/monetdb5/extras/rdf/rdf_shredder.c
+++ b/monetdb5/extras/rdf/rdf_shredder.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 typedef struct graphBATdef {
graphBATType batType;/* BAT type */
@@ -1014,6 +1015,10 @@ RDFParser (BAT **graph, str *location, s
}
tmpendT = clock();
printf ("Post processing took %f seconds.\n", ((float)(tmpendT - 
tmpbeginT))/CLOCKS_PER_SEC);
+
+   //Create default paramters file 
+   createDefaultParamsFile();
+
freeParserData(pdata);
return MAL_SUCCEED;
 }
diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -830,7 +830,7 @@ oid* getOntologyCandidates(oid** ontattr
for (k = 0; k < num; ++k) {
int found = 0;
//if (freqId == 161) printf("   TFIDF score at %d 
("BUNFMT") is: %f | Number of matched Prop %d \n",k, classStat[k].ontoClass, 
classStat[k].tfidfs,classStat[k].numMatchedProp);
-   if (classStat[k].tfidfs < ONTOLOGY_FREQ_THRESHOLD) 
break; // values not frequent enough (list is sorted by tfidfs)
+   if (classStat[k].tfidfs < SIM_TFIDF_THRESHOLD) break; 
// values not frequent enough (list is sorted by tfidfs)
for (j = 0; j < ontmetadataCount && (found == 0); ++j) {
oid muri = ontmetadata[0][j];
oid msuper = ontmetadata[1][j];
diff --git a/monetdb5/extras/rdf/rdflabels.h b/monetdb5/extras/rdf/rdflabels.h
--- a/monetdb5/extras/rdf/rdflabels.h
+++ b/monetdb5/extras/rdf/rdflabels.h
@@ -95,8 +95,7 @@ enum {
 #define FK_MIN_REFER_PERCENTAGE 25 // To be consider as the name of a CS, 
the FK have to point to at least FK_MIN_REFER_PERCENTAGE of all CS's instances 
 #define TYPE_FREQ_THRESHOLD 80 // X % of the type values have to be 
this value
 #define GOOD_TYPE_FREQ_THRESHOLD 95// If a type appears really frequent in 
that CS, it should be choosen
-//#define ONTOLOGY_FREQ_THRESHOLD 0.4  // similarity threshold for tfidf 
simularity for ontology classes
-#define ONTOLOGY_FREQ_THRESHOLD 0.8// similarity threshold for tfidf 
simularity for ontology classes
+//#define ONTOLOGY_FREQ_THRESHOLD 0.8  // similarity threshold for tfidf 
simularity for ontology classes
 
 #define USE_SHORT_NAMES 1  // use getPropNameShort()
 #define USE_TYPE_NAMES 1   // use type attribute values for 
labeling
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -36,6 +36,7 @@
 #include "rdfontologyload.h"
 #include 
 #include 
+#include 
 
 #define SHOWPROPERTYNAME 1
 
@@ -569,7 +570,8 @@ void updateFreqCStype(CSset *freqCSset, 
int threshold = 0; 
int ratio; 
 
-   ratio = pow(IR_DIMENSION_FACTOR, nIterIR);
+   //ratio = pow(IR_DIMENSION_FACTOR, nIterIR);
+   ratio = IR_DIMENSION_FACTOR;
 
printf("List of dimension tables: \n");
for (i = 0; i < num; i++){
@@ -9385,7 +9387,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
//return "Error"; 
 
/* Get the number of indirect refs in order to detect dimension table */
-   
+   if(0)   {
//nIterIR = getDiameter(3, freqCSset->numCSadded,csrelSet);
nIterIR = getDiameterExact(freqCSset->

MonetDB: rdf - Add missing files

2014-08-04 Thread Minh-Duc Pham
Changeset: baf530118f87 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=baf530118f87
Added Files:
monetdb5/extras/rdf/rdfparams.c
monetdb5/extras/rdf/rdfparams.h
Modified Files:
monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:

Add missing files


diffs (148 lines):

diff --git a/monetdb5/extras/rdf/rdfparams.c b/monetdb5/extras/rdf/rdfparams.c
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/rdf/rdfparams.c
@@ -0,0 +1,65 @@
+/*
+ * The contents of this file are subject to the MonetDB Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.monetdb.org/Legal/MonetDBLicense
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * The Original Code is the MonetDB Database System.
+ *
+ * The Initial Developer of the Original Code is CWI.
+ * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
+ * Copyright August 2008-2013 MonetDB B.V.
+ * All Rights Reserved.
+ */
+
+/* This contains graph algorithms for the graph formed by CS's and their 
relationships */
+
+#include "monetdb_config.h"
+#include "mal_exception.h"
+#include "url.h"
+#include "tokenizer.h"
+#include 
+#include 
+#include 
+
+int dimensionFactor; 
+float ontologySimThreshold; 
+
+void createDefaultParamsFile(void){
+   
+   FILE *paramFile;
+   
+   paramFile = fopen("params.ini", "wt");
+   
+   fprintf(paramFile, "dimensionFactor 3\n");
+   fprintf(paramFile, "ontologySimThreshold 0.8\n");
+
+   fclose(paramFile); 
+}
+
+void readParamsInput(void){
+   FILE *pf;
+   char variable[80];
+   char value[80];
+
+   pf = fopen("params.ini","r");
+
+   while (!feof(pf)){
+   if(fscanf(pf, "%s %s", variable, value) == 2){
+   if (strcmp(variable, "dimensionFactor") == 0){
+   dimensionFactor = atoi(value);
+   printf("dimensionFactor = 
%d\n",dimensionFactor);
+   }
+   else if (strcmp(variable, "ontologySimThreshold") == 0){
+   ontologySimThreshold = atof(value);
+   printf("ontologySimThreshold = 
%f\n",ontologySimThreshold);
+   }
+   }
+   }
+
+}
diff --git a/monetdb5/extras/rdf/rdfparams.h b/monetdb5/extras/rdf/rdfparams.h
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/rdf/rdfparams.h
@@ -0,0 +1,43 @@
+/*
+ * The contents of this file are subject to the MonetDB Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.monetdb.org/Legal/MonetDBLicense
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * The Original Code is the MonetDB Database System.
+ *
+ * The Initial Developer of the Original Code is CWI.
+ * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
+ * Copyright August 2008-2013 MonetDB B.V.
+ * All Rights Reserved.
+ */
+
+#ifndef _RDFPARAMS_H_
+#define _RDFPARAMS_H_
+
+#ifdef WIN32
+#ifndef LIBRDF
+#define rdf_export extern __declspec(dllimport)
+#else
+#define rdf_export extern __declspec(dllexport)
+#endif
+#else
+#define rdf_export extern
+#endif
+
+
+extern int dimensionFactor; 
+extern float ontologySimThreshold;
+
+rdf_export void
+createDefaultParamsFile(void);
+
+rdf_export void
+readParamsInput(void);
+
+#endif /* _RDFPARAMS_H_ */
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -563,7 +563,7 @@ void getIRNums(CSrel *csrelSet, CSset *f
 
 
 static 
-void updateFreqCStype(CSset *freqCSset, int num,  float *curIRScores, int 
*refCount, int nIterIR){
+void updateFreqCStype(CSset *freqCSset, int num,  float *curIRScores, int 
*refCount){
 
int i; 
int numDimensionCS = 0; 
@@ -9399,7 +9399,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
 
getOrigRefCount(csrelSet, freqCSset, freqCSset->numCSadded, refCount);  
getIRNums(csrelSet, freqCSset, freqCSset->numCSadded, refCount, 
curIRScores, nIterIR);  
-   updateFreqCStype(freqCSset, freqCSset->numCSadded, curIRScores, 
refCount, nIterIR);
+   updateFreqCStype(freqCSset, freqCSset->numCSadded, curIRScores, 
refCount);
 
free(refCount); 
free(curIRScores);
@@ -9603,7 +9603,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
 
getOrigR

MonetDB: rdf - Check the availability of params file

2014-08-04 Thread Minh-Duc Pham
Changeset: 982db287e95c for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=982db287e95c
Modified Files:
monetdb5/extras/rdf/rdfparams.c
Branch: rdf
Log Message:

Check the availability of params file


diffs (15 lines):

diff --git a/monetdb5/extras/rdf/rdfparams.c b/monetdb5/extras/rdf/rdfparams.c
--- a/monetdb5/extras/rdf/rdfparams.c
+++ b/monetdb5/extras/rdf/rdfparams.c
@@ -48,6 +48,11 @@ void readParamsInput(void){
char value[80];
 
pf = fopen("params.ini","r");
+   
+   if (pf == NULL){
+   printf("No input parameter file found!");
+   return; 
+   }
 
while (!feof(pf)){
if(fscanf(pf, "%s %s", variable, value) == 2){
___
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list


MonetDB: holindex - Implement single-threaded out-of-place two-w...

2014-08-04 Thread Eleni Petraki
Changeset: 65544fc482d0 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=65544fc482d0
Modified Files:
monetdb5/extras/crackers/crackers_core_unordered.mx
monetdb5/extras/crackers/crackers_select_ops.mx
monetdb5/extras/crackers/crackers_selectholpl_ops.mx
Branch: holindex
Log Message:

Implement single-threaded out-of-place two-way cracking for the first query 
(instead of BATcopy + multi-threaded crack).


diffs (155 lines):

diff --git a/monetdb5/extras/crackers/crackers_core_unordered.mx 
b/monetdb5/extras/crackers/crackers_core_unordered.mx
--- a/monetdb5/extras/crackers/crackers_core_unordered.mx
+++ b/monetdb5/extras/crackers/crackers_core_unordered.mx
@@ -111,6 +111,7 @@ crackers_export str CRKparallelscan_@1 (
 @
 @= crackInTwoUnorderedPieces_decl
 str CRKcrackUnorderedZero_@2_@1( BAT *b, @1 mval, oid first, oid last, oid 
*pos, int nthreads, int vector_elements);
+str CRKcrackUnorderedTwoCopy_@2_@1( BAT *b, @1 pivot, BUN first, BUN last, BUN 
*pos, BAT *bc);
 str CRKparallelscan_@2_@1( BAT *b, BAT *ob, @1 mval, oid first, oid last, int 
nthreads);
 str CRKparallelcopy_@2_@1( BAT *b, BAT *ob, oid first, oid last, int nthreads);
 @
@@ -1877,7 +1878,32 @@ CRKparallelcopy_@2_@1( BAT *b, BAT *ob, 
 
return msg;
 }
+str
+CRKcrackUnorderedTwoCopy_@2_@1( BAT *b, @1 pivot, BUN first, BUN last, BUN 
*pos, BAT *bc)
+{
+@1  *src_t_basebat = (@1*)Tloc(b, BUNfirst(b));
+@1  *src_t_crackerbat = (@1*)Tloc(bc, BUNfirst(bc));
+oid *src_h_crackerbat = (oid *) Hloc(bc,BUNfirst(bc));
+oid hf = b->hseqbase + first;
+oid hl =  b->hseqbase + last;
+BUN i, j[2] = {first, last}, kt[2] = {1,-1}, kh[2] = {-1,1};
+oid h[2] = {hl, hf};
 
+for(i = first; i <= last; i++)
+{
+bit x = src_t_basebat[i] @8 pivot;
+src_t_crackerbat[j[x]] = src_t_basebat[i];
+src_h_crackerbat[j[x]] = h[x];
+j[x] += kt[x];
+h[x] += kh[x];
+}
+
+assert(j[0] > first);
+
+*pos = j[0];
+
+return MAL_SUCCEED;
+}
 @
 @= crackInThreeUnorderedPieces_impl
 str
diff --git a/monetdb5/extras/crackers/crackers_select_ops.mx 
b/monetdb5/extras/crackers/crackers_select_ops.mx
--- a/monetdb5/extras/crackers/crackers_select_ops.mx
+++ b/monetdb5/extras/crackers/crackers_select_ops.mx
@@ -300,7 +300,22 @@ CRKparallelscanselect_@1_MT(int *vid, in
gapH = -1;
/*vh--;*/
}
+@
+@= crkTwoRTreeCopy
+/*CRACK in two pieces cl2-ch2 using  ch2){
+/*then the right piece is empty*/
+gapH = -1;
+/*vh--;*/
+}
 @
 @= CreateResult
 createView:
@@ -964,6 +979,10 @@ CRKRangeLeftNilTree_@1(int *vid, int *bi
BATmode(b,PERSISTENT);
bo->batRestricted= BAT_READ;
 
+   //CRKparallelcopy_LE_@1(b, bo, (BUN) 0, BATcount(bo)-1, 
nthreads);
+   //BATkey(BATmirror(b),FALSE);
+//BATsetcount(b,BATcount(bo));
+
m = newCrackerIndex_@1(*bid,b->batCacheid);
 
if ((c = BATdescriptor(CrackerIndex[m].cid)) == NULL)
@@ -972,6 +991,7 @@ CRKRangeLeftNilTree_@1(int *vid, int *bi
cl2 = BUNfirst(b);
ch2 = BUNlast(b)-(oid)1;
@:crkTwoRTree(@1)@
+   //@:crkTwoRTreeCopy(@1)@
if (gapH>0) addCrackerIndex_@1(m,hgh,HBound,vh,c);
vl = BUNfirst(b);
pieces=pieces+2;
diff --git a/monetdb5/extras/crackers/crackers_selectholpl_ops.mx 
b/monetdb5/extras/crackers/crackers_selectholpl_ops.mx
--- a/monetdb5/extras/crackers/crackers_selectholpl_ops.mx
+++ b/monetdb5/extras/crackers/crackers_selectholpl_ops.mx
@@ -449,6 +449,22 @@ CRKrandomholpl_@1_MT(int *bid, bit *incl
/*vh--;*/
}
 @
+@= crkTwoRTreeCopy
+/*CRACK in two pieces cl2-ch2 using  ch2){
+/*then the right piece is empty*/
+gapH = -1;
+/*vh--;*/
+}
+@
 @= CreateResult
 createView:
assert(0);
@@ -1412,20 +1428,24 @@ CRKRangeLeftNilTree_@1(int *vid, int *bi
}
 
gettimeofday(&tv0_copy, 0);
-   b=BATnew(TYPE_oid, bo->ttype,BATcount(bo));
+b = BATcopy(bo, bo->htype, bo->ttype, TRUE);
+if ( bo->htype == TYPE_void)
+b = BATmaterializeh(b);
+   //b=BATnew(TYPE_oid, bo->ttype,BATcount(bo));
+   gettimeofday(&tv1_copy, 0);
+   fprintf(ofp_copy,"%d\n",(int)dt(tv0_copy,tv1_copy)*100);
+
b->hsorted = FALSE;
b->tsorted = FALSE;
b->hdense = FALSE;
b->tdense = FALSE;
-   CRKparallelcopy_LE_@1(b, bo, (BUN) 0, BATcount(bo)-1, nthreads);
-   BATkey(BATmirror(b),FALSE);
-BATsetcount(b,BATcount(bo));
b->ba

MonetDB: holindex - Fix bug in out-of-place cracking.

2014-08-04 Thread Eleni Petraki
Changeset: 03200759a887 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=03200759a887
Modified Files:
monetdb5/extras/crackers/crackers_core_unordered.mx
Branch: holindex
Log Message:

Fix bug in out-of-place cracking.


diffs (24 lines):

diff --git a/monetdb5/extras/crackers/crackers_core_unordered.mx 
b/monetdb5/extras/crackers/crackers_core_unordered.mx
--- a/monetdb5/extras/crackers/crackers_core_unordered.mx
+++ b/monetdb5/extras/crackers/crackers_core_unordered.mx
@@ -1884,18 +1884,14 @@ CRKcrackUnorderedTwoCopy_@2_@1( BAT *b, 
 @1  *src_t_basebat = (@1*)Tloc(b, BUNfirst(b));
 @1  *src_t_crackerbat = (@1*)Tloc(bc, BUNfirst(bc));
 oid *src_h_crackerbat = (oid *) Hloc(bc,BUNfirst(bc));
-oid hf = b->hseqbase + first;
-oid hl =  b->hseqbase + last;
-BUN i, j[2] = {first, last}, kt[2] = {1,-1}, kh[2] = {-1,1};
-oid h[2] = {hl, hf};
+BUN i, j[2] = {first, last}, kt[2] = {1,-1};
 
 for(i = first; i <= last; i++)
 {
 bit x = src_t_basebat[i] @8 pivot;
 src_t_crackerbat[j[x]] = src_t_basebat[i];
-src_h_crackerbat[j[x]] = h[x];
+src_h_crackerbat[j[x]] = b->hseqbase + i;
 j[x] += kt[x];
-h[x] += kh[x];
 }
 
 assert(j[0] > first);
___
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list


MonetDB: rdf - Replace several parameters by a single param

2014-08-04 Thread Minh-Duc Pham
Changeset: 20d17afb3ae1 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=20d17afb3ae1
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Replace several parameters by a single param


diffs (128 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -503,7 +503,7 @@ void getOrigRefCount(CSrel *csrelSet, CS
for (j = 0; j < csrelSet[i].numRef; j++){
freqId = csrelSet[i].lstRefFreqIdx[j]; 
#if FILTER_INFREQ_FK_FOR_IR
-   if (csrelSet[i].lstCnt[j] < 
FILTER_THRESHOLD_FK_FOR_IR * freqCSset->items[freqId].support) continue; 
+   if (csrelSet[i].lstCnt[j] < 
INFREQ_TYPE_THRESHOLD * freqCSset->items[freqId].support) continue; 
#endif
//Do not count the self-reference
if (freqId != i) refCount[freqId] += 
csrelSet[i].lstCnt[j];
@@ -536,7 +536,7 @@ void getIRNums(CSrel *csrelSet, CSset *f
for (j = 0; j < csrelSet[i].numRef; j++){
freqId = csrelSet[i].lstRefFreqIdx[j]; 
#if FILTER_INFREQ_FK_FOR_IR
-   if (csrelSet[i].lstCnt[j] < 
FILTER_THRESHOLD_FK_FOR_IR * freqCSset->items[freqId].support) continue; 
+   if (csrelSet[i].lstCnt[j] < 
INFREQ_TYPE_THRESHOLD * freqCSset->items[freqId].support) continue; 
#endif
if (freqId != i){   //Do not count 
the self-reference
//curIRScores[freqId] += 
(lastIRScores[i] * (float)csrelSet[i].lstCnt[j]/(float)refCount[freqId]) +  
csrelSet[i].lstCnt[j];
@@ -867,7 +867,7 @@ char isMultiValueCol(PropTypes pt){
 
tmpRatio = ((double)pt.propCover / (pt.numSingleType + pt.numMVType));
//printf("NumMVType = %d  | Ratio %f \n", pt.numMVType, tmpRatio);
-   if ((pt.numMVType > 0) && (tmpRatio > IS_MULVALUE_THRESHOLD)){
+   if ((pt.numMVType > 0) && (tmpRatio > (1 + INFREQ_TYPE_THRESHOLD))){
return 1; 
}
else return 0; 
@@ -3603,7 +3603,7 @@ void generatecsRelSum(CSrel csRel, int f
freq = freqCSset->items[csRel.origFreqIdx].support; 
referredFreqId = csRel.lstRefFreqIdx[i];
freqOfReferredCS = freqCSset->items[referredFreqId].support;
-   if (freq > MIN_FROMTABLE_SIZE_S5 && freq < csRel.lstCnt[i] * 
MIN_PERCETAGE_S5 
+   if (freq > MIN_FROMTABLE_SIZE_S5 && (((float)freq * 
INFREQ_TYPE_THRESHOLD) < csRel.lstCnt[i]))   
&& freqOfReferredCS < csRel.lstCnt[i] * 
MIN_TO_PERCETAGE_S5){   

p = csRel.lstPropId[i]; 
@@ -8437,7 +8437,7 @@ CSrel* getFKBetweenTableSet(CSrel *csrel
// add relation to new data structure
 
//Compare with prop coverage from csproptype
-   if (rel.lstCnt[j]  < freqCSset->items[toFreqId].support 
* MIN_FK_FREQUENCY) continue; 
+   if (rel.lstCnt[j]  < freqCSset->items[toFreqId].support 
* INFREQ_TYPE_THRESHOLD)continue; 
 
to = mfreqIdxTblIdxMapping[toFreqId]; 
assert(to != -1); 
@@ -8455,7 +8455,7 @@ CSrel* getFKBetweenTableSet(CSrel *csrel
//Filtering: For big size table, if large number of 
prop's instances need to refer to a certain table
// else, all instances of that prop must refer to the 
certain table
if (freqCSset->items[i].coverage > MINIMUM_TABLE_SIZE){
-   if 
(csPropTypes[from].lstPropTypes[propIdx].propCover * MIN_FK_PROPCOVERAGE > 
rel.lstCnt[j]) continue; 
+   if 
(csPropTypes[from].lstPropTypes[propIdx].propCover * (1 - 
INFREQ_TYPE_THRESHOLD) > rel.lstCnt[j]) continue; 
else if 
(csPropTypes[from].lstPropTypes[propIdx].propCover == rel.lstCnt[j])

csPropTypes[from].lstPropTypes[propIdx].isDirtyFKProp = 0;
else
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -127,10 +127,10 @@ typedef struct PropStat {
 #define OUTPUT_FREQID_PER_LABEL 1  /* This is for evaluating the results 
of merging using S1. TODO: Set it to 0 for default*/
 #defineMERGING_CONSIDER_NAMEORIGINALITY 0  /*Merging in rule S1, 
consider

MonetDB: rdf - Increase dimension factor

2014-08-04 Thread Minh-Duc Pham
Changeset: 57ffe2bf4c3b for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=57ffe2bf4c3b
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Increase dimension factor


diffs (45 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -3603,7 +3603,7 @@ void generatecsRelSum(CSrel csRel, int f
freq = freqCSset->items[csRel.origFreqIdx].support; 
referredFreqId = csRel.lstRefFreqIdx[i];
freqOfReferredCS = freqCSset->items[referredFreqId].support;
-   if (freq > MIN_FROMTABLE_SIZE_S5 && (((float)freq * 
INFREQ_TYPE_THRESHOLD) < csRel.lstCnt[i]))   
+   if (freq > MIN_FROMTABLE_SIZE_S5 && (((float)freq * 
INFREQ_TYPE_THRESHOLD) < csRel.lstCnt[i])   
&& freqOfReferredCS < csRel.lstCnt[i] * 
MIN_TO_PERCETAGE_S5){   

p = csRel.lstPropId[i]; 
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -142,7 +142,7 @@ typedef struct PropStat {
 //#define IR_DIMENSION_THRESHOLD_PERCENTAGE0.02//  Score of indirect 
references that the CS can be considered as a dimension CS 
//   
IR_DIMENSION_THRESHOLD_PERCENTAGE * totalFrequency 
//   Number of IR 
references should be several times larger than the CS frequency 
-#defineIR_DIMENSION_FACTOR 100 //A table is a dimension table 
if the # of references to it is an order of magnitude (IR_DIMENSION_FACTOR) 
compared to # of its tuples   
+#defineIR_DIMENSION_FACTOR 1000//A table is a dimension table 
if the # of references to it is an order of magnitude (IR_DIMENSION_FACTOR) 
compared to # of its tuples   
//
 #define MAX_ITERATION_NO   6   //Max number of iteration run

@@ -222,20 +222,6 @@ typedef struct SubCS {
charisdefault; 
 } SubCS; 
 
-/*
-typedef struct mergeCS {   // CS formed by merging CS id1 and CS id2   
-   oid*lstConsistsOf;  
-   int numConsistsOf; 
-   oid*lstProp; 
-   int numProp; 
-   int support;
-   int coverage;
-   charisRemove;
-
-} mergeCS; 
-
-*/
-
 #define INIT_NUM_SUBCS 4
 
 typedef struct SubCSSet{
___
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list


MonetDB: rdf - Compute average precision

2014-08-04 Thread Minh-Duc Pham
Changeset: 8407c931a2e1 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=8407c931a2e1
Modified Files:
monetdb5/extras/rdf/rdfparams.c
monetdb5/extras/rdf/rdfparams.h
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Compute average precision


diffs (122 lines):

diff --git a/monetdb5/extras/rdf/rdfparams.c b/monetdb5/extras/rdf/rdfparams.c
--- a/monetdb5/extras/rdf/rdfparams.c
+++ b/monetdb5/extras/rdf/rdfparams.c
@@ -29,6 +29,8 @@
 
 int dimensionFactor; 
 float ontologySimThreshold; 
+int upperboundNumTables;
+float generalityThreshold; 
 
 void createDefaultParamsFile(void){

@@ -37,7 +39,8 @@ void createDefaultParamsFile(void){
paramFile = fopen("params.ini", "wt");

fprintf(paramFile, "dimensionFactor 3\n");
-   fprintf(paramFile, "ontologySimThreshold 0.8\n");
+   fprintf(paramFile, "ontologySimThreshold 0.75\n");
+   fprintf(paramFile, "upperboundNumTables 1000");
 
fclose(paramFile); 
 }
@@ -64,7 +67,21 @@ void readParamsInput(void){
ontologySimThreshold = atof(value);
printf("ontologySimThreshold = 
%f\n",ontologySimThreshold);
}
+   else if (strcmp(variable, "upperboundNumTables") == 0){
+   upperboundNumTables = atoi(value);
+   printf("upperboundNumTables = %d\n", 
upperboundNumTables);
+   }
}
}
 
+   
+   if (upperboundNumTables != 0){
+   generalityThreshold = (float) 1 / (float)upperboundNumTables; 
+   printf("generalityThreshold = %f\n",generalityThreshold);
+   }
+   else{ //default
+   generalityThreshold = 0.001; 
+   }
+
+
 }
diff --git a/monetdb5/extras/rdf/rdfparams.h b/monetdb5/extras/rdf/rdfparams.h
--- a/monetdb5/extras/rdf/rdfparams.h
+++ b/monetdb5/extras/rdf/rdfparams.h
@@ -33,6 +33,8 @@
 
 extern int dimensionFactor; 
 extern float ontologySimThreshold;
+extern int upperboundNumTables; 
+extern float generalityThreshold;
 
 rdf_export void
 createDefaultParamsFile(void);
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -3088,7 +3088,7 @@ void updateParentIdxAll(CSset *freqCSset
 
 #if USE_LABEL_FINDING_MAXCS
 /*
- *  * Return 1 if there is semantic evidence against merging the two CS's, 
this is the case iff the two CS's have a hierarchy and their common ancestor is 
too generic (support above IMPORTANCE_THRESHOLD).
+ *  * Return 1 if there is semantic evidence against merging the two CS's, 
this is the case iff the two CS's have a hierarchy and their common ancestor is 
too generic (support above generalityThreshold).
  *   */
 static
 char isEvidenceAgainstMerging(int freqId1, int freqId2, CSlabel* labels, 
OntoUsageNode *tree) {
@@ -3132,7 +3132,7 @@ char isEvidenceAgainstMerging(int freqId
level++;
}
 
-   if (tmpNode->percentage >= IMPORTANCE_THRESHOLD) {
+   if (tmpNode->percentage >= generalityThreshold) {
// have common ancestor but it is too generic --> there is 
semantic evidence against merging the two CS's
return 1;
} else {
@@ -4494,7 +4494,7 @@ char isSemanticSimilar(int freqId1, int 
*/

 
-   if (tmpNode->percentage < IMPORTANCE_THRESHOLD) {
+   if (tmpNode->percentage < generalityThreshold) {
//printf("Merge two CS's %d (Label: "BUNFMT") and %d 
(Label: "BUNFMT") using the common ancestor ("BUNFMT") at level %d (score: 
%f)\n",
//  freqId1, labels[freqId1].name, freqId2, 
labels[freqId2].name,tmpNode->uri, i,tmpNode->percentage);
oid classOid;
@@ -8972,6 +8972,7 @@ void computeMetricsQ(CSset *freqCSset){
int tblIdx = -1;
CS cs;  
int totalCov = 0; 
+   float   totalPrecision = 0.0; 
float   Q = 0.0;
int i;
int curNumMergeCS = countNumberMergeCS(freqCSset);
@@ -8990,11 +8991,14 @@ void computeMetricsQ(CSset *freqCSset){
weight[tblIdx] = (float) cs.coverage * ( 
fillRatio[tblIdx] + refRatio[tblIdx]); 
//weight[tblIdx] = (float) cs.coverage * ( 
fillRatio[tblIdx]);  //If do not consider reference ratio
totalCov += cs.coverage;
+   totalPrecision += fillRatio[tblIdx];

Q += weight[tblIdx];
}
}
printf("Performance metric Q = (weighting %f)/(totalCov %d * numTbl %d) 
\n", Q,totalCov, curNumMergeCS);
+   printf("Average precision = %f\n",(float)totalPrecision/curNumMergeCS);
+   //printf("Average precision = %f\n"

MonetDB: Jan2014 - allow decimals without '.'

2014-08-04 Thread Niels Nes
Changeset: 27d9a8384784 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=27d9a8384784
Modified Files:
sql/backends/monet5/sql_round_impl.h
Branch: Jan2014
Log Message:

allow decimals without '.'


diffs (12 lines):

diff --git a/sql/backends/monet5/sql_round_impl.h 
b/sql/backends/monet5/sql_round_impl.h
--- a/sql/backends/monet5/sql_round_impl.h
+++ b/sql/backends/monet5/sql_round_impl.h
@@ -300,7 +300,7 @@ str_2dec(TYPE *res, str *val, int *d, in
*res = NIL(TYPE);
return MAL_SUCCEED;
} else {
-   throw(SQL, STRING(TYPE), "\"%s\" is no decimal value 
(doesn't contain a '.')", *val);
+   scale = 0;
}
}
 
___
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list


MonetDB: Jan2014 - fixed bug 3521, ie use exp(double) (and other...

2014-08-04 Thread Niels Nes
Changeset: 8f619c2ff336 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=8f619c2ff336
Modified Files:
sql/common/sql_types.c
sql/test/BugTracker-2009/Tests/overflow.SF-2853458.stable.err
sql/test/BugTracker-2009/Tests/pow_operator.SF-2812721.stable.out
sql/test/BugTracker-2013/Tests/mitosis-floor.Bug-3330.stable.out
sql/test/bugs/Tests/floor-bug-sf-967066.stable.out
Branch: Jan2014
Log Message:

fixed bug 3521, i.e. use exp(double) (and other related math functions) when
the input type isn't a real (i.e. single-precision floating point).


diffs (93 lines):

diff --git a/sql/common/sql_types.c b/sql/common/sql_types.c
--- a/sql/common/sql_types.c
+++ b/sql/common/sql_types.c
@@ -1441,11 +1441,7 @@ sqltypeinit( sql_allocator *sa)
sql_create_func(sa, "scale_up", "calc", "*", *u, *t, 
*t, SCALE_NONE);
}
 
-   /* initial assignment to t is on purpose like this, such that the
-* compiler (as of gcc-4.6) "sees" that we never go below the
-* initial pointer, and hence don't get a
-* error: array subscript is below array bounds */
-   for (t = floats + (dates - floats - 1); t >= floats; t--) {
+   for (t = floats; t < dates; t++) {
sql_create_func(sa, "power", "mmath", "pow", *t, *t, *t, 
SCALE_FIX);
sql_create_func(sa, "floor", "mmath", "floor", *t, NULL, *t, 
SCALE_FIX);
sql_create_func(sa, "ceil", "mmath", "ceil", *t, NULL, *t, 
SCALE_FIX);
diff --git a/sql/test/BugTracker-2009/Tests/overflow.SF-2853458.stable.err b/sql/test/BugTracker-2009/Tests/overflow.SF-2853458.stable.err
--- a/sql/test/BugTracker-2009/Tests/overflow.SF-2853458.stable.err
+++ b/sql/test/BugTracker-2009/Tests/overflow.SF-2853458.stable.err
@@ -67,12 +67,12 @@ stderr of test 'overflow.SF-2853458` in 
 # 22:26:13 >  mclient -lsql -umonetdb -Pmonetdb --host=alf --port=35561 
 # 22:26:13 >  
 
-MAPI  = (monetdb) /var/tmp/mtest-23209/.s.monetdb.33225
+MAPI  = (monetdb) /var/tmp/mtest-9389/.s.monetdb.34946
 QUERY = select cast(power(2,63) as bigint);
-ERROR = !overflow in conversion of 9.22337204e+18 to lng.
-MAPI  = (monetdb) /var/tmp/mtest-23209/.s.monetdb.33225
+ERROR = !overflow in conversion of 9.2233720368547758e+18 to lng.
+MAPI  = (monetdb) /var/tmp/mtest-9389/.s.monetdb.34946
 QUERY = select cast(power(2,64) as bigint);
-ERROR = !overflow in conversion of 1.84467441e+19 to lng.
+ERROR = !overflow in conversion of 1.8446744073709552e+19 to lng.
 
 # 22:26:13 >  
 # 22:26:13 >  Done.
diff --git a/sql/test/BugTracker-2009/Tests/pow_operator.SF-2812721.stable.out b/sql/test/BugTracker-2009/Tests/pow_operator.SF-2812721.stable.out
--- a/sql/test/BugTracker-2009/Tests/pow_operator.SF-2812721.stable.out
+++ b/sql/test/BugTracker-2009/Tests/pow_operator.SF-2812721.stable.out
@@ -27,17 +27,17 @@ Ready.
 #select power(2,2);
 % .L # table_name
 % power_single_value # name
-% real # type
-% 15 # length
+% double # type
+% 24 # length
 [ 4]
 #select power(2.8,2);
 % .L # table_name
 % power_single_value # name
-% real # type
-% 15 # length
-[ 7.8368   ]
+% double # type
+% 24 # length
+[ 7.84 ]
 
-# 09:09:06 >  
-# 09:09:06 >  "Done."
-# 09:09:06 >  
+# 19:02:22 >  
+# 19:02:22 >  "Done."
+# 19:02:22 >  
 
diff --git a/sql/test/BugTracker-2013/Tests/mitosis-floor.Bug-3330.stable.out b/sql/test/BugTracker-2013/Tests/mitosis-floor.Bug-3330.stable.out
--- a/sql/test/BugTracker-2013/Tests/mitosis-floor.Bug-3330.stable.out
+++ b/sql/test/BugTracker-2013/Tests/mitosis-floor.Bug-3330.stable.out
@@ -51,8 +51,8 @@ Ready.
 #;
 % sys.image,   sys.image,  sys.image,  sys.L1 # table_name
 % tilex,   tiley,  intensity,  count # name
-% real,real,   int,wrd # type
-% 15,  15, 1,  1 # length
+% double,  double, int,wrd # type
+% 24,  24, 1,  1 # length
 [ 0,   0,  3,  1   ]
 [ 0,   0,  6,  1   ]
 #drop table error;
diff --git a/sql/test/bugs/Tests/floor-bug-sf-967066.stable.out b/sql/test/bugs/Tests/floor-bug-sf-967066.stable.out
--- a/sql/test/bugs/Tests/floor-bug-sf-967066.stable.out
+++ b/sql/test/bugs/Tests/floor-bug-sf-967066.stable.out
@@ -27,8 +27,8 @@ Ready.
 #select floor(3.5);
 % .L # table_name
 % floor_single_value # name
-% real # type
-% 15 # length
+% double # type
+% 24 # length
 [ 3]
 
 # 12:51:03 >  
___
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list


MonetDB: rdf - Add params

2014-08-04 Thread Minh-Duc Pham
Changeset: d2816518acc4 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d2816518acc4
Modified Files:
monetdb5/extras/rdf/rdflabels.c
monetdb5/extras/rdf/rdfparams.c
monetdb5/extras/rdf/rdfparams.h
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Add params


diffs (106 lines):

diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -23,6 +23,7 @@
 #include "rdfschema.h"
 #include "tokenizer.h"
 #include 
+#include "rdfparams.h"
 
 // list of known ontologies
 int ontologyCount = 74;
@@ -830,7 +831,7 @@ oid* getOntologyCandidates(oid** ontattr
for (k = 0; k < num; ++k) {
int found = 0;
//if (freqId == 161) printf("   TFIDF score at %d 
("BUNFMT") is: %f | Number of matched Prop %d \n",k, classStat[k].ontoClass, 
classStat[k].tfidfs,classStat[k].numMatchedProp);
-   if (classStat[k].tfidfs < SIM_TFIDF_THRESHOLD) break; 
// values not frequent enough (list is sorted by tfidfs)
+   if (classStat[k].tfidfs < simTfidfThreshold) break; // 
values not frequent enough (list is sorted by tfidfs)
for (j = 0; j < ontmetadataCount && (found == 0); ++j) {
oid muri = ontmetadata[0][j];
oid msuper = ontmetadata[1][j];
diff --git a/monetdb5/extras/rdf/rdfparams.c b/monetdb5/extras/rdf/rdfparams.c
--- a/monetdb5/extras/rdf/rdfparams.c
+++ b/monetdb5/extras/rdf/rdfparams.c
@@ -28,9 +28,9 @@
 #include 
 
 int dimensionFactor; 
-float ontologySimThreshold; 
 int upperboundNumTables;
 float generalityThreshold; 
+float simTfidfThreshold;
 
 void createDefaultParamsFile(void){

@@ -38,9 +38,9 @@ void createDefaultParamsFile(void){

paramFile = fopen("params.ini", "wt");

-   fprintf(paramFile, "dimensionFactor 3\n");
-   fprintf(paramFile, "ontologySimThreshold 0.75\n");
+   fprintf(paramFile, "dimensionFactor 1000\n");
fprintf(paramFile, "upperboundNumTables 1000");
+   fprintf(paramFile, "simTfidfThreshold 0.75");
 
fclose(paramFile); 
 }
@@ -63,14 +63,14 @@ void readParamsInput(void){
dimensionFactor = atoi(value);
printf("dimensionFactor = 
%d\n",dimensionFactor);
}
-   else if (strcmp(variable, "ontologySimThreshold") == 0){
-   ontologySimThreshold = atof(value);
-   printf("ontologySimThreshold = 
%f\n",ontologySimThreshold);
-   }
else if (strcmp(variable, "upperboundNumTables") == 0){
upperboundNumTables = atoi(value);
printf("upperboundNumTables = %d\n", 
upperboundNumTables);
}
+   else if (strcmp(variable, "simTfidfThreshold") == 0){
+   simTfidfThreshold = atof(value);
+   printf("simTfidfThreshold = %f\n", 
simTfidfThreshold);
+   }
}
}
 
diff --git a/monetdb5/extras/rdf/rdfparams.h b/monetdb5/extras/rdf/rdfparams.h
--- a/monetdb5/extras/rdf/rdfparams.h
+++ b/monetdb5/extras/rdf/rdfparams.h
@@ -32,9 +32,9 @@
 
 
 extern int dimensionFactor; 
-extern float ontologySimThreshold;
 extern int upperboundNumTables; 
 extern float generalityThreshold;
+extern float simTfidfThreshold;
 
 rdf_export void
 createDefaultParamsFile(void);
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -4683,9 +4683,9 @@ void mergeCSByS4(CSset *freqCSset, CSlab
  isSameLabel = 0;
  if ((*labels)[freqId1].name == 
(*labels)[freqId2].name) isSameLabel = 1;
 
- if (simscore > SIM_TFIDF_THRESHOLD && 
(existDiscriminatingProp || isSameLabel)){
+ if (simscore > simTfidfThreshold && 
(existDiscriminatingProp || isSameLabel)){
  #else 
- if (simscore > SIM_TFIDF_THRESHOLD && 
existDiscriminatingProp){ 
+ if (simscore > simTfidfThreshold && 
existDiscriminatingProp){   
  #endif
#else   
if (simscore > SIM_THRESHOLD) {
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -234,7 +234,7 @@ typedef struct SubCSSet{
 
 #define INIT_NUM_CS 1000 
 #define SIM_THRESHOLD 0.6
-#define SIM_TFIDF_THRESHOLD 0.75
+//#define SIM_TFIDF_THRESHOLD 0.75