Changeset: 0646f409ff77 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0646f409ff77
Modified Files:
monetdb5/extras/rdf/rdf_shredder.c
monetdb5/extras/rdf/rdfparser.h
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdftypes.c
monetdb5/extras/rdf/rdftypes.h
Branch: rdf
Log Message:
Add functions for encoding/decoding numeric values in an oid
diffs (truncated from 563 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdf_shredder.c
b/monetdb5/extras/rdf/rdf_shredder.c
--- a/monetdb5/extras/rdf/rdf_shredder.c
+++ b/monetdb5/extras/rdf/rdf_shredder.c
@@ -177,6 +177,7 @@ rdf_BUNappend_unq_ForObj(parserData* pda
}
+
/*
* Get the specific type of the object value in an RDF triple
* The URI object can be recoginized by raptor parser.
@@ -186,7 +187,7 @@ rdf_BUNappend_unq_ForObj(parserData* pda
*/
static ObjectType
-getObjectType(unsigned char* objStr, BUN *realNumValue){
+getObjectType_and_Value(unsigned char* objStr, ValPtr vrPtrRealValue){
ObjectType obType = STRING;
unsigned char* endpart;
char* valuepart;
@@ -194,7 +195,9 @@ getObjectType(unsigned char* objStr, BUN
int len = 0;
int subLen = 0;
- *realNumValue = BUN_NONE;
+ double realDbl;
+ int realInt;
+
len = strlen((str)objStr);
if (len > 20){
@@ -206,13 +209,15 @@ getObjectType(unsigned char* objStr, BUN
/* printf("%s: DateTime \n", objStr); */
}
else if ((pos = strstr((str) endpart, "XMLSchema#int>")) !=
NULL || (pos = strstr((str)endpart, "XMLSchema#integer>")) != NULL){
+ //TODO: Consider nonNegativeInteger
obType = INTEGER;
subLen = (int) (pos - (str)objStr - 28);
valuepart = substring((char*)objStr, 2 , subLen);
/* printf("%s: Integer \n. Length of value %d ==> value
%s \n", objStr, (int) (pos - (str)objStr - 28), valuepart); */
if (isInt(valuepart, subLen) == 1){ /* Check
whether the real value is an integer */
- *realNumValue = (BUN) atoi(valuepart);
- /* printf("Real value is: " BUNFMT " \n",
*realNumValue); */
+ realInt = (BUN) atoi(valuepart);
+ VALset(vrPtrRealValue,TYPE_int, &realInt);
+ printf("Real int value is: %d \n",
vrPtrRealValue->val.ival);
}
else
obType = STRING;
@@ -223,8 +228,16 @@ getObjectType(unsigned char* objStr, BUN
else if ((pos = strstr((str) endpart, "XMLSchema#float>")) !=
NULL
|| (pos = strstr((str) endpart,
"XMLSchema#double>")) != NULL
|| (pos = strstr((str) endpart,
"XMLSchema#decimal>")) != NULL){
- obType = FLOAT;
- /* printf("%s: Float \n", objStr); */
+ obType = DOUBLE;
+ subLen = (int) (pos - (str)objStr - 28);
+ valuepart = substring((char*)objStr, 2 , subLen);
+ if (isDouble(valuepart, subLen) == 1){
+ realDbl = atof(valuepart);
+ VALset(vrPtrRealValue,TYPE_dbl, &realDbl);
+ printf("Real double value is: %.10f \n",
vrPtrRealValue->val.dval);
+ }
+ else
+ obType = STRING;
}
else {
obType = STRING;
@@ -280,7 +293,7 @@ tripleHandler(void* user_data, const rap
#endif
parserData *pdata = ((parserData *) user_data);
BUN bun = BUN_NONE;
- BUN realNumValue = BUN_NONE;
+ ValRecord vrRealValue;
BAT **graph = pdata->graph;
@@ -389,11 +402,30 @@ tripleHandler(void* user_data, const rap
unsigned char* objStr;
ObjectType objType = STRING;
objStr = raptor_term_to_string(triple->object);
- objType = getObjectType(objStr, &realNumValue);
+ objType = getObjectType_and_Value(objStr, &vrRealValue);
- rdf_BUNappend_unq_ForObj(pdata, graph[MAP_LEX],
(str)objStr, objType, &bun);
+ if (objType == STRING){
+ rdf_BUNappend_unq_ForObj(pdata, graph[MAP_LEX],
(str)objStr, objType, &bun);
+ }
+ else{ //For handling dateTime, Integer, Float values
+ encodeValueInOid(&vrRealValue, objType, &bun);
+ }
+
rdf_BUNappend(pdata, graph[O_sort], &bun);
+ VALclear(&vrRealValue);
+
+ /*
+ if (objType == INTEGER){
+ decodeValueFromOid(bun, objType, &vrRealValue);
+ printf("Decoded integer value is: %d \n",
vrRealValue.val.ival);
+ }
+ if (objType == DOUBLE){
+ decodeValueFromOid(bun, objType, &vrRealValue);
+ printf("Decoded double value is: %.10f \n",
vrRealValue.val.dval);
+ }
+ */
+
//printf("Object string is %s --> object type is %d
(oid = " BUNFMT " \n",objStr,objType, bun);
bun = BUN_NONE;
diff --git a/monetdb5/extras/rdf/rdfparser.h b/monetdb5/extras/rdf/rdfparser.h
--- a/monetdb5/extras/rdf/rdfparser.h
+++ b/monetdb5/extras/rdf/rdfparser.h
@@ -29,8 +29,8 @@
#include <raptor2.h>
-#define CHECK_NUM_DBPONTOLOGY 1 /* Check how many rdf triples use
dbpontology */
-#define BUILD_ONTOLOGIES_HISTO 1 /* Check how much percentage each
ontology has in the dataset */
+#define CHECK_NUM_DBPONTOLOGY 0 /* Check how many rdf triples use
dbpontology */
+#define BUILD_ONTOLOGIES_HISTO 0 /* Check how much percentage each
ontology has in the dataset */
typedef struct parserData {
/**PROPERTIES */
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -6622,11 +6622,12 @@ char getObjTypeFromBATtype(int battype){
case TYPE_int:
return INTEGER;
break;
- case TYPE_flt:
- return FLOAT;
+ case TYPE_dbl:
+ return DOUBLE;
break;
default:
return 100;
+ break;
}
}
@@ -6634,7 +6635,7 @@ static
int getObjValueFromMVBat(ValPtr returnValue, ValPtr castedValue, BUN pos,
ObjectType objType, BAT *tmpBat, BAT *lmap, BAT *rmap){
str tmpStr;
str inputStr;
- float *realFloat;
+ double *realDbl;
int *realInt;
oid *tmpUriOid;
oid realUriOid = BUN_NONE;
@@ -6699,11 +6700,11 @@ int getObjValueFromMVBat(ValPtr returnVa
return 0;
}
break;
- case FLOAT:
+ case DOUBLE:
//printf("Full object value: %s \n",objStr);
- realFloat = (float *)BUNtail(tmpi, pos);
- if (*realFloat != flt_nil){
- VALset(returnValue, TYPE_flt, realFloat);
+ realDbl = (double *)BUNtail(tmpi, pos);
+ if (*realDbl != dbl_nil){
+ VALset(returnValue, TYPE_dbl, realDbl);
if (rdfcast(objType, STRING, returnValue,
castedValue) != 1){
printf("Everything should be able to
cast to String \n");
}
@@ -6740,6 +6741,8 @@ int getObjValueFromMVBat(ValPtr returnVa
else{
return 0;
}
+
+ break;
}
@@ -7701,7 +7704,7 @@ str printFullSampleData(CSSampleExtend *
CSSampleExtend sample;
str objStr;
oid *objOid = NULL;
- float *objFlt = NULL;
+ double *objDbl = NULL;
int *objInt = NULL;
str canStr;
char isTitle = 0;
@@ -7996,15 +7999,15 @@ str printFullSampleData(CSSampleExtend *
GDKfree(objStr);
}
}
- else if (tmpBat->ttype == TYPE_flt){
- objFlt = (float *) BUNtail(tmpi, k);
- if (*objFlt == flt_nil){
+ else if (tmpBat->ttype == TYPE_dbl){
+ objDbl = (double *) BUNtail(tmpi, k);
+ if (*objDbl == dbl_nil){
fprintf(fout,"|NULL");
fprintf(foutis,"|NULL");
}
else{
- fprintf(fout,"|%f", *objFlt);
- fprintf(foutis,"|%f", *objFlt);
+ fprintf(fout,"|%f", *objDbl);
+ fprintf(foutis,"|%f", *objDbl);
}
}
@@ -9620,7 +9623,7 @@ void initCStables(CStableStat* cstablest
mapObjBATtypes[URI] = TYPE_oid;
mapObjBATtypes[DATETIME] = TYPE_str;
mapObjBATtypes[INTEGER] = TYPE_int;
- mapObjBATtypes[FLOAT] = TYPE_flt;
+ mapObjBATtypes[DOUBLE] = TYPE_dbl;
mapObjBATtypes[STRING] = TYPE_str;
mapObjBATtypes[BLANKNODE] = TYPE_oid;
mapObjBATtypes[MULTIVALUES] = TYPE_oid;
@@ -9964,7 +9967,7 @@ void getRealValue(ValPtr returnValue, oi
str tmpStr;
BUN bun;
BUN maxObjectURIOid = ((oid)1 << (sizeof(BUN)*8 - NBITS_FOR_CSID -
1)) - 1; //Base on getTblIdxFromS
- float realFloat;
+ float realDbl;
int realInt;
oid realUri;
@@ -10006,17 +10009,19 @@ void getRealValue(ValPtr returnValue, oi
realInt = getIntFromRDFString(objStr);
VALset(returnValue,TYPE_int, &realInt);
break;
- case FLOAT:
+ case DOUBLE:
//printf("Full object value: %s \n",objStr);
- realFloat = getFloatFromRDFString(objStr);
- VALset(returnValue,TYPE_flt, &realFloat);
+ realDbl = getDoubleFromRDFString(objStr);
+ VALset(returnValue,TYPE_dbl, &realDbl);
break;
default: //URI or BLANK NODE
realUri = objOid;
VALset(returnValue,TYPE_oid, &realUri);
- }
-
-}
+ break;
+ }
+
+}
+
static
void updatePropTypeForRemovedTriple(CSPropTypes *csPropTypes, int*
tmpTblIdxPropIdxMap, int tblIdx, oid *subjCSMap, int* csTblIdxMapping, oid sbt,
oid pbt, oid *lastRemovedProp, oid* lastRemovedSubj, char isMultiToSingleProp){
int tmptblIdx, tmpPropIdx;
diff --git a/monetdb5/extras/rdf/rdftypes.c b/monetdb5/extras/rdf/rdftypes.c
--- a/monetdb5/extras/rdf/rdftypes.c
+++ b/monetdb5/extras/rdf/rdftypes.c
@@ -61,12 +61,27 @@ char* substring(char *string, int positi
return pointer;
}
+
+/*
+ This function returns 1 if architecture
+ is little endian, 0 in case of big endian.
+*/
+
+int is_little_endian(void)
+{
+ unsigned int x = 1;
+ char *c = (char*) &x;
+ return (int)*c;
+}
+
char isInt(char *input, int len){
int i = 0;
//int len = strlen(input);
//printf("... Checking value %s with len %d \n", input, len);
- if (input[0] != '-' && isdigit(input[0]) == 0)
+ if (len > 11) return 0;
+
+ if (input[0] != '-' && input[0] != '+' && isdigit(input[0]) == 0)
return 0;
for(i = 1; i < len; i++)
@@ -83,6 +98,37 @@ char isInt(char *input, int len){
return 0;
}
+
+char isDouble(char *input, int len){
+
+ int i = 0;
+ int numE = 0; //number of E's
+ //int len = strlen(input);
+ //printf("... Checking value %s with len %d \n", input, len);
+ if (input[0] != '-' && input[0] != '+' && isdigit(input[0]) == 0)
+ return 0;
+
+ for(i = 1; i < len; i++)
+ {
+ if(isdigit(input[i]) == 0 && input[i] != '.'
+ && input[i] != 'e' && input[i] != 'E'
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list