[rdflib-dev] some patches

Mikael Högqvist Fri, 29 Sep 2006 04:26:18 -0700

Hi,

I have a number of patches that I applied to the code I'm working
with. Here is a list:


unescaped.patch: checks for non-allowed characters in the RDF/XML parser

order_by.patch: support for simple ORDER BY (without function calls)
and for LIMIT

python_2_5.patch: makes it possible to use rdflib with python 2.5. It
is a quick fix, and might not be the correct way to solve things (see
__cmp__ in Literal and URIRef)

regex.patch: changes a simple typo for regex with three args
(regex(?name, "^ali", "i"))

sparql_optionals.patch: already mapped BNodes did not match any
bindings in the _bind method in sparql.py. With this patch the
Optional.py test case works correctly.

xml_output.patch: omits the namespace for elements (<sparql:result>
becomes <result>) and includes line breaks and indentation.

I did not have any problems running this with the unit tests. I hope
some of it can be of use... :)

Cheers,
Mikael

Index: rdflib/sparql/bison/SPARQLEvaluate.py
===================================================================
--- rdflib/sparql/bison/SPARQLEvaluate.py	(revision 866)
+++ rdflib/sparql/bison/SPARQLEvaluate.py	(working copy)
@@ -17,7 +17,7 @@
 from Util import ListRedirect
 from Operators import *
 from FunctionLibrary import *
-from SolutionModifier import DESCENDING_ORDER
+from SolutionModifier import ASCENDING_ORDER
 from Query import AskQuery, SelectQuery
 
 DEBUG = False
@@ -379,14 +379,23 @@
             orderBy     = []
             orderAsc    = []
             for orderCond in query.query.solutionModifier.orderClause:
-                expr = orderCond.expression.reduce()
-                assert isinstance(expr,Variable),"Support for ORDER BY with anything other than a variable is not supported: %s"%expr
-                orderBy.append(expr)
-                orderAsc.append(orderCond.order == DESCENDING_ORDER)
+                # is it a variable?
+                if isinstance(orderCond,Variable):
+                    orderBy.append(orderCond)
+                    orderAsc.append(ASCENDING_ORDER)
+                # is it another expression, only variables are supported
+                else:
+                    expr = orderCond.expression
+                    assert isinstance(expr,Variable),"Support for ORDER BY with anything other than a variable is not supported: %s"%expr
+                    orderBy.append(expr)                    
+                    orderAsc.append(orderCond.order == ASCENDING_ORDER)
+
+        limit = query.query.solutionModifier.limitClause and int(query.query.solutionModifier.limitClause) or None
+
         offset = query.query.solutionModifier.offsetClause and int(query.query.solutionModifier.offsetClause) or 0
         return result.select(query.query.variables,
                              query.query.distinct,
-                             query.query.solutionModifier.limitClause,
+                             limit,
                              orderBy,
                              orderAsc,
                              offset

Index: rdflib/Graph.py
===================================================================
--- rdflib/Graph.py	(revision 866)
+++ rdflib/Graph.py	(working copy)
@@ -1,3 +1,5 @@
+from __future__ import generators
+
 __doc__="""
 Instanciating Graphs with default store (IOMemory) and default identifier (a BNode):
 
@@ -144,7 +146,6 @@
 
 """
 
-from __future__ import generators
 from cStringIO import StringIO
 from rdflib import URIRef, BNode, Namespace, Literal, Variable
 from rdflib import RDF, RDFS
Index: rdflib/Literal.py
===================================================================
--- rdflib/Literal.py	(revision 866)
+++ rdflib/Literal.py	(working copy)
@@ -137,11 +137,17 @@
         s = super(Literal, self).__add__(val)
         return Literal(s, self.language, self.datatype)
 
+    def __cmp__(self, other):
+        result = cmp(unicode(self), unicode(other))
+        if result == 0:
+            return self.__eq__(other)
+        return result
+        
     def __eq__(self, other):
         if other==None:
             return False
         elif isinstance(other, Literal):
-            result = self.__cmp__(other)==False
+            result = cmp(unicode(self), unicode(other)) == False
             if result==True:
                 if self.datatype == None or self.datatype == '' :
                     if not(other.datatype == None or other.datatype == '') :
Index: rdflib/URIRef.py
===================================================================
--- rdflib/URIRef.py	(revision 866)
+++ rdflib/URIRef.py	(working copy)
@@ -14,7 +14,7 @@
 class URIRef(Identifier):
 
     __slots__ = ()
-
+    
     def __new__(cls, value, base=None):
         if base is not None:
             ends_in_hash = value.endswith("#")
@@ -60,6 +60,15 @@
         else:
             return self
 
+    def __cmp__(self, other):
+        return cmp(unicode(self), unicode(other))
+        
+    def __eq__(self, other):
+        if isinstance(other, Literal):
+            return other.__eq__(self)
+            
+        return unicode(self) == unicode(other)
+                        
     def __reduce__(self):
         return (URIRef, (unicode(self),))

Index: rdflib/sparql/bison/SPARQLEvaluate.py
===================================================================
--- rdflib/sparql/bison/SPARQLEvaluate.py	(revision 866)
+++ rdflib/sparql/bison/SPARQLEvaluate.py	(working copy)
@@ -149,7 +150,7 @@
     elif isinstance(expr,ParsedREGEXInvocation):
         return 'sparqlOperators.regex(%s,%s%s)%s'%(mapToOperator(expr.arg1,prolog,combinationArg),
                                                  mapToOperator(expr.arg2,prolog,combinationArg),
-                                                 expr.arg3 and ','+expr.arg3 or '',
+                                                 expr.arg3 and ',"'+expr.arg3 + '"' or '',
                                                  combinationInvokation)
     elif isinstance(expr,BuiltinFunctionCall):
         normBuiltInName = FUNCTION_NAMES[expr.name].lower()

Index: rdflib/sparql/sparql.py
===================================================================
--- rdflib/sparql/sparql.py	(revision 866)
+++ rdflib/sparql/sparql.py	(working copy)
@@ -1144,8 +1175,6 @@
                 return None
             else :
                 return self.bindings[r]
-        elif isinstance(r,(BNode)):
-            return self.bindings.get(r)            
         else :
             return r

Index: rdflib/syntax/parsers/RDFXMLHandler.py
===================================================================
--- rdflib/syntax/parsers/RDFXMLHandler.py	(revision 866)
+++ rdflib/syntax/parsers/RDFXMLHandler.py	(working copy)
@@ -38,6 +38,7 @@
 
 from xml.sax.saxutils import handler, quoteattr, escape
 from urlparse import urljoin, urldefrag
+import re
 
 RDFNS = RDF.RDFNS
 
@@ -206,11 +207,19 @@
     # element handler
     parent = property(get_parent)
 
+    def is_escaped_uri(self, uri):
+        esc_uri = re.compile(r'[\[\]\{}\|\^`<>" \\]')
+        find_esc = esc_uri.search(uri)
+        if find_esc != None:
+            self.error("URI character " + str(find_esc.end()) + " is not properly escaped")
+        return True
+        
     def absolutize(self, uri):
-        result = urljoin(self.current.base, uri, allow_fragments=1)
-        if uri and uri[-1]=="#" and result[-1]!="#":
-            result = "%s#" % result
-        return URIRef(result)
+        if self.is_escaped_uri(uri):
+            result = urljoin(self.current.base, uri, allow_fragments=1)
+            if uri and uri[-1]=="#" and result[-1]!="#":
+                result = "%s#" % result
+            return URIRef(result)
 
     def convert(self, name, qname, attrs):
         if name[0] is None:

Index: rdflib/sparql/QueryResult.py
===================================================================
--- rdflib/sparql/QueryResult.py	(revision 866)
+++ rdflib/sparql/QueryResult.py	(working copy)
@@ -11,14 +11,18 @@
         writer = XMLGenerator(output, encoding)
         writer.startDocument()
         writer.startPrefixMapping(u'sparql',SPARQL_XML_NAMESPACE)
-        writer.startPrefixMapping(u'xml'   ,u'http://www.w3.org/XML/1998/namespace')
-        writer.startElementNS((SPARQL_XML_NAMESPACE, u'sparql'), u'sparql', AttributesNSImpl({}, {}))
+        writer.startPrefixMapping(u'xml', u'http://www.w3.org/XML/1998/namespace')
+        writer.startElementNS((None, u'sparql'), u'', AttributesNSImpl({}, {}))
         self.writer = writer
         self._output = output
         self._encoding = encoding
+        self._endline = "\n"
+        self._tab = "\t"
+        self._output.write(self._endline)
 
     def write_header(self,allvarsL):
-        self.writer.startElementNS((SPARQL_XML_NAMESPACE, u'head'), u'head', AttributesNSImpl({}, {}))
+        self.writer.startElementNS((None, u'head'), u'head', AttributesNSImpl({}, {}))
+        self._output.write(self._endline)
         for i in xrange(0,len(allvarsL)) :
             attr_vals = {
                 (None, u'name'): unicode(allvarsL[i][1:]),
@@ -26,11 +30,14 @@
             attr_qnames = {
                 (None, u'name'): u'name',
                 }
-            self.writer.startElementNS((SPARQL_XML_NAMESPACE, u'variable'), 
+            self._output.write(self._tab)
+            self.writer.startElementNS((None, u'variable'), 
                                          u'variable', 
                                          AttributesNSImpl(attr_vals, attr_qnames))                
-            self.writer.endElementNS((SPARQL_XML_NAMESPACE, u'variable'), u'variable')
-        self.writer.endElementNS((SPARQL_XML_NAMESPACE, u'head'), u'head')
+            self.writer.endElementNS((None, u'variable'), u'variable')
+            self._output.write(self._endline)            
+        self.writer.endElementNS((None, u'head'), u'head')
+        self._output.write(self._endline)
         
     def write_results_header(self,orderBy,distinct):
         attr_vals = {
@@ -41,35 +48,51 @@
             (None, u'ordered')  : u'ordered',
             (None, u'distinct') : u'distinct'
             }
-        self.writer.startElementNS((SPARQL_XML_NAMESPACE, u'results'), 
+        self.writer.startElementNS((None, u'results'), 
                                      u'results', 
                                      AttributesNSImpl(attr_vals, attr_qnames))        
+        self._output.write(self._endline)
+        
+    def write_start_result(self):
+        self._output.write(self._tab)
+        self.writer.startElementNS((None, u'result'), u'result', AttributesNSImpl({}, {}))
+        self._output.write(self._endline)        
 
-    def write_result(self,name,val):
+    def write_end_result(self):
+        self._output.write(self._tab)    
+        self.writer.endElementNS((None, u'result'), u'result')
+        self._output.write(self._endline)
+    
+    def write_binding(self,name,val):
         if val:
-            self.writer.startElementNS((SPARQL_XML_NAMESPACE, u'result'), u'result', AttributesNSImpl({}, {}))
             attr_vals = {
                 (None, u'name')  : unicode(name),
                 }
             attr_qnames = {
                 (None, u'name')  : u'name',
                 }
-            self.writer.startElementNS((SPARQL_XML_NAMESPACE, u'binding'), 
+            self._output.write(self._tab*2)
+            self.writer.startElementNS((None, u'binding'), 
                                    u'binding', 
                                    AttributesNSImpl(attr_vals, attr_qnames))
+            self._output.write(self._endline)                                   
 
             if isinstance(val,URIRef) :
-                self.writer.startElementNS((SPARQL_XML_NAMESPACE, u'uri'), 
+                self._output.write(self._tab*3)
+                self.writer.startElementNS((None, u'uri'), 
                                        u'uri', 
                                        AttributesNSImpl(attr_vals, attr_qnames))
                 self.writer.characters(val)
-                self.writer.endElementNS((SPARQL_XML_NAMESPACE, u'uri'),u'uri')
+                self.writer.endElementNS((None, u'uri'),u'uri')
+                self._output.write(self._endline)                
             elif isinstance(val,BNode) :
-                self.writer.startElementNS((SPARQL_XML_NAMESPACE, u'bnode'), 
+                self._output.write(self._tab*3)
+                self.writer.startElementNS((None, u'bnode'), 
                                        u'bnode', 
                                        AttributesNSImpl(attr_vals, attr_qnames))
                 self.writer.characters(val)
-                self.writer.endElementNS((SPARQL_XML_NAMESPACE, u'bnode'),u'bnode')
+                self.writer.endElementNS((None, u'bnode'),u'bnode')
+                self._output.write(self._endline)                
             elif isinstance(val,Literal) :
                 attr_vals = {}
                 attr_qnames = {}
@@ -80,21 +103,26 @@
                     attr_vals[(None,u'datatype')] = val.datatype
                     attr_qnames[(None,u'datatype')] = u'datatype'
 
-                self.writer.startElementNS((SPARQL_XML_NAMESPACE, u'literal'), 
+                self._output.write(self._tab*3)
+                self.writer.startElementNS((None, u'literal'), 
                                        u'literal', 
                                        AttributesNSImpl(attr_vals, attr_qnames))
                 self.writer.characters(val)
-                self.writer.endElementNS((SPARQL_XML_NAMESPACE, u'literal'),u'literal')
+                self.writer.endElementNS((None, u'literal'),u'literal')
+                self._output.write(self._endline)                
                 
             else:
                 raise Exception("Unsupported RDF term: %s"%val)
 
-            self.writer.endElementNS((SPARQL_XML_NAMESPACE, u'binding'),u'binding')        
-            self.writer.endElementNS((SPARQL_XML_NAMESPACE, u'result'), u'result')
+            self._output.write(self._tab*2)
+            self.writer.endElementNS((None, u'binding'),u'binding')
+            self._output.write(self._endline)
 
     def close(self):
-        self.writer.endElementNS((SPARQL_XML_NAMESPACE, u'results'), u'results')
-        self.writer.endElementNS((SPARQL_XML_NAMESPACE, u'sparql'), u'sparql')
+        self.writer.endElementNS((None, u'results'), u'results')
+        self._output.write(self._endline)        
+        self.writer.endElementNS((None, u'sparql'), u'sparql')
+        self._output.write(self._endline)        
         self.writer.endDocument()
 
 def retToJSON(val) :
@@ -219,16 +247,19 @@
                writer.write_results_header(self.orderBy,self.distinct)
                for i in xrange(0,len(self.selected)) :
                    hit = self.selected[i]
+                   writer.write_start_result()
                    if len(self.selectionF) == 0 :
                        for j in xrange(0,len(allvarsL)) :
-                           writer.write_result(allvarsL[j][1:],hit[j])
+                           writer.write_binding(allvarsL[j][1:],hit[j])
                    elif len(self.selectionF) == 1 :
-                       writer.write_result(self.selectionF[0][1:],hit)
+                       writer.write_binding(self.selectionF[0][1:],hit)
                    else:
+                        
                        for j in xrange(0,len(self.selectionF)) :
-                           writer.write_result(self.selectionF[j][1:],hit[j])
+                           writer.write_binding(self.selectionF[j][1:],hit[j])
+                   writer.write_end_result()
                writer.close()
                return out.getvalue()
            return retval
         else :
-           raise Exception("Result format not implemented: %s"%format)
\ No newline at end of file
+           raise Exception("Result format not implemented: %s"%format)

from rdflib import ConjunctiveGraph, plugin
from rdflib.store import Store
from StringIO import StringIO
import unittest

# N3 fixture parsed by OptionalTest._query: two foaf:Person blank nodes.
# _:a has a foaf:name and two foaf:mbox statements; _:b has only a foaf:name,
# so it has nothing to match inside an OPTIONAL { ?x foaf:mbox ?mbox } pattern.
# NOTE(review): both mbox URIs read "[EMAIL PROTECTED]" here - presumably the
# addresses were masked by the mail archive; confirm the original values.
test_data = """
@prefix foaf:       <http://xmlns.com/foaf/0.1/> .
@prefix rdf:        <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

_:a  rdf:type        foaf:Person .
_:a  foaf:name       "Alice" .
_:a  foaf:mbox       <mailto:[EMAIL PROTECTED]> .
_:a  foaf:mbox       <mailto:[EMAIL PROTECTED]> .

_:b  rdf:type        foaf:Person .
_:b  foaf:name       "Bob" .
"""

class OptionalTest(unittest.TestCase):
    """Test case for a SPARQL query that uses an OPTIONAL graph pattern."""

    def _query(self, query):
        """Evaluate ``query`` on a fresh graph loaded from ``test_data``.

        A new IOMemory-backed ConjunctiveGraph is built on every call, the
        module-level N3 fixture is parsed into it, and whatever
        ``graph.query`` returns is handed back unchanged.
        """
        store = plugin.get('IOMemory',Store)()
        graph = ConjunctiveGraph(store)
        fixture = StringIO(test_data)
        graph.parse(fixture, format="n3")
        return graph.query(query)

    def testOptional(self):
        # Each result row is compared after str() conversion of both columns,
        # which is why the row for Bob (no foaf:mbox in the fixture) is
        # expected to carry the string 'None'.
        test_query = """
            PREFIX foaf: <http://xmlns.com/foaf/0.1/>
            SELECT ?name ?mbox
            WHERE  { ?x foaf:name  ?name .
                 OPTIONAL { ?x  foaf:mbox  ?mbox }
            }
            """

        # NOTE(review): the two Alice entries are textually identical here and
        # therefore collapse to one set member - presumably the addresses were
        # masked by the mail archive; confirm against the original test.
        expected = set([
            ('Alice', 'mailto:[EMAIL PROTECTED]'),
            ('Alice', 'mailto:[EMAIL PROTECTED]'),
            ('Bob', 'None'),
        ])

        found = set()
        for (name, mail) in self._query(test_query):
            found.add((str(name), str(mail)))

        self.assertEqual(found, expected)
        
def main():
    """Run this module's tests by delegating to ``unittest.main()``."""
    unittest.main()
        
# Run the tests only when this file is executed directly as a script.
if __name__ == '__main__':
    main()

_______________________________________________
Dev mailing list
[email protected]
http://rdflib.net/mailman/listinfo/dev

[rdflib-dev] some patches

Reply via email to