The escaping in NTSerializer.py is way off. Here's an attempt at a
better one (in Python). What it really needs is to be run against the
W3C N-Triples test suite, which I have not tried yet.
==== rdflib/syntax/serializers/NTSerializer.py ====
from rdflib.syntax.serializers import Serializer
from rdflib import Literal, URIRef
# Short-form escapes that N-Triples defines for specific code points.
# See http://www.w3.org/TR/rdf-testcases/#ntrip_strings
_NT_SHORT_ESCAPES = {
    0x09: '\\t',
    0x0A: '\\n',
    0x0D: '\\r',
    0x22: '\\"',
    0x5C: '\\\\',
}


def _nt_escape(text):
    """Return *text* with the N-Triples string escapes applied.

    Tab, newline, carriage return, double quote and backslash get their
    short escapes.  Every other character outside printable ASCII
    (0x20-0x7E) is written as ``\\uXXXX`` or, above U+FFFF, as
    ``\\UXXXXXXXX``.  Hex digits are upper case, as the N-Triples grammar
    requires; this is why the ``unicode_escape`` codec (lower-case
    digits) is not used.
    """
    chars = list(text)
    count = len(chars)
    pieces = []
    i = 0
    while i < count:
        char = chars[i]
        o = ord(char)
        i += 1
        # A high surrogate followed by a low surrogate (how narrow
        # Python 2 builds store non-BMP characters) is folded back into
        # the single code point it encodes, so it is emitted as one \U
        # escape rather than two \u escapes.
        if 0xD800 <= o <= 0xDBFF and i < count:
            low = ord(chars[i])
            if 0xDC00 <= low <= 0xDFFF:
                o = 0x10000 + ((o - 0xD800) << 10) + (low - 0xDC00)
                i += 1
        if o in _NT_SHORT_ESCAPES:
            pieces.append(_NT_SHORT_ESCAPES[o])
        elif o > 0xFFFF:
            pieces.append("\\U%08X" % o)
        elif o <= 0x1F or o >= 0x7F:
            pieces.append("\\u%04X" % o)
        else:
            pieces.append(char)
    return ''.join(pieces)


def ntNode(node):
    """Return the N-Triples text for a single RDF term.

    See http://www.w3.org/TR/rdf-testcases/#ntriples

    This is a pure-python, not-well-tested serializer; the W3C test
    suite should still be run against it.

    node -- the term to serialize.  ``Literal`` and ``URIRef`` instances
        are escaped here; any other node is delegated to its own
        ``n3()`` method.

    Returns the term as a string: ``"..."`` (optionally followed by
    ``@lang`` or ``^^<datatype>``) for a literal, ``<...>`` for a URI
    reference.

    Raises ValueError if a literal carries both a language and a
    datatype, which N-Triples cannot represent.
    """
    if not isinstance(node, (Literal, URIRef)):
        return node.n3()

    escaped = _nt_escape(node)

    if isinstance(node, Literal):
        ret = '"%s"' % escaped
        if node.language:
            if node.datatype:
                raise ValueError("NT can't represent a literal with both "
                                 "language and datatype")
            ret = ret + "@" + node.language
        elif node.datatype:
            # The datatype is a URI reference too, so it gets the same
            # escaping as any other URI written between angle brackets.
            ret = ret + "^^<%s>" % _nt_escape(u"%s" % node.datatype)
    elif isinstance(node, URIRef):
        ret = '<%s>' % escaped
    return ret
class NTSerializer(Serializer):
    """Serializer that writes the triples of a store as N-Triples lines."""

    def __init__(self, store):
        """
        I serialize RDF graphs in NTriples format.
        """
        super(NTSerializer, self).__init__(store)

    def serialize(self, stream, base=None, encoding=None):
        """Write every triple in ``self.store`` to *stream*.

        Each triple becomes one line: the three terms rendered by
        ``ntNode``, separated by single spaces and terminated by
        ``".\\n"``.

        stream -- object with a ``write`` method that receives each line.
        base -- not supported; a notice is printed if one is given.
        encoding -- accepted for interface compatibility; not used here.
        """
        if base is not None:
            # Parenthesised single-argument form prints the same text
            # under both the print statement and the print function.
            print("TODO: NTSerializer does not support base")
        # A plain loop instead of map(): the writes are side effects, and
        # a lazy map() would never perform them.
        for triple in self.store:
            stream.write(ntNode(triple[0]) + u" " +
                         ntNode(triple[1]) + u" " +
                         ntNode(triple[2]) + u".\n")
_______________________________________________
Dev mailing list
[email protected]
http://rdflib.net/mailman/listinfo/dev