Skip to content

Commit 1723c5e

Browse files
committed
Always encode U+FFFE and U+FFFF in N-Triples/Quads.
1 parent 55fbd63 commit 1723c5e

File tree

2 files changed

+43
-30
lines changed

2 files changed

+43
-30
lines changed

lib/rdf/ntriples/writer.rb

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@ def self.escape(string, encoding = nil)
7272
buffer << case u.ord
7373
when (0x00..0x7F)
7474
escape_ascii(u, encoding)
75+
when (0xFFFE..0xFFFF)
76+
# NOT A CHARACTER
77+
# @see https://corp.unicode.org/~asmus/proposed_faq/private_use.html#history1
78+
escape_uchar(u)
7579
else
7680
u
7781
end
@@ -100,12 +104,10 @@ def self.escape(string, encoding = nil)
100104
# @see http://www.w3.org/TR/rdf-testcases/#ntrip_strings
101105
def self.escape_unicode(u, encoding)
102106
case (u = u.ord)
103-
when (0x00..0x7F) # ASCII 7-bit
107+
when (0x00..0x7F) # ECHAR
104108
escape_ascii(u, encoding)
105-
when (0x80..0xFFFF) # Unicode BMP
106-
escape_utf16(u)
107-
when (0x10000..0x10FFFF) # Unicode
108-
escape_utf32(u)
109+
when (0x80...0x10FFFF) # UCHAR
110+
escape_uchar(u)
109111
else
110112
raise ArgumentError.new("expected a Unicode codepoint in (0x00..0x10FFFF), but got 0x#{u.to_s(16)}")
111113
end
@@ -132,18 +134,34 @@ def self.escape_ascii(u, encoding)
132134
when (0x0D) then "\\r"
133135
when (0x22) then "\\\""
134136
when (0x5C) then "\\\\"
135-
when (0x00..0x1F) then escape_utf16(u)
136-
when (0x7F) then escape_utf16(u)
137+
when (0x00..0x1F) then escape_uchar(u)
138+
when (0x7F) then escape_uchar(u) # DEL
137139
when (0x20..0x7E) then u.chr
138140
else
139141
raise ArgumentError.new("expected an ASCII character in (0x00..0x7F), but got 0x#{u.to_s(16)}")
140142
end
141143
end
142144

145+
##
# Escapes a single codepoint as a numeric `UCHAR` escape sequence.
#
# Codepoints in `0x00..0xFFFF` are written in the short `\uXXXX` form;
# every other value is written in the long `\UXXXXXXXX` form. No check is
# made that the value is a valid Unicode codepoint.
#
# @param  [Integer, #ord] u
# @return [String]
# @see    https://www.w3.org/TR/rdf12-concepts/#rdf-strings
# @since  3.4.4
def self.escape_uchar(u)
  # Resolve the codepoint once; Integer#ord returns the integer itself.
  codepoint = u.ord
  case codepoint
  when (0x00..0xFFFF)
    sprintf("\\u%04X", codepoint)
  else
    sprintf("\\U%08X", codepoint)
  end
end
159+
143160
##
# Formats a codepoint as a `\uXXXX` escape, zero-padded to at least four
# uppercase hexadecimal digits.
#
# The width is only a minimum: no range check is performed, so a value
# above 0xFFFF is emitted with more than four digits.
#
# @param  [Integer, #ord] u
# @return [String]
# @see    http://www.w3.org/TR/rdf-testcases/#ntrip_strings
# @deprecated use escape_uchar, this name is non-intuitive
def self.escape_utf16(u)
  sprintf("\\u%04X", u.ord)
end
@@ -152,6 +170,7 @@ def self.escape_utf16(u)
152170
# Formats a codepoint as a `\UXXXXXXXX` escape, zero-padded to at least
# eight uppercase hexadecimal digits. No range check is performed.
#
# @param  [Integer, #ord] u
# @return [String]
# @see    http://www.w3.org/TR/rdf-testcases/#ntrip_strings
# @deprecated use escape_uchar, this name is non-intuitive
def self.escape_utf32(u)
  sprintf("\\U%08X", u.ord)
end
@@ -283,9 +302,9 @@ def format_uri(uri, **options)
283302
buffer.set_encoding(encoding)
284303
string.each_char do |u|
285304
buffer << case u.ord
286-
when (0x00..0x20) then self.class.escape_utf16(u)
305+
when (0x00..0x20) then self.class.escape_uchar(u)
287306
when 0x22, 0x3c, 0x3e, 0x5c, 0x5e, 0x60, 0x7b, 0x7c, 0x7d # "<>\^`{|}
288-
self.class.escape_utf16(u)
307+
self.class.escape_uchar(u)
289308
else u
290309
end
291310
end
@@ -297,11 +316,10 @@ def format_uri(uri, **options)
297316
buffer.set_encoding(Encoding::ASCII)
298317
string.each_byte do |u|
299318
buffer << case u
300-
when (0x00..0x20) then self.class.escape_utf16(u)
319+
when (0x00..0x20) then self.class.escape_uchar(u)
301320
when 0x22, 0x3c, 0x3e, 0x5c, 0x5e, 0x60, 0x7b, 0x7c, 0x7d # "<>\^`{|}
302-
self.class.escape_utf16(u)
303-
when (0x80..0xFFFF) then self.class.escape_utf16(u)
304-
when (0x10000..0x10FFFF) then self.class.escape_utf32(u)
321+
self.class.escape_uchar(u)
322+
when (0x80..0x10FFFF) then self.class.escape_uchar(u)
305323
else u
306324
end
307325
end

spec/ntriples_spec.rb

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1273,27 +1273,22 @@
12731273
context "c14n" do
12741274
shared_examples "c14n" do |statement, result|
12751275
context "given #{statement}" do
1276-
subject {RDF::NTriples::Writer.buffer(validate: false, canonicalize: true, logger: logger) {|w| w << statement}}
1277-
if result
1278-
specify {expect(subject).to eq "#{result}\n"}
1279-
else
1280-
specify {expect {subject}.to raise_error(RDF::WriterError)}
1276+
it "produces #{result}" do
1277+
g = parse(statement, canonicalize: true)
1278+
expect(g.count).to eq 1
1279+
c14n = RDF::NTriples::Writer.buffer(validate: false, canonicalize: true, logger: logger) {|w| w << g}
1280+
expect(c14n).to eq "#{result}\n"
12811281
end
12821282
end
1283-
end
1283+
end
12841284

12851285
{
1286-
RDF::Statement(RDF::URI("https://rubygems.org/gems/rdf"), RDF::URI("http://purl.org/dc/terms/creator").dup, RDF::URI("http://ar.to/#self")) =>
1287-
RDF::Statement(RDF::URI("https://rubygems.org/gems/rdf"), RDF::URI("http://purl.org/dc/terms/creator").dup, RDF::URI("http://ar.to/#self")),
1288-
RDF::Statement(RDF::URI("https://rubygems.org/gems/rdf"), RDF::URI("http://purl.org/dc/terms/creator").dup, RDF::Literal("literal")) =>
1289-
RDF::Statement(RDF::URI("https://rubygems.org/gems/rdf"), RDF::URI("http://purl.org/dc/terms/creator").dup, RDF::Literal("literal")),
1290-
RDF::Statement(RDF::URI('file:///path/to/file with spaces.txt'), RDF::URI("http://purl.org/dc/terms/creator").dup, RDF::URI("http://ar.to/#self")) =>
1291-
RDF::Statement(RDF::URI('file:///path/to/file%20with%20spaces.txt'), RDF::URI("http://purl.org/dc/terms/creator").dup, RDF::URI("http://ar.to/#self")),
1292-
RDF::Statement(nil, RDF::URI("http://purl.org/dc/terms/creator").dup, RDF::URI("http://ar.to/#self")) => nil,
1293-
RDF::Statement(RDF::URI("https://rubygems.org/gems/rdf"), nil, RDF::URI("http://ar.to/#self")) => nil,
1294-
RDF::Statement(RDF::URI("https://rubygems.org/gems/rdf"), RDF::URI("http://purl.org/dc/terms/creator").dup, nil) => nil,
1295-
RDF::Statement(RDF::Literal("literal"), RDF::URI("http://purl.org/dc/terms/creator").dup, RDF::URI("http://ar.to/#self")) => nil,
1296-
RDF::Statement(RDF::URI("https://rubygems.org/gems/rdf"), RDF::Literal("literal"), RDF::URI("http://ar.to/#self")) => nil,
1286+
%(<https://rubygems.org/gems/rdf> <http://purl.org/dc/terms/creator> <http://ar.to/#self> .) =>
1287+
%(<https://rubygems.org/gems/rdf> <http://purl.org/dc/terms/creator> <http://ar.to/#self> .),
1288+
%(<https://rubygems.org/gems/rdf> <http://purl.org/dc/terms/creator> "literal" .) =>
1289+
%(<https://rubygems.org/gems/rdf> <http://purl.org/dc/terms/creator> "literal" .),
1290+
%(<http://a.example/s> <http://a.example/p> "\\U00000000\\U00000001\\U00000002\\U00000003\\U00000004\\U00000005\\U00000006\\U00000007\\U0000000B\\U0000000E\\U0000000F\\U00000010\\U00000011\\U00000012\\U00000013\\U00000014\\U00000015\\U00000016\\U00000017\\U00000018\\U00000019\\U0000001A\\U0000001B\\U0000001C\\U0000001D\\U0000001E\\U0000001F\\U0000007F\\U0000FFFE\\U0000FFFF" .) =>
1291+
%(<http://a.example/s> <http://a.example/p> "\\u0000\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\u000B\\u000E\\u000F\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017\\u0018\\u0019\\u001A\\u001B\\u001C\\u001D\\u001E\\u001F\\u007F\\uFFFE\\uFFFF" .),
12971292
}.each do |st, result|
12981293
include_examples "c14n", st, result
12991294
end

0 commit comments

Comments
 (0)