Skip to content

Commit 1db066a

Browse files
committed
Add more special handling for the URI file scheme in general, and some Windows-specific code to disambiguate a drive letter from a scheme.
Fixes #443.
1 parent e478a7c commit 1db066a

File tree

3 files changed

+67
-7
lines changed

3 files changed

+67
-7
lines changed

lib/rdf/model/uri.rb

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ class URI
8080
# scheme, authority, path, query, fragment
8181
IRI_PARTS = /^(?:([^:\/?#]+):)?(?:\/\/([^\/?#]*))?([^?#]*)(\?[^#]*)?(#.*)?$/.freeze
8282

83+
# Special version for file-scheme on Windows (path SHOULD begin with /, but may not)
84+
# authority, path, query, fragment (the scheme is matched literally as "file" and is not captured)
85+
FILE_PARTS = /^file:(?:\/\/(#{IHOST}))?(\/?[^?#]*)(\?[^#]*)?(#.*)?$/.freeze
86+
8387
# Remove dot expressions regular expressions
8488
RDS_2A = /^\.?\.\/(.*)$/.freeze
8589
RDS_2B1 = /^\/\.$/.freeze
@@ -851,8 +855,7 @@ def inspect
851855
# lexical representation of URI, either absolute or relative
852856
# @return [String]
853857
def value
854-
return @value if @value
855-
@value = [
858+
@value ||= [
856859
("#{scheme}:" if absolute?),
857860
("//#{authority}" if authority),
858861
path,
@@ -883,17 +886,35 @@ def object
883886
#
884887
# @param [String, to_s] value
885888
# @return [Object{Symbol => String}]
889+
# @see https://datatracker.ietf.org/doc/html/rfc8089
886890
def parse(value)
887-
value = value.to_s.dup.force_encoding(Encoding::ASCII_8BIT)
891+
value = value.to_s.dup.force_encoding(Encoding::UTF_8) unless value && value.encoding == Encoding::UTF_8
888892
parts = {}
889-
if matchdata = IRI_PARTS.match(value)
893+
if matchdata = FILE_PARTS.match(value)
894+
# A file-based URI is always in one of the following forms:
895+
# * file:/path - absolute path, no host name
896+
# * file:///path - absolute path, empty host name
897+
# * file://hostname/path - absolute path with authority.
898+
# * file://path – is invalid, but treated as file:///path
899+
scheme = 'file'
900+
authority, path, query, fragment = matchdata[1..-1]
901+
if authority && authority.match?(/^[A-Za-z]$/) && Gem.win_platform?
902+
# In this case, the authority is really a drive letter, so it becomes part of the path
903+
authority, path = nil, "#{authority}#{path}"
904+
end
905+
# We accept paths that aren't absolute, but coerce them to be absolute
906+
path = "/#{path}" unless path.start_with?('/')
907+
elsif matchdata = IRI_PARTS.match(value)
890908
scheme, authority, path, query, fragment = matchdata[1..-1]
909+
authority = nil if authority && authority.empty?
891910

892-
if Gem.win_platform? && scheme && !authority && scheme.match?(/^[a-zA-Z]$/)
893-
# A drive letter, not a scheme
894-
scheme, path = nil, "#{scheme}:#{path}"
911+
if scheme && scheme.match?(/^[A-Za-z]$/) && Gem.win_platform?
912+
# On Windows, treat the leading "D:" of D:/foo/bar as a drive letter (part of the path), not as a scheme
913+
scheme, authority, path = 'file', nil, "/#{scheme}:#{path}"
895914
end
915+
end
896916

917+
if matchdata
897918
userinfo, hostport = authority.to_s.split('@', 2)
898919
hostport, userinfo = userinfo, nil unless hostport
899920
user, password = userinfo.to_s.split(':', 2)

rdf.gemspec

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ Gem::Specification.new do |gem|
3535
gem.add_runtime_dependency 'logger', '~> 1.5'
3636
gem.add_runtime_dependency 'ostruct', '~> 0.6'
3737
gem.add_development_dependency 'base64', '~> 0.2'
38+
gem.add_development_dependency 'fiddle', '~> 1.1'
3839
gem.add_development_dependency 'rdf-spec', '~> 3.3'
3940
gem.add_development_dependency 'rdf-turtle', '~> 3.3'
4041
gem.add_development_dependency 'rdf-vocab', '~> 3.3'

spec/model_uri_spec.rb

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,44 @@
524524
expect(u1.canonicalize.hash).to eq u2.hash
525525
end
526526
end
527+
528+
context "Windows specific canonicalization", skip: ('only windows' unless Gem.win_platform?) do
529+
{
530+
"no scheme, relative path starting with drive letter" => [
531+
"D:a/b",
532+
"file:/D:a/b"
533+
],
534+
"no authority and relative path" => [
535+
"file:D:a/b",
536+
"file:/D:a/b"
537+
],
538+
"no authority and absolute path" => [
539+
"file:/D:a/b",
540+
"file:/D:a/b"
541+
],
542+
"scheme with //, no authority and absolute path" => [
543+
"file://D:a/b",
544+
"file:/D:a/b"
545+
],
546+
"empty authority and absolute path" => [
547+
"file:///D:a/b",
548+
"file:///D:a/b"
549+
],
550+
"authority and absolute path" => [
551+
"file://host/D:a/b",
552+
"file://host/D:a/b"
553+
],
554+
}.each do |name, (input, output)|
555+
it name do
556+
u1 = RDF::URI(input)
557+
u2 = RDF::URI(output)
558+
expect(u1.canonicalize.to_s).to eq u2.to_s
559+
expect(u1.canonicalize).to eq u1.canonicalize
560+
expect(u1.canonicalize.hash).to eq u2.hash
561+
end
562+
end
563+
end
564+
527565
it "#canonicalize! alters resource" do
528566
u1 = RDF::URI("eXAMPLE:example.com/foo")
529567
u2 = RDF::URI("example:example.com/foo")

0 commit comments

Comments
 (0)