@@ -80,6 +80,10 @@ class URI
8080 # scheme, authority, path, query, fragment
8181 IRI_PARTS = /^(?:([^:\/ ?#]+):)?(?:\/ \/ ([^\/ ?#]*))?([^?#]*)(\? [^#]*)?(#.*)?$/ . freeze
8282
83+ # Special version for file-scheme on Windows (path SHOULD begin with /, but may not)
84+ # authority, path, query, fragment (the scheme is always the literal "file" and is not captured)
85+ FILE_PARTS = /^file:(?:\/ \/ (#{ IHOST } ))?(\/ ?[^?#]*)(\? [^#]*)?(#.*)?$/ . freeze
86+
8387 # Remove dot expressions regular expressions
8488 RDS_2A = /^\. ?\. \/ (.*)$/ . freeze
8589 RDS_2B1 = /^\/ \. $/ . freeze
@@ -851,8 +855,7 @@ def inspect
851855 # lexical representation of URI, either absolute or relative
852856 # @return [String]
853857 def value
854- return @value if @value
855- @value = [
858+ @value ||= [
856859 ( "#{ scheme } :" if absolute? ) ,
857860 ( "//#{ authority } " if authority ) ,
858861 path ,
@@ -883,17 +886,35 @@ def object
883886 #
884887 # @param [String, to_s] value
885888 # @return [Object{Symbol => String}]
889+ # @see https://datatracker.ietf.org/doc/html/rfc8089
886890 def parse ( value )
887- value = value . to_s . dup . force_encoding ( Encoding ::ASCII_8BIT )
891+ value = value . to_s . dup . force_encoding ( Encoding ::UTF_8 ) unless value && value . encoding == Encoding :: UTF_8
888892 parts = { }
889- if matchdata = IRI_PARTS . match ( value )
893+ if matchdata = FILE_PARTS . match ( value )
894+ # A file-based URI is always in the following form:
895+ # * file:/path - absolute path, no host name
896+ # * file:///path - absolute path, empty host name
897+ # * file://hostname/path - absolute path with authority.
898+ # * file://path – is invalid, but treated as file:///path
899+ scheme = 'file'
900+ authority , path , query , fragment = matchdata [ 1 ..-1 ]
901+ if authority && authority . match? ( /^[A-Za-z]$/ ) && Gem . win_platform?
902+ # In this case, the authority is really a drive letter, so treat it as part of the path
903+ authority , path = nil , "#{ authority } #{ path } "
904+ end
905+ # We accept paths that aren't absolute, but coerce them to be absolute
906+ path = "/#{ path } " unless path . start_with? ( '/' )
907+ elsif matchdata = IRI_PARTS . match ( value )
890908 scheme , authority , path , query , fragment = matchdata [ 1 ..-1 ]
909+ authority = nil if authority && authority . empty?
891910
892- if Gem . win_platform? && scheme && ! authority && scheme . match? ( /^[a-zA-Z ]$/ )
893- # A drive letter , not a scheme
894- scheme , path = nil , "#{ scheme } :#{ path } "
911+ if scheme && scheme . match? ( /^[A-Za-z ]$/ ) && Gem . win_platform?
912+ # On Windows treat D:/foo/bar as a path , not a scheme
913+ scheme , authority , path = 'file' , nil , "/ #{ scheme } :#{ path } "
895914 end
915+ end
896916
917+ if matchdata
897918 userinfo , hostport = authority . to_s . split ( '@' , 2 )
898919 hostport , userinfo = userinfo , nil unless hostport
899920 user , password = userinfo . to_s . split ( ':' , 2 )
0 commit comments