@@ -72,6 +72,10 @@ def self.escape(string, encoding = nil)
7272 buffer << case u . ord
7373 when ( 0x00 ..0x7F )
7474 escape_ascii ( u , encoding )
75+ when ( 0xFFFE ..0xFFFF )
76+ # NOT A CHARACTER
77+ # @see https://corp.unicode.org/~asmus/proposed_faq/private_use.html#history1
78+ escape_uchar ( u )
7579 else
7680 u
7781 end
@@ -100,12 +104,10 @@ def self.escape(string, encoding = nil)
100104 # @see http://www.w3.org/TR/rdf-testcases/#ntrip_strings
101105 def self . escape_unicode ( u , encoding )
102106 case ( u = u . ord )
103- when ( 0x00 ..0x7F ) # ASCII 7-bit
107+ when ( 0x00 ..0x7F ) # ECHAR
104108 escape_ascii ( u , encoding )
105- when ( 0x80 ..0xFFFF ) # Unicode BMP
106- escape_utf16 ( u )
107- when ( 0x10000 ..0x10FFFF ) # Unicode
108- escape_utf32 ( u )
109+ when ( 0x80 ...0x10FFFF ) # UCHAR
110+ escape_uchar ( u )
109111 else
110112 raise ArgumentError . new ( "expected a Unicode codepoint in (0x00..0x10FFFF), but got 0x#{ u . to_s ( 16 ) } " )
111113 end
@@ -132,18 +134,34 @@ def self.escape_ascii(u, encoding)
132134 when ( 0x0D ) then "\\ r"
133135 when ( 0x22 ) then "\\ \" "
134136 when ( 0x5C ) then "\\ \\ "
135- when ( 0x00 ..0x1F ) then escape_utf16 ( u )
136- when ( 0x7F ) then escape_utf16 ( u )
137+ when ( 0x00 ..0x1F ) then escape_uchar ( u )
138+ when ( 0x7F ) then escape_uchar ( u ) # DEL
137139 when ( 0x20 ..0x7E ) then u . chr
138140 else
139141 raise ArgumentError . new ( "expected an ASCII character in (0x00..0x7F), but got 0x#{ u . to_s ( 16 ) } " )
140142 end
141143 end
142144
##
# Escapes a single code point as a numeric (`UCHAR`-style) escape sequence.
#
# Code points in `0x00..0xFFFF` are written as a backslash, a lower-case
# `u` and four upper-case hexadecimal digits; every other value is written
# as a backslash, an upper-case `U` and eight upper-case hexadecimal digits.
#
# @param  [Integer, #ord] u
#   the code point, or any object whose `#ord` yields it (e.g. a
#   one-character String)
# @return [String]
# @see    https://www.w3.org/TR/rdf12-concepts/#rdf-strings
# @since  3.4.4
def self.escape_uchar(u)
  # Resolve the code point once; both the range test and the formatter use it.
  codepoint = u.ord
  case codepoint
  when (0x00..0xFFFF)
    sprintf("\\u%04X", codepoint)
  else
    sprintf("\\U%08X", codepoint)
  end
end
159+
##
# Formats a code point as a backslash, a lower-case `u` and at least four
# upper-case hexadecimal digits (zero-padded on the left).
#
# @param  [Integer, #ord] u
#   the code point, or any object whose `#ord` yields it
# @return [String]
# @see    http://www.w3.org/TR/rdf-testcases/#ntrip_strings
# @deprecated use escape_uchar, this name is non-intuitive
def self.escape_utf16(u)
  codepoint = u.ord
  format("\\u%04X", codepoint)
end
@@ -152,6 +170,7 @@ def self.escape_utf16(u)
# Formats a code point as a backslash, an upper-case `U` and at least eight
# upper-case hexadecimal digits (zero-padded on the left).
#
# @param  [Integer, #ord] u
#   the code point, or any object whose `#ord` yields it
# @return [String]
# @see    http://www.w3.org/TR/rdf-testcases/#ntrip_strings
# @deprecated use escape_uchar, this name is non-intuitive
def self.escape_utf32(u)
  codepoint = u.ord
  format("\\U%08X", codepoint)
end
@@ -283,9 +302,9 @@ def format_uri(uri, **options)
283302 buffer . set_encoding ( encoding )
284303 string . each_char do |u |
285304 buffer << case u . ord
286- when ( 0x00 ..0x20 ) then self . class . escape_utf16 ( u )
305+ when ( 0x00 ..0x20 ) then self . class . escape_uchar ( u )
287306 when 0x22 , 0x3c , 0x3e , 0x5c , 0x5e , 0x60 , 0x7b , 0x7c , 0x7d # "<>\^`{|}
288- self . class . escape_utf16 ( u )
307+ self . class . escape_uchar ( u )
289308 else u
290309 end
291310 end
@@ -297,11 +316,10 @@ def format_uri(uri, **options)
297316 buffer . set_encoding ( Encoding ::ASCII )
298317 string . each_byte do |u |
299318 buffer << case u
300- when ( 0x00 ..0x20 ) then self . class . escape_utf16 ( u )
319+ when ( 0x00 ..0x20 ) then self . class . escape_uchar ( u )
301320 when 0x22 , 0x3c , 0x3e , 0x5c , 0x5e , 0x60 , 0x7b , 0x7c , 0x7d # "<>\^`{|}
302- self . class . escape_utf16 ( u )
303- when ( 0x80 ..0xFFFF ) then self . class . escape_utf16 ( u )
304- when ( 0x10000 ..0x10FFFF ) then self . class . escape_utf32 ( u )
321+ self . class . escape_uchar ( u )
322+ when ( 0x80 ..0x10FFFF ) then self . class . escape_uchar ( u )
305323 else u
306324 end
307325 end
0 commit comments