@@ -14,6 +14,14 @@ import (
1414 "github.com/slash3b/utfbom"
1515)
1616
// Byte-order-mark fixtures shared by the tests in this file.
var (
	// UTF-8 BOM (3 bytes).
	utf8BOM = []byte{0xEF, 0xBB, 0xBF}

	// UTF-16 BOMs (2 bytes each), big endian then little endian.
	utf16BEBOM = []byte{0xFE, 0xFF}
	utf16LEBOM = []byte{0xFF, 0xFE}

	// UTF-32 BOMs (4 bytes each), big endian then little endian.
	utf32BEBOM = []byte{0x00, 0x00, 0xFE, 0xFF}
	utf32LEBOM = []byte{0xFF, 0xFE, 0x00, 0x00}
)
24+
1725func TestDetectBom (t * testing.T ) {
1826 testCases := []struct {
1927 name string
@@ -127,6 +135,36 @@ func ExampleTrim() {
127135 // output bytes:0x68656c6c6f
128136}
129137
138+ func ExamplePrepend () {
139+ // Prepend a UTF-8 BOM to a simple string.
140+ // The UTF-8 BOM is represented by the rune \ufeff.
141+ withBOM := utfbom .Prepend ("hello" , utfbom .UTF8 )
142+ fmt .Printf ("String with UTF-8 BOM: %q\n " , withBOM )
143+ fmt .Printf ("Bytes: %#x\n \n " , withBOM )
144+
145+ // Prepend a UTF-16LE BOM to a byte slice that is also UTF-16LE encoded.
146+ // This represents the word "world" in UTF-16 Little Endian.
147+ data := []byte {0x77 , 0x00 , 0x6f , 0x00 , 0x72 , 0x00 , 0x6c , 0x00 , 0x64 , 0x00 }
148+ withBOMBytes := utfbom .Prepend (data , utfbom .UTF16LittleEndian )
149+ fmt .Printf ("Bytes with UTF-16LE BOM: %#x\n \n " , withBOMBytes )
150+
151+ // The Prepend function is idempotent.
152+ // If a BOM already exists, it will not add another one.
153+ alreadyHasBOM := "\ufeff hello"
154+ idempotentResult := utfbom .Prepend (alreadyHasBOM , utfbom .UTF8 )
155+ fmt .Printf ("Idempotent result: %q\n " , idempotentResult )
156+ fmt .Printf ("Bytes are unchanged: %#x\n " , idempotentResult )
157+
158+ // output:
159+ // String with UTF-8 BOM: "\ufeffhello"
160+ // Bytes: 0xefbbbf68656c6c6f
161+ //
162+ // Bytes with UTF-16LE BOM: 0xfffe77006f0072006c006400
163+ //
164+ // Idempotent result: "\ufeffhello"
165+ // Bytes are unchanged: 0xefbbbf68656c6c6f
166+ }
167+
130168func ExampleReader () {
131169 csvFile := "\uFEFF Index,Customer Id,First Name\n " +
132170 "1,DD37Cf93aecA6Dc,Sheryl"
@@ -351,3 +389,101 @@ func TestReader_WrappeeReaderHasTinyPayload_OneByteBuffer(t *testing.T) {
351389 be .Err (t , err , io .EOF )
352390 be .Equal (t , 0 , n )
353391}
392+
393+ func TestEncoding_Bytes (t * testing.T ) {
394+ t .Parallel ()
395+
396+ testCases := []struct {
397+ name string
398+ enc utfbom.Encoding
399+ expected []byte
400+ }{
401+ {
402+ name : "Unknown" ,
403+ enc : utfbom .Unknown ,
404+ expected : nil ,
405+ },
406+ {
407+ name : "UTF8" ,
408+ enc : utfbom .UTF8 ,
409+ expected : []byte {0xef , 0xbb , 0xbf },
410+ },
411+ {
412+ name : "UTF16BigEndian" ,
413+ enc : utfbom .UTF16BigEndian ,
414+ expected : []byte {0xfe , 0xff },
415+ },
416+ {
417+ name : "UTF16LittleEndian" ,
418+ enc : utfbom .UTF16LittleEndian ,
419+ expected : []byte {0xff , 0xfe },
420+ },
421+ {
422+ name : "UTF32BigEndian" ,
423+ enc : utfbom .UTF32BigEndian ,
424+ expected : []byte {0x00 , 0x00 , 0xfe , 0xff },
425+ },
426+ {
427+ name : "UTF32LittleEndian" ,
428+ enc : utfbom .UTF32LittleEndian ,
429+ expected : []byte {0xff , 0xfe , 0x00 , 0x00 },
430+ },
431+ {
432+ name : "InvalidEncoding" ,
433+ enc : utfbom .Encoding (999 ),
434+ expected : nil ,
435+ },
436+ }
437+
438+ for _ , tc := range testCases {
439+ t .Run (tc .name , func (t * testing.T ) {
440+ got := tc .enc .Bytes ()
441+ be .Equal (t , got , tc .expected )
442+ })
443+ }
444+ }
445+
446+ func TestPrepend (t * testing.T ) {
447+ t .Parallel ()
448+
449+ t .Run ("byte_slice" , func (t * testing.T ) {
450+ data := []byte ("data" )
451+
452+ testCases := []struct {
453+ name string
454+ input []byte
455+ enc utfbom.Encoding
456+ expected []byte
457+ }{
458+ {"unknown_on_data" , data , utfbom .Unknown , data },
459+ {"unknown_on_empty" , []byte {}, utfbom .Unknown , []byte {}},
460+ {"unknown_on_nil" , nil , utfbom .Unknown , nil },
461+ {"utf8_on_data" , data , utfbom .UTF8 , append (utf8BOM , data ... )},
462+ {"utf8_on_empty" , []byte {}, utfbom .UTF8 , utf8BOM },
463+ {"utf8_on_nil" , nil , utfbom .UTF8 , utf8BOM },
464+ {"utf16be_on_data" , data , utfbom .UTF16BigEndian , append (utf16BEBOM , data ... )},
465+ {"utf16be_on_empty" , []byte {}, utfbom .UTF16BigEndian , utf16BEBOM },
466+ {"utf16be_on_nil" , nil , utfbom .UTF16BigEndian , utf16BEBOM },
467+ {"utf16le_on_data" , data , utfbom .UTF16LittleEndian , append (utf16LEBOM , data ... )},
468+ {"utf16le_on_empty" , []byte {}, utfbom .UTF16LittleEndian , utf16LEBOM },
469+ {"utf16le_on_nil" , nil , utfbom .UTF16LittleEndian , utf16LEBOM },
470+ {"utf32be_on_data" , data , utfbom .UTF32BigEndian , append (utf32BEBOM , data ... )},
471+ {"utf32be_on_empty" , []byte {}, utfbom .UTF32BigEndian , utf32BEBOM },
472+ {"utf32be_on_nil" , nil , utfbom .UTF32BigEndian , utf32BEBOM },
473+ {"utf32le_on_data" , data , utfbom .UTF32LittleEndian , append (utf32LEBOM , data ... )},
474+ {"utf32le_on_empty" , []byte {}, utfbom .UTF32LittleEndian , utf32LEBOM },
475+ {"utf32le_on_nil" , nil , utfbom .UTF32LittleEndian , utf32LEBOM },
476+ {"idempotent_when_bom_exists" , append (utf8BOM , data ... ), utfbom .UTF8 , append (utf8BOM , data ... )},
477+ {"idempotent_when_different_bom_exists" , append (utf32LEBOM , data ... ), utfbom .UTF16BigEndian , append (utf32LEBOM , data ... )},
478+ }
479+
480+ for _ , tc := range testCases {
481+ t .Run (tc .name , func (t * testing.T ) {
482+ t .Parallel ()
483+
484+ got := utfbom .Prepend (tc .input , tc .enc )
485+ be .Equal (t , got , tc .expected )
486+ })
487+ }
488+ })
489+ }
0 commit comments