@@ -13,14 +13,7 @@ import (
1313 "sync"
1414)
1515
16- var (
17- _ io.Reader = (* Reader )(nil )
18- utf8BOM = []byte {0xef , 0xbb , 0xbf }
19- utf16BEBOM = []byte {0xfe , 0xff }
20- utf16LEBOM = []byte {0xff , 0xfe }
21- utf32BEBOM = []byte {0x00 , 0x00 , 0xfe , 0xff }
22- utf32LEBOM = []byte {0xff , 0xfe , 0x00 , 0x00 }
23- )
16+ var _ io.Reader = (* Reader )(nil )
2417
2518// ErrRead is the sentinel error for I/O failures during BOM processing; it helps to trace the error origin.
2619var ErrRead = errors .New ("utfbom: I/O error during BOM processing" )
@@ -64,32 +57,32 @@ const (
6457// - UTF-16 Little Endian (BOM: 0xff 0xfe)
6558// - UTF-32 Big Endian (BOM: 0x00 0x00 0xfe 0xff)
6659// - UTF-32 Little Endian (BOM: 0xff 0xfe 0x00 0x00)
67- func DetectEncoding [T string | []byte ](input T ) Encoding {
68- ibs := []byte (input )
60+ func DetectEncoding [T ~ string | ~ []byte ](input T ) Encoding {
61+ b := []byte (input )
6962
70- if len (ibs ) < 2 {
63+ if len (b ) < 2 {
7164 return Unknown
7265 }
7366
74- if len (ibs ) >= 3 && bytes .HasPrefix (ibs , utf8BOM ) {
67+ if len (b ) >= 3 && bytes .HasPrefix (b , [] byte { 0xef , 0xbb , 0xbf } ) {
7568 return UTF8
7669 }
7770
78- if len (ibs ) >= 4 {
79- if bytes .HasPrefix (ibs , utf32BEBOM ) {
71+ if len (b ) >= 4 {
72+ if bytes .HasPrefix (b , [] byte { 0x00 , 0x00 , 0xfe , 0xff } ) {
8073 return UTF32BigEndian
8174 }
8275
83- if bytes .HasPrefix (ibs , utf32LEBOM ) {
76+ if bytes .HasPrefix (b , [] byte { 0xff , 0xfe , 0x00 , 0x00 } ) {
8477 return UTF32LittleEndian
8578 }
8679 }
8780
88- if bytes .HasPrefix (ibs , utf16BEBOM ) {
81+ if bytes .HasPrefix (b , [] byte { 0xfe , 0xff } ) {
8982 return UTF16BigEndian
9083 }
9184
92- if bytes .HasPrefix (ibs , utf16LEBOM ) {
85+ if bytes .HasPrefix (b , [] byte { 0xff , 0xfe } ) {
9386 return UTF16LittleEndian
9487 }
9588
@@ -108,7 +101,7 @@ func (e Encoding) AnyOf(es ...Encoding) bool {
108101 return false
109102}
110103
111- // Strings returns human-readable name of encoding.
104+ // String returns the human-readable name of the encoding.
112105func (e Encoding ) String () string {
113106 switch e {
114107 case UTF8 :
@@ -146,21 +139,21 @@ func (e Encoding) Bytes() []byte {
146139 default :
147140 return nil
148141 case UTF8 :
149- return utf8BOM
142+ return [] byte { 0xef , 0xbb , 0xbf }
150143 case UTF16BigEndian :
151- return utf16BEBOM
144+ return [] byte { 0xfe , 0xff }
152145 case UTF16LittleEndian :
153- return utf16LEBOM
146+ return [] byte { 0xff , 0xfe }
154147 case UTF32BigEndian :
155- return utf32BEBOM
148+ return [] byte { 0x00 , 0x00 , 0xfe , 0xff }
156149 case UTF32LittleEndian :
157- return utf32LEBOM
150+ return [] byte { 0xff , 0xfe , 0x00 , 0x00 }
158151 }
159152}
160153
161- // Trim removes the BOM prefix from the input `s` based on the encoding `enc` .
154+ // Trim removes the BOM prefix from the input.
162155// Supports string or []byte inputs and returns the same type without the BOM, together with the detected Encoding.
163- func Trim [T string | []byte ](input T ) (T , Encoding ) {
156+ func Trim [T ~ string | ~ []byte ](input T ) (T , Encoding ) {
164157 b := []byte (input )
165158 enc := DetectEncoding (b )
166159
@@ -174,7 +167,7 @@ func Trim[T string | []byte](input T) (T, Encoding) {
174167// Prepend adds the corresponding Byte Order Mark (BOM) for a given encoding
175168// to the beginning of a string or byte slice.
176169// If the provided encoding is Unknown, the input is returned unmodified.
177- func Prepend [T string | []byte ](input T , enc Encoding ) T {
170+ func Prepend [T ~ string | ~ []byte ](input T , enc Encoding ) T {
178171 if enc == Unknown {
179172 return input
180173 }
@@ -190,6 +183,8 @@ func Prepend[T string | []byte](input T, enc Encoding) T {
190183
191184// Reader implements automatic BOM (Unicode Byte Order Mark) checking and
192185// removing as necessary for an io.Reader object.
186+ //
187+ // Reader is not safe for concurrent use.
193188type Reader struct {
194189 rd * bufio.Reader
195190 once sync.Once
@@ -198,6 +193,7 @@ type Reader struct {
198193}
199194
200195// NewReader wraps an incoming reader.
196+ // Passing a nil reader will cause a panic on the first Read call.
201197func NewReader (rd io.Reader ) * Reader {
202198 return & Reader {
203199 rd : bufio .NewReader (rd ),
@@ -207,10 +203,8 @@ func NewReader(rd io.Reader) *Reader {
207203}
208204
209205// Read implements the io.Reader interface.
210- // On the first read call, it reads from the underlying Reader, detects and removes any Byte Order Mark (BOM).
211- // Subsequent calls delegate directly to the underlying Reader without BOM handling.
212- // Read is only safe for concurrent use during the first call due to sync.Once; after that, thread-safety
213- // depends on the underlying Reader. It is best to assume unsafe concurrent use.
206+ // On the first call, it detects and removes any Byte Order Mark (BOM).
207+ // Subsequent calls delegate directly to the underlying Reader.
214208func (r * Reader ) Read (buf []byte ) (int , error ) {
215209 const maxBOMLen = 4
216210
0 commit comments