@@ -7,22 +7,23 @@ package utfbom
77
88import (
99 "bufio"
10+ "bytes"
1011 "errors"
1112 "io"
1213 "sync"
1314)
1415
1516var (
1617 _ io.Reader = (* Reader )(nil )
17- utf8BOM = [3 ]byte {0xef , 0xbb , 0xbf }
18- utf16BEBOM = [2 ]byte {0xfe , 0xff }
19- utf16LEBOM = [2 ]byte {0xff , 0xfe }
20- utf32BEBOM = [4 ]byte {0x00 , 0x00 , 0xfe , 0xff }
21- utf32LEBOM = [4 ]byte {0xff , 0xfe , 0x00 , 0x00 }
18+ utf8BOM = []byte {0xef , 0xbb , 0xbf }
19+ utf16BEBOM = []byte {0xfe , 0xff }
20+ utf16LEBOM = []byte {0xff , 0xfe }
21+ utf32BEBOM = []byte {0x00 , 0x00 , 0xfe , 0xff }
22+ utf32LEBOM = []byte {0xff , 0xfe , 0x00 , 0x00 }
2223)
2324
2425// ErrRead helps to trace error origin.
25- var ErrRead = errors .New ("utfbom library unable to detect BOM" )
26+ var ErrRead = errors .New ("utfbom: I/O error during BOM processing " )
2627
2728// Encoding is a character encoding standard.
2829type Encoding int
@@ -64,39 +65,31 @@ const (
6465// - UTF-32 Big Endian (BOM: 0x00 0x00 0xfe 0xff)
6566// - UTF-32 Little Endian (BOM: 0xff 0xfe 0x00 0x00)
6667func DetectEncoding [T string | []byte ](input T ) Encoding {
67- bytes := []byte (input )
68+ ibs := []byte (input )
6869
69- if len (bytes ) < 2 {
70+ if len (ibs ) < 2 {
7071 return Unknown
7172 }
7273
73- if len (bytes ) >= 4 {
74- if utf32BEBOM [0 ] == bytes [0 ] &&
75- utf32BEBOM [1 ] == bytes [1 ] &&
76- utf32BEBOM [2 ] == bytes [2 ] &&
77- utf32BEBOM [3 ] == bytes [3 ] {
74+ if len (ibs ) >= 4 {
75+ if bytes .HasPrefix (ibs , utf32BEBOM ) {
7876 return UTF32BigEndian
7977 }
8078
81- if utf32LEBOM [0 ] == bytes [0 ] &&
82- utf32LEBOM [1 ] == bytes [1 ] &&
83- utf32LEBOM [2 ] == bytes [2 ] &&
84- utf32LEBOM [3 ] == bytes [3 ] {
79+ if bytes .HasPrefix (ibs , utf32LEBOM ) {
8580 return UTF32LittleEndian
8681 }
8782 }
8883
89- if len (bytes ) >= 3 {
90- if utf8BOM [0 ] == bytes [0 ] && utf8BOM [1 ] == bytes [1 ] && utf8BOM [2 ] == bytes [2 ] {
91- return UTF8
92- }
84+ if len (ibs ) >= 3 && bytes .HasPrefix (ibs , utf8BOM ) {
85+ return UTF8
9386 }
9487
95- if utf16BEBOM [ 0 ] == bytes [ 0 ] && utf16BEBOM [ 1 ] == bytes [ 1 ] {
88+ if bytes . HasPrefix ( ibs , utf16BEBOM ) {
9689 return UTF16BigEndian
9790 }
9891
99- if utf16LEBOM [ 0 ] == bytes [ 0 ] && utf16LEBOM [ 1 ] == bytes [ 1 ] {
92+ if bytes . HasPrefix ( ibs , utf16LEBOM ) {
10093 return UTF16LittleEndian
10194 }
10295
@@ -194,7 +187,9 @@ func (r *Reader) Read(buf []byte) (int, error) {
194187
195188 r .once .Do (func () {
196189 bytes , err := r .rd .Peek (maxBOMLen )
197- if err != nil {
190+ // Do not error out when the underlying payload is shorter than maxBOMLen;
191+ // still attempt to proceed with the fewer bytes that were peeked.
192+ if err != nil && ! errors .Is (err , io .EOF ) && ! errors .Is (err , io .ErrUnexpectedEOF ) {
198193 bomErr = errors .Join (ErrRead , err )
199194
200195 return
0 commit comments