slash3b · slash3b · Jul 22, 2025 · Jul 22, 2025 · Jul 22, 2025
diff --git a/.golangci.yml b/.golangci.yml
@@ -11,6 +11,7 @@ linters:
     - exhaustive
     - ireturn
     - wrapcheck
+    - varnamelen
   settings:
     lll:
       line-length: 160

diff --git a/utfbom.go b/utfbom.go
@@ -71,6 +71,10 @@ func DetectEncoding[T string | []byte](input T) Encoding {
 		return Unknown
 	}
 
+	if len(ibs) >= 3 && bytes.HasPrefix(ibs, utf8BOM) {
+		return UTF8
+	}
+
 	if len(ibs) >= 4 {
 		if bytes.HasPrefix(ibs, utf32BEBOM) {
 			return UTF32BigEndian
@@ -81,10 +85,6 @@ func DetectEncoding[T string | []byte](input T) Encoding {
 		}
 	}
 
-	if len(ibs) >= 3 && bytes.HasPrefix(ibs, utf8BOM) {
-		return UTF8
-	}
-
 	if bytes.HasPrefix(ibs, utf16BEBOM) {
 		return UTF16BigEndian
 	}
@@ -140,17 +140,52 @@ func (e Encoding) Len() int {
 	}
 }
 
+// Bytes returns a fresh copy of the BOM bytes for e; nil for Unknown or any unlisted value.
+func (e Encoding) Bytes() []byte {
+	switch e {
+	default:
+		return nil
+	case UTF8:
+		return append([]byte(nil), utf8BOM...)
+	case UTF16BigEndian:
+		return append([]byte(nil), utf16BEBOM...)
+	case UTF16LittleEndian:
+		return append([]byte(nil), utf16LEBOM...)
+	case UTF32BigEndian:
+		return append([]byte(nil), utf32BEBOM...)
+	case UTF32LittleEndian:
+		return append([]byte(nil), utf32LEBOM...)
+	}
+}
+
 // Trim removes the BOM prefix from the input `s` based on the encoding `enc`.
 // Supports string or []byte inputs and returns the same type without the BOM.
 func Trim[T string | []byte](input T) (T, Encoding) {
-	bytes := []byte(input)
-	enc := DetectEncoding(bytes)
+	b := []byte(input)
+	enc := DetectEncoding(b)
 
 	if enc == Unknown {
 		return input, enc
 	}
 
-	return T(bytes[enc.Len():]), enc
+	return T(b[enc.Len():]), enc
+}
+
+// Prepend adds the Byte Order Mark (BOM) of enc to the beginning of a string
+// or byte slice. The input is returned unmodified if enc is Unknown or if it
+// already starts with a BOM (of any encoding) that DetectEncoding recognizes.
+func Prepend[T string | []byte](input T, enc Encoding) T {
+	if enc == Unknown {
+		return input
+	}
+
+	b := []byte(input)
+	if DetectEncoding(b) != Unknown {
+		return input
+	}
+	// Build a fresh slice: append(enc.Bytes(), b...) hands back the BOM slice itself for empty b.
+	bom := enc.Bytes()
+	return T(append(append(make([]byte, 0, len(bom)+len(b)), bom...), b...))
 }
 
 // Reader implements automatic BOM (Unicode Byte Order Mark) checking and
@@ -186,7 +221,7 @@ func (r *Reader) Read(buf []byte) (int, error) {
 	var bomErr error
 
 	r.once.Do(func() {
-		bytes, err := r.rd.Peek(maxBOMLen)
+		b, err := r.rd.Peek(maxBOMLen)
 		// do not error out in case underlying payload is too small
 		// still attempt to read fewer than n bytes.
 		if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
@@ -195,7 +230,7 @@ func (r *Reader) Read(buf []byte) (int, error) {
 			return
 		}
 
-		r.Enc = DetectEncoding(bytes)
+		r.Enc = DetectEncoding(b)
 		if r.Enc != Unknown {
 			_, err = r.rd.Discard(r.Enc.Len())
 			if err != nil {

diff --git a/utfbom_test.go b/utfbom_test.go
@@ -14,6 +14,14 @@ import (
 	"github.com/slash3b/utfbom"
 )
 
+var (
+	utf8BOM    = []byte{0xef, 0xbb, 0xbf}
+	utf16BEBOM = []byte{0xfe, 0xff}
+	utf16LEBOM = []byte{0xff, 0xfe}
+	utf32BEBOM = []byte{0x00, 0x00, 0xfe, 0xff}
+	utf32LEBOM = []byte{0xff, 0xfe, 0x00, 0x00}
+)
+
 func TestDetectBom(t *testing.T) {
 	testCases := []struct {
 		name     string
@@ -127,6 +135,36 @@ func ExampleTrim() {
 	// output bytes:0x68656c6c6f
 }
 
+func ExamplePrepend() {
+	// Prepend a UTF-8 BOM to a simple string.
+	// The UTF-8 BOM is represented by the rune \ufeff.
+	withBOM := utfbom.Prepend("hello", utfbom.UTF8)
+	fmt.Printf("String with UTF-8 BOM: %q\n", withBOM)
+	fmt.Printf("Bytes: %#x\n\n", withBOM)
+
+	// Prepend a UTF-16LE BOM to a byte slice that is also UTF-16LE encoded.
+	// This represents the word "world" in UTF-16 Little Endian.
+	data := []byte{0x77, 0x00, 0x6f, 0x00, 0x72, 0x00, 0x6c, 0x00, 0x64, 0x00}
+	withBOMBytes := utfbom.Prepend(data, utfbom.UTF16LittleEndian)
+	fmt.Printf("Bytes with UTF-16LE BOM: %#x\n\n", withBOMBytes)
+
+	// The Prepend function is idempotent.
+	// If a BOM already exists, it will not add another one.
+	alreadyHasBOM := "\ufeffhello"
+	idempotentResult := utfbom.Prepend(alreadyHasBOM, utfbom.UTF8)
+	fmt.Printf("Idempotent result: %q\n", idempotentResult)
+	fmt.Printf("Bytes are unchanged: %#x\n", idempotentResult)
+
+	// output:
+	// String with UTF-8 BOM: "\ufeffhello"
+	// Bytes: 0xefbbbf68656c6c6f
+	//
+	// Bytes with UTF-16LE BOM: 0xfffe77006f0072006c006400
+	//
+	// Idempotent result: "\ufeffhello"
+	// Bytes are unchanged: 0xefbbbf68656c6c6f
+}
+
 func ExampleReader() {
 	csvFile := "\uFEFFIndex,Customer Id,First Name\n" +
 		"1,DD37Cf93aecA6Dc,Sheryl"
@@ -351,3 +389,101 @@ func TestReader_WrappeeReaderHasTinyPayload_OneByteBuffer(t *testing.T) {
 	be.Err(t, err, io.EOF)
 	be.Equal(t, 0, n)
 }
+
+func TestEncoding_Bytes(t *testing.T) {
+	t.Parallel()
+
+	testCases := []struct {
+		name     string
+		enc      utfbom.Encoding
+		expected []byte
+	}{
+		{
+			name:     "Unknown",
+			enc:      utfbom.Unknown,
+			expected: nil,
+		},
+		{
+			name:     "UTF8",
+			enc:      utfbom.UTF8,
+			expected: []byte{0xef, 0xbb, 0xbf},
+		},
+		{
+			name:     "UTF16BigEndian",
+			enc:      utfbom.UTF16BigEndian,
+			expected: []byte{0xfe, 0xff},
+		},
+		{
+			name:     "UTF16LittleEndian",
+			enc:      utfbom.UTF16LittleEndian,
+			expected: []byte{0xff, 0xfe},
+		},
+		{
+			name:     "UTF32BigEndian",
+			enc:      utfbom.UTF32BigEndian,
+			expected: []byte{0x00, 0x00, 0xfe, 0xff},
+		},
+		{
+			name:     "UTF32LittleEndian",
+			enc:      utfbom.UTF32LittleEndian,
+			expected: []byte{0xff, 0xfe, 0x00, 0x00},
+		},
+		{
+			name:     "InvalidEncoding",
+			enc:      utfbom.Encoding(999),
+			expected: nil,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := tc.enc.Bytes()
+			be.Equal(t, got, tc.expected)
+		})
+	}
+}
+
+func TestPrepend(t *testing.T) {
+	t.Parallel()
+
+	t.Run("byte_slice", func(t *testing.T) {
+		data := []byte("data")
+
+		testCases := []struct {
+			name     string
+			input    []byte
+			enc      utfbom.Encoding
+			expected []byte
+		}{
+			{"unknown_on_data", data, utfbom.Unknown, data},
+			{"unknown_on_empty", []byte{}, utfbom.Unknown, []byte{}},
+			{"unknown_on_nil", nil, utfbom.Unknown, nil},
+			{"utf8_on_data", data, utfbom.UTF8, append(utf8BOM, data...)},
+			{"utf8_on_empty", []byte{}, utfbom.UTF8, utf8BOM},
+			{"utf8_on_nil", nil, utfbom.UTF8, utf8BOM},
+			{"utf16be_on_data", data, utfbom.UTF16BigEndian, append(utf16BEBOM, data...)},
+			{"utf16be_on_empty", []byte{}, utfbom.UTF16BigEndian, utf16BEBOM},
+			{"utf16be_on_nil", nil, utfbom.UTF16BigEndian, utf16BEBOM},
+			{"utf16le_on_data", data, utfbom.UTF16LittleEndian, append(utf16LEBOM, data...)},
+			{"utf16le_on_empty", []byte{}, utfbom.UTF16LittleEndian, utf16LEBOM},
+			{"utf16le_on_nil", nil, utfbom.UTF16LittleEndian, utf16LEBOM},
+			{"utf32be_on_data", data, utfbom.UTF32BigEndian, append(utf32BEBOM, data...)},
+			{"utf32be_on_empty", []byte{}, utfbom.UTF32BigEndian, utf32BEBOM},
+			{"utf32be_on_nil", nil, utfbom.UTF32BigEndian, utf32BEBOM},
+			{"utf32le_on_data", data, utfbom.UTF32LittleEndian, append(utf32LEBOM, data...)},
+			{"utf32le_on_empty", []byte{}, utfbom.UTF32LittleEndian, utf32LEBOM},
+			{"utf32le_on_nil", nil, utfbom.UTF32LittleEndian, utf32LEBOM},
+			{"idempotent_when_bom_exists", append(utf8BOM, data...), utfbom.UTF8, append(utf8BOM, data...)},
+			{"idempotent_when_different_bom_exists", append(utf32LEBOM, data...), utfbom.UTF16BigEndian, append(utf32LEBOM, data...)},
+		}
+
+		for _, tc := range testCases {
+			t.Run(tc.name, func(t *testing.T) {
+				t.Parallel()
+
+				got := utfbom.Prepend(tc.input, tc.enc)
+				be.Equal(t, got, tc.expected)
+			})
+		}
+	})
+}