Skip to content

Commit 6ad8e69

Browse files
authored
to 3.0: enable escape and escape the JSON control characters. (#22607)
Enable the escape character and escape the JSON control characters.

```
create table t1 (a int primary key, b json);
insert into t1 values(1, '{"key1":"你好\\t不\\r好\\f呀\\n\\\\"}');
insert into t1 values(2, '{"key2":"谢谢\\t你,\\r我非常\\f好\\n\\\\"}');

1. select * from t1 into outfile "/tmp/gg/t1.csv" fields terminated by "," enclosed by '\'';
2. load data infile "/tmp/gg/t1.csv" into table t1 fields terminated by "," enclosed by '\'' ignore 1 lines;
```

All JSON control characters in the JSON column of t1 should be escaped when the data is selected out to the outfile.

Approved by: @XuPeng-SH, @heni02, @daviszhen
1 parent c3544d5 commit 6ad8e69

File tree

4 files changed

+162
-1
lines changed

4 files changed

+162
-1
lines changed

pkg/frontend/export.go

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,13 +311,53 @@ func formatJsonString(str string, flag bool, terminatedBy string) string {
311311
return tmp
312312
}
313313

314+
// escapeJSONControlChars returns s with the backslash of every JSON
// control-character escape doubled, so that the two-byte text sequences
// \t, \n, \r, \f and \b become \\t, \\n, \\r, \\f and \\b, and an escaped
// backslash (\\) becomes four backslashes.
//
// The input is the textual form of a JSON value (the caller passes the result
// of String() on a decoded JSON column), so the sequences matched here are a
// literal backslash followed by a letter, not raw control bytes.
//
// NOTE(review): the doubling is presumably needed because the exported file
// is read back by a loader that treats backslash as its escape character;
// confirm against the LOAD DATA parser.
//
// Examples (shown as the raw bytes of the string):
//
//	abc\t                       -> abc\\t
//	\\t                         -> \\\\t
//	{"a": "abc", "b": "abc\t"}  -> {"a": "abc", "b": "abc\\t"}
//
// Any other byte, including a backslash followed by a character that is not
// listed above (for example \" or \u) and a single trailing backslash, is
// copied through unchanged.
func escapeJSONControlChars(s string) string {
	// Nothing to escape: return the input as is and avoid a copy.
	if strings.IndexByte(s, '\\') < 0 {
		return s
	}

	var builder strings.Builder
	builder.Grow(len(s))

	for i := 0; i < len(s); {
		// A single remaining byte cannot start a two-byte escape sequence.
		if i+2 > len(s) {
			builder.WriteString(s[i:])
			break
		}

		switch pair := s[i : i+2]; pair {
		case `\t`, `\n`, `\r`, `\f`, `\b`:
			// Prefix the existing escape with one more backslash.
			builder.WriteString(`\`)
			builder.WriteString(pair)
			i += 2
		case `\\`:
			// Consume both backslashes together so the second one is not
			// re-read as the start of another escape sequence.
			builder.WriteString(`\\\\`)
			i += 2
		default:
			builder.WriteString(s[i : i+1])
			i++
		}
	}

	return builder.String()
}
351+
314352
func constructByte(ctx context.Context, obj FeSession, bat *batch.Batch, index int32, ByteChan chan *BatchByte, ep *ExportConfig) {
315353
ses := obj.(*Session)
316354
symbol := ep.Symbol
317355
closeby := ep.userConfig.Fields.EnclosedBy.Value
318356
terminated := ep.userConfig.Fields.Terminated.Value
319357
flag := ep.ColumnFlag
320358

359+
escape = closeby
360+
321361
buffer := &bytes.Buffer{}
322362

323363
for i := 0; i < bat.RowCount(); i++ {
@@ -329,7 +369,12 @@ func constructByte(ctx context.Context, obj FeSession, bat *batch.Batch, index i
329369
switch vec.GetType().Oid { //get col
330370
case types.T_json:
331371
val := types.DecodeJson(vec.GetBytesAt(i))
332-
formatOutputString(ep, []byte(formatJsonString(val.String(), flag[j], terminated)), symbol[j], closeby, flag[j], buffer)
372+
formatOutputString(ep, []byte(formatJsonString(
373+
escapeJSONControlChars(
374+
val.String(),
375+
),
376+
flag[j], terminated),
377+
), symbol[j], closeby, flag[j], buffer)
333378
case types.T_bool:
334379
val := vector.GetFixedAtNoTypeCheck[bool](vec, i)
335380
if val {

pkg/frontend/export_test.go

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ package frontend
1717
import (
1818
"bytes"
1919
"context"
20+
"strings"
2021
"testing"
2122

2223
"github.com/prashantv/gostub"
@@ -301,3 +302,91 @@ func Test_exportDataToCSVFile(t *testing.T) {
301302
convey.So(exportDataFromResultSetToCSVFile(ep), convey.ShouldBeNil)
302303
})
303304
}
305+
306+
func TestEscapeJSONControl(t *testing.T) {
307+
tests := []struct {
308+
name string
309+
input string
310+
expected string
311+
}{
312+
{
313+
name: "empty string",
314+
input: "",
315+
expected: "",
316+
},
317+
{
318+
name: "normal characters",
319+
input: `{"a":"hello world 123"}`,
320+
expected: `{"a":"hello world 123"}`,
321+
},
322+
{
323+
name: "UTF-8 characters",
324+
input: `{"a":"hello 世界 😊"}`,
325+
expected: `{"a":"hello 世界 😊"}`,
326+
},
327+
{
328+
name: "single newline",
329+
input: `{"a":"abc` + `\n` + `def"}`,
330+
expected: `{"a":"abc\\ndef"}`,
331+
},
332+
{
333+
name: "single carriage return",
334+
input: `{"a":"abc` + `\r` + `def"}`,
335+
expected: `{"a":"abc\\rdef"}`,
336+
},
337+
{
338+
name: "single tab",
339+
input: `{"a":"abc` + `\t` + `def"}`,
340+
expected: `{"a":"abc\\tdef"}`,
341+
},
342+
{
343+
name: "single form feed",
344+
input: `{"a":"abc` + `\f` + `def"}`,
345+
expected: `{"a":"abc\\fdef"}`,
346+
},
347+
{
348+
name: "multiple newlines",
349+
input: `{"a":"abc` + `\n\n` + `def"}`,
350+
expected: `{"a":"abc\\n\\ndef"}`,
351+
},
352+
{
353+
name: "escaped sequences unchanged",
354+
input: `{"a":"abc\\n\\r\\t\\b\\fdef"}`,
355+
expected: `{"a":"abc\\\\n\\\\r\\\\t\\\\b\\\\fdef"}`,
356+
},
357+
{
358+
name: "mixed escaped and actual",
359+
input: `{"a":"abc\\n` + `\t` + `def\\r"}`,
360+
expected: `{"a":"abc\\\\n\\tdef\\\\r"}`,
361+
},
362+
{
363+
name: "control at start",
364+
input: `\n` + `{"a":"def"}`,
365+
expected: `\\n{"a":"def"}`,
366+
},
367+
{
368+
name: "control at end",
369+
input: `{"a":"def"}` + `\n`,
370+
expected: `{"a":"def"}\\n`,
371+
},
372+
{
373+
name: "long string with controls",
374+
input: strings.Repeat("a", 10000) + `\n` + strings.Repeat("b", 10000) + `\t` + strings.Repeat("c", 10000),
375+
expected: strings.Repeat("a", 10000) + `\\n` + strings.Repeat("b", 10000) + `\\t` + strings.Repeat("c", 10000),
376+
},
377+
{
378+
name: "backslash literal",
379+
input: `{"a":"abc\\def"}`,
380+
expected: `{"a":"abc\\\\def"}`,
381+
},
382+
}
383+
384+
for _, tt := range tests {
385+
t.Run(tt.name, func(t *testing.T) {
386+
res := escapeJSONControlChars(tt.input)
387+
if res != tt.expected {
388+
t.Errorf("EscapeJSONControl(%v) =\n%v,\nwant\n%v", tt.input, res, tt.expected)
389+
}
390+
})
391+
}
392+
}

test/distributed/cases/load_data/load_data.result

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,4 +1063,18 @@ load data infile '$resources/into_outfile/load_data/t1.csv' into table t1 ignore
10631063
select count(*) from t1;
10641064
count(*)
10651065
3
1066+
create table t2 (a int primary key, b json);
1067+
insert into t2 values(1, '{"key1":"你好\\t不\\r好\\f呀\\n\\\\"}');
1068+
insert into t2 values(2, '{"key2":"谢谢\\t你,\\r我非常\\f好\\n\\\\"}');
1069+
select * from t2 order by a asc;
1070+
a b
1071+
1 {"key1": "你好\t不\r好\f呀\n\\"}
1072+
2 {"key2": "谢谢\t你,\r我非常\f好\n\\"}
1073+
select * from t2 into outfile '$resources/into_outfile/load_data/t2.csv';
1074+
truncate table t2;
1075+
load data infile '$resources/into_outfile/load_data/t2.csv' into table t2 ignore 1 lines;
1076+
select * from t2 order by a asc;
1077+
a b
1078+
1 {"key1": "你好\t不\r好\f呀\n\\"}
1079+
2 {"key2": "谢谢\t你,\r我非常\f好\n\\"}
10661080
drop database test;

test/distributed/cases/load_data/load_data.sql

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,4 +625,17 @@ truncate table t1;
625625
load data infile '$resources/into_outfile/load_data/t1.csv' into table t1 ignore 1 lines;
626626
select count(*) from t1;
627627

628+
629+
create table t2 (a int primary key, b json);
630+
631+
insert into t2 values(1, '{"key1":"你好\\t不\\r好\\f呀\\n\\\\"}');
632+
insert into t2 values(2, '{"key2":"谢谢\\t你,\\r我非常\\f好\\n\\\\"}');
633+
select * from t2 order by a asc;
634+
635+
select * from t2 into outfile '$resources/into_outfile/load_data/t2.csv';
636+
truncate table t2;
637+
638+
load data infile '$resources/into_outfile/load_data/t2.csv' into table t2 ignore 1 lines;
639+
select * from t2 order by a asc;
640+
628641
drop database test;

0 commit comments

Comments
 (0)