Skip to content

Commit 872cbcf

Browse files
authored
Merge pull request #835 from googlecodelabs/image-urls
Support data URLs in Google Doc exports.
2 parents 8b5107f + 9fff679 commit 872cbcf

File tree

5 files changed

+113
-27
lines changed

5 files changed

+113
-27
lines changed

claat/fetch/fetch.go

Lines changed: 36 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -229,7 +229,7 @@ func (f *Fetcher) SlurpImages(src, dir string, n []nodes.Node, images map[string
229229
for _, imageNode := range imageNodes {
230230
go func(imageNode *nodes.ImageNode) {
231231
url := imageNode.Src
232-
file, err := f.slurpBytes(src, dir, url)
232+
file, err := f.slurpBytes(src, dir, url, imageNode.Bytes)
233233
if err == nil {
234234
imageNode.Src = filepath.Join(util.ImgDirname, file)
235235
}
@@ -251,40 +251,52 @@ func (f *Fetcher) SlurpImages(src, dir string, n []nodes.Node, images map[string
251251
return nil
252252
}
253253

254-
func (f *Fetcher) slurpBytes(codelabSrc, dir, imgURL string) (string, error) {
255-
// images can be local in Markdown cases or remote.
254+
func (f *Fetcher) slurpBytes(codelabSrc, dir, imgURL string, imgBytes []byte) (string, error) {
255+
// images can be data URLs, local in Markdown cases or remote.
256256
// Only proceed a simple copy on local reference.
257257
var b []byte
258258
var ext string
259-
u, err := url.Parse(imgURL)
260-
if err != nil {
261-
return "", err
262-
}
263-
264-
// If the codelab source is being downloaded from the network, then we should interpret
265-
// the image URL in the same way.
266-
srcUrl, err := url.Parse(codelabSrc)
267-
if err == nil && srcUrl.Host != "" {
268-
u = srcUrl.ResolveReference(u)
269-
}
259+
var err error
270260

271-
if u.Host == "" {
272-
if imgURL, err = restrictPathToParent(imgURL, filepath.Dir(codelabSrc)); err != nil {
273-
return "", err
261+
if len(imgBytes) > 0 {
262+
// Slurp bytes from image URL data.
263+
b = imgBytes
264+
if ext, err = imgExtFromBytes(b); err != nil {
265+
return "", fmt.Errorf("Error reading image type: %v", err)
274266
}
275-
if b, err = ioutil.ReadFile(imgURL); err != nil {
267+
} else {
268+
// Slurp bytes from local or remote URL.
269+
u, err := url.Parse(imgURL)
270+
if err != nil {
276271
return "", err
277272
}
278-
ext = filepath.Ext(imgURL)
279-
} else {
280-
if b, err = f.slurpRemoteBytes(u.String(), 5); err != nil {
281-
return "", fmt.Errorf("Error downloading image at %s: %v", u.String(), err)
273+
274+
// If the codelab source is being downloaded from the network, then we should interpret
275+
// the image URL in the same way.
276+
srcURL, err := url.Parse(codelabSrc)
277+
if err == nil && srcURL.Host != "" {
278+
u = srcURL.ResolveReference(u)
282279
}
283-
if ext, err = imgExtFromBytes(b); err != nil {
284-
return "", fmt.Errorf("Error reading image type at %s: %v", u.String(), err)
280+
281+
if u.Host == "" {
282+
if imgURL, err = restrictPathToParent(imgURL, filepath.Dir(codelabSrc)); err != nil {
283+
return "", err
284+
}
285+
if b, err = ioutil.ReadFile(imgURL); err != nil {
286+
return "", err
287+
}
288+
ext = filepath.Ext(imgURL)
289+
} else {
290+
if b, err = f.slurpRemoteBytes(u.String(), 5); err != nil {
291+
return "", fmt.Errorf("Error downloading image at %s: %v", u.String(), err)
292+
}
293+
if ext, err = imgExtFromBytes(b); err != nil {
294+
return "", fmt.Errorf("Error reading image type at %s: %v", u.String(), err)
295+
}
285296
}
286297
}
287298

299+
// Generate image file from slurped bytes.
288300
crc := crc64.Checksum(b, f.crcTable)
289301
file := fmt.Sprintf("%x%s", crc, ext)
290302
dst := filepath.Join(dir, file)

claat/nodes/image.go

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ type NewImageNodeOptions struct {
77
Width float32
88
Alt string
99
Title string
10+
Bytes []byte
1011
}
1112

1213
// NewImageNode creates a new ImageNode with the given options.
@@ -18,6 +19,7 @@ func NewImageNode(opts NewImageNodeOptions) *ImageNode {
1819
Width: opts.Width,
1920
Alt: opts.Alt,
2021
Title: opts.Title,
22+
Bytes: opts.Bytes,
2123
}
2224
}
2325

@@ -28,11 +30,12 @@ type ImageNode struct {
2830
Width float32
2931
Alt string
3032
Title string
33+
Bytes []byte
3134
}
3235

3336
// Empty returns true if its Src is zero, excluding space runes.
3437
func (in *ImageNode) Empty() bool {
35-
return strings.TrimSpace(in.Src) == ""
38+
return strings.TrimSpace(in.Src) == "" && len(in.Bytes) == 0
3639
}
3740

3841
// ImageNodes extracts everything except NodeImage nodes, recursively.

claat/nodes/image_test.go

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,14 @@
11
package nodes
22

33
import (
4+
"encoding/base64"
45
"testing"
56

67
"github.com/google/go-cmp/cmp"
78
)
89

10+
var testBytes, _ = base64.StdEncoding.DecodeString("R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7")
11+
912
func TestNewImageNode(t *testing.T) {
1013
tests := []struct {
1114
name string
@@ -19,7 +22,7 @@ func TestNewImageNode(t *testing.T) {
1922
},
2023
},
2124
{
22-
name: "NonEmpty",
25+
name: "StandardURL",
2326
inOpts: NewImageNodeOptions{
2427
Src: "https://www.google.com/images/branding/googlelogo/1x/googlelogo_color_272x92dp.png",
2528
Width: 1.0,
@@ -34,6 +37,22 @@ func TestNewImageNode(t *testing.T) {
3437
Alt: "bar",
3538
},
3639
},
40+
{
41+
name: "DataURL",
42+
inOpts: NewImageNodeOptions{
43+
Width: 1.0,
44+
Title: "foo",
45+
Alt: "bar",
46+
Bytes: testBytes,
47+
},
48+
out: &ImageNode{
49+
node: node{typ: NodeImage},
50+
Width: 1.0,
51+
Title: "foo",
52+
Alt: "bar",
53+
Bytes: testBytes,
54+
},
55+
},
3756
}
3857
for _, tc := range tests {
3958
t.Run(tc.name, func(t *testing.T) {

claat/parser/gdoc/parse.go

Lines changed: 22 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ package gdoc
1616

1717
import (
1818
"bytes"
19+
"encoding/base64"
1920
"fmt"
2021
"io"
2122
"net/url"
@@ -697,12 +698,32 @@ func image(ds *docState) nodes.Node {
697698
errorAlt = "The domain of the requested iframe (" + u.Hostname() + ") has not been whitelisted."
698699
fmt.Fprint(os.Stderr, errorAlt+"\n")
699700
}
701+
702+
var imageBytes []byte
703+
var imageSrc string
700704
s := nodeAttr(ds.cur, "src")
701705
if s == "" {
702706
return nil
707+
} else if strings.HasPrefix(s, "data:") {
708+
_, data, ok := strings.Cut(s, ",")
709+
if !ok {
710+
fmt.Fprint(os.Stderr, "Failed to decode data URL: "+s+" \n")
711+
return nil
712+
}
713+
b, err := base64.StdEncoding.DecodeString(data)
714+
if err != nil {
715+
fmt.Fprint(os.Stderr, "Failed to decode data URL: "+s+"\n"+err.Error()+"\n")
716+
return nil
717+
}
718+
imageSrc = ""
719+
imageBytes = b
720+
} else {
721+
imageSrc = s
722+
imageBytes = []byte{}
703723
}
704724
n := nodes.NewImageNode(nodes.NewImageNodeOptions{
705-
Src: s,
725+
Src: imageSrc,
726+
Bytes: imageBytes,
706727
Width: styleFloatValue(ds.cur, "width"),
707728
})
708729
n.MutateBlock(findBlockParent(ds.cur))

claat/parser/gdoc/parse_test.go

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ package gdoc
1616

1717
import (
1818
"bytes"
19+
"encoding/base64"
1920
"io"
2021
"reflect"
2122
"strings"
@@ -295,6 +296,9 @@ func TestParseDoc(t *testing.T) {
295296
<p><span>[[</span><span class="bold">import</span><span>&nbsp;</span><span><a href="https://example.com/import">shared</a></span><span>]]</span></p>
296297
297298
<img src="https://host/image.png" alt="alt text" title="title text">
299+
<p><img alt="JPEG" src="data:image/jpeg;base64,/9j/2wBDAP//////////////////////////////////////////////////////////////////////////////////////wAALCAABAAEBAREA/8QAFAABAAAAAAAAAAAAAAAAAAAAA//EABQQAQAAAAAAAAAAAAAAAAAAAAD/2gAIAQEAAD8AN//Z"></p>
300+
<p><img alt="GIF" src="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7"></p>
301+
<p><img alt="PNG" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVQYV2NgYAAAAAMAAWgmWQ0AAAAASUVORK5CYII="></p>
298302
<p><img src="https://host/small.png" style="height: 10px; width: 25.5px"> icon.</p>
299303
300304
<p><img alt="https://www.youtube.com/watch?v=vid" src="https://yt.com/vid.jpg"></p>
@@ -405,6 +409,33 @@ func TestParseDoc(t *testing.T) {
405409
para.MutateBlock(true)
406410
content.Append(para)
407411

412+
bytes, _ := base64.StdEncoding.DecodeString("/9j/2wBDAP//////////////////////////////////////////////////////////////////////////////////////wAALCAABAAEBAREA/8QAFAABAAAAAAAAAAAAAAAAAAAAA//EABQQAQAAAAAAAAAAAAAAAAAAAAD/2gAIAQEAAD8AN//Z")
413+
img = nodes.NewImageNode(nodes.NewImageNodeOptions{
414+
Bytes: bytes,
415+
Alt: "JPEG",
416+
})
417+
para = nodes.NewListNode(img)
418+
para.MutateBlock(true)
419+
content.Append(para)
420+
421+
bytes, _ = base64.StdEncoding.DecodeString("R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7")
422+
img = nodes.NewImageNode(nodes.NewImageNodeOptions{
423+
Bytes: bytes,
424+
Alt: "GIF",
425+
})
426+
para = nodes.NewListNode(img)
427+
para.MutateBlock(true)
428+
content.Append(para)
429+
430+
bytes, _ = base64.StdEncoding.DecodeString("iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVQYV2NgYAAAAAMAAWgmWQ0AAAAASUVORK5CYII=")
431+
img = nodes.NewImageNode(nodes.NewImageNodeOptions{
432+
Bytes: bytes,
433+
Alt: "PNG",
434+
})
435+
para = nodes.NewListNode(img)
436+
para.MutateBlock(true)
437+
content.Append(para)
438+
408439
img = nodes.NewImageNode(nodes.NewImageNodeOptions{
409440
Src: "https://host/small.png",
410441
Width: 25.5,

0 commit comments

Comments
 (0)