Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 35 additions & 5 deletions aklapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,16 @@ var (
)

// userAgent is sent with all outgoing HTTP requests. The Auckland Council
// website CDN (Fastly) returns 406 for requests that identify as Go's default
// http client, so we send a browser-compatible value instead.
const userAgent = "Mozilla/5.0 (compatible; aklapi/1.0)"
// collection page returns 406 unless requests resemble a modern browser.
const userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"

// Header values used to make outgoing requests resemble a real Chrome
// session; the collection page rejects non-browser-looking requests.
const (
	// defaultAccept is the fallback Accept header applied by
	// browserTransport when a request has not set one.
	defaultAccept = "application/json, text/html, */*"
	// browserAccept mirrors Chrome's Accept header for top-level page
	// navigations; used for the collection HTML document request.
	browserAccept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"
	// browserAcceptLanguage prefers New Zealand English, matching the
	// Auckland Council site's audience.
	browserAcceptLanguage = "en-NZ,en;q=0.9,en-US;q=0.8"
	// Client Hints advertising Chromium 135, consistent with the
	// Chrome/135 userAgent string above.
	// NOTE(review): secCHUAPlatform says "macOS" while userAgent claims
	// Windows NT 10.0 — confirm this mismatch is intentional and accepted
	// by the server.
	secCHUA         = "\"Chromium\";v=\"135\", \"Not.A/Brand\";v=\"8\""
	secCHUAPlatform = "\"macOS\""
)

// browserTransport is an http.RoundTripper that adds browser-like headers to
// every request before forwarding it to the underlying transport.
Expand All @@ -27,7 +34,30 @@ func (t *browserTransport) RoundTrip(req *http.Request) (*http.Response, error)
r.Header.Set("User-Agent", userAgent)
}
if r.Header.Get("Accept") == "" {
r.Header.Set("Accept", "application/json, text/html, */*")
r.Header.Set("Accept", defaultAccept)
}
return t.wrapped.RoundTrip(r)
if r.Header.Get("Accept-Language") == "" {
r.Header.Set("Accept-Language", browserAcceptLanguage)
}

wrapped := t.wrapped
if wrapped == nil {
wrapped = http.DefaultTransport
}
return wrapped.RoundTrip(r)
}

// setBrowserDocumentHeaders stamps r with the navigation-style headers a
// browser sends for a top-level document load, which the collection HTML
// page requires. Existing values for these headers are overwritten.
func setBrowserDocumentHeaders(r *http.Request) {
	documentHeaders := [...][2]string{
		{"Accept", browserAccept},
		{"Cache-Control", "max-age=0"},
		{"Upgrade-Insecure-Requests", "1"},
		{"Sec-Fetch-Site", "none"},
		{"Sec-Fetch-Mode", "navigate"},
		{"Sec-Fetch-User", "?1"},
		{"Sec-Fetch-Dest", "document"},
		{"Sec-CH-UA", secCHUA},
		{"Sec-CH-UA-Mobile", "?0"},
		{"Sec-CH-UA-Platform", secCHUAPlatform},
	}
	for _, kv := range documentHeaders {
		r.Header.Set(kv[0], kv[1])
	}
}
2 changes: 2 additions & 0 deletions rubbish.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,14 @@ func fetchandparse(ctx context.Context, addressID string) (*CollectionDayDetailR
if err != nil {
return nil, err
}
setBrowserDocumentHeaders(req)
resp, err := collectionHTTPClient.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
slog.WarnContext(ctx, "collection request failed", "status", resp.StatusCode, "url", req.URL.String())
return nil, fmt.Errorf("collection API returned status code: %d", resp.StatusCode)
}
return parse(resp.Body)
Expand Down
143 changes: 143 additions & 0 deletions rubbish_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,149 @@ func TestFetchAndParse_StatusCodeError(t *testing.T) {
}
}

// TestCollectionRequestHeaders verifies that collection-page requests carry
// the full browser-mimicking header set (User-Agent, Accept, Accept-Language,
// Sec-Fetch-* and Client Hints). The collection page returns 406 for requests
// that do not resemble a browser, so these headers are load-bearing.
func TestCollectionRequestHeaders(t *testing.T) {
	// Bypass the package cache so each subtest hits its own test server;
	// reset the flag to its default (false) when the test finishes.
	t.Cleanup(func() {
		NoCache = false
	})
	NoCache = true

	t.Run("fetchandparse uses browser headers", func(t *testing.T) {
		// The handler answers 406 — the same status the real site uses —
		// as soon as any expected header is missing or wrong, so the
		// first offending header is named in the test failure.
		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			if got := r.Header.Get("User-Agent"); got != userAgent {
				http.Error(w, "missing browser user-agent", http.StatusNotAcceptable)
				return
			}
			if got := r.Header.Get("Accept"); got != browserAccept {
				http.Error(w, "missing browser accept", http.StatusNotAcceptable)
				return
			}
			if got := r.Header.Get("Accept-Language"); got != browserAcceptLanguage {
				http.Error(w, "missing browser accept-language", http.StatusNotAcceptable)
				return
			}
			if got := r.Header.Get("Cache-Control"); got != "max-age=0" {
				http.Error(w, "missing cache-control", http.StatusNotAcceptable)
				return
			}
			if got := r.Header.Get("Upgrade-Insecure-Requests"); got != "1" {
				http.Error(w, "missing upgrade-insecure-requests", http.StatusNotAcceptable)
				return
			}
			if got := r.Header.Get("Sec-Fetch-Site"); got != "none" {
				http.Error(w, "missing sec-fetch-site", http.StatusNotAcceptable)
				return
			}
			if got := r.Header.Get("Sec-Fetch-Mode"); got != "navigate" {
				http.Error(w, "missing sec-fetch-mode", http.StatusNotAcceptable)
				return
			}
			if got := r.Header.Get("Sec-Fetch-User"); got != "?1" {
				http.Error(w, "missing sec-fetch-user", http.StatusNotAcceptable)
				return
			}
			if got := r.Header.Get("Sec-Fetch-Dest"); got != "document" {
				http.Error(w, "missing sec-fetch-dest", http.StatusNotAcceptable)
				return
			}
			if got := r.Header.Get("Sec-CH-UA"); got != secCHUA {
				http.Error(w, "missing sec-ch-ua", http.StatusNotAcceptable)
				return
			}
			if got := r.Header.Get("Sec-CH-UA-Mobile"); got != "?0" {
				http.Error(w, "missing sec-ch-ua-mobile", http.StatusNotAcceptable)
				return
			}
			if got := r.Header.Get("Sec-CH-UA-Platform"); got != secCHUAPlatform {
				http.Error(w, "missing sec-ch-ua-platform", http.StatusNotAcceptable)
				return
			}
			// All headers accepted: serve the canned page.
			// NOTE(review): taRsd1LuandaDrive is presumably a recorded
			// collection-page fixture defined elsewhere in the package —
			// confirm it stays in sync with the parser.
			w.Write([]byte(taRsd1LuandaDrive))
		}))
		t.Cleanup(srv.Close)

		// Point the package-level URI and client at the test server,
		// restoring the originals when this subtest ends.
		oldURI := collectionDayURI
		oldClient := collectionHTTPClient
		t.Cleanup(func() {
			collectionDayURI = oldURI
			collectionHTTPClient = oldClient
		})

		collectionDayURI = srv.URL + "/rubbish/%s"
		// Wrap the test server's transport in browserTransport, mirroring
		// production wiring, so the headers under test are actually added.
		collectionHTTPClient = &http.Client{
			Timeout:   time.Second,
			Transport: &browserTransport{wrapped: srv.Client().Transport},
		}

		got, err := fetchandparse(t.Context(), "42")
		if err != nil {
			t.Fatalf("fetchandparse() error = %v", err)
		}
		if got == nil {
			t.Fatal("fetchandparse() returned nil result")
		}
	})

	t.Run("collection day detail recovers from 406 with browser headers", func(t *testing.T) {
		// End-to-end: the server fakes both the address-lookup endpoint
		// and the collection page, and only serves the page when every
		// browser header is present — otherwise it 406s like the real
		// site.
		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			switch r.URL.Path {
			case "/addr":
				writeAddrJSON(w, AddrResponse{Items: []Address{*testAddr}})
			case "/rubbish/42":
				if r.Header.Get("User-Agent") == userAgent &&
					r.Header.Get("Accept") == browserAccept &&
					r.Header.Get("Accept-Language") == browserAcceptLanguage &&
					r.Header.Get("Cache-Control") == "max-age=0" &&
					r.Header.Get("Upgrade-Insecure-Requests") == "1" &&
					r.Header.Get("Sec-Fetch-Site") == "none" &&
					r.Header.Get("Sec-Fetch-Mode") == "navigate" &&
					r.Header.Get("Sec-Fetch-User") == "?1" &&
					r.Header.Get("Sec-Fetch-Dest") == "document" &&
					r.Header.Get("Sec-CH-UA") == secCHUA &&
					r.Header.Get("Sec-CH-UA-Mobile") == "?0" &&
					r.Header.Get("Sec-CH-UA-Platform") == secCHUAPlatform {
					w.Write([]byte(taRsd1LuandaDrive))
					return
				}
				http.Error(w, "collection API returned status code: 406", http.StatusNotAcceptable)
			default:
				http.NotFound(w, r)
			}
		}))
		t.Cleanup(srv.Close)

		// Save and restore all four package-level knobs touched below.
		oldAddrURI := addrURI
		oldCollectionDayURI := collectionDayURI
		oldAddrClient := addrHTTPClient
		oldCollectionClient := collectionHTTPClient
		t.Cleanup(func() {
			addrURI = oldAddrURI
			collectionDayURI = oldCollectionDayURI
			addrHTTPClient = oldAddrClient
			collectionHTTPClient = oldCollectionClient
		})

		addrURI = srv.URL + "/addr"
		collectionDayURI = srv.URL + "/rubbish/%s"
		addrHTTPClient = &http.Client{
			Timeout:   time.Second,
			Transport: &browserTransport{wrapped: srv.Client().Transport},
		}
		collectionHTTPClient = &http.Client{
			Timeout:   time.Second,
			Transport: &browserTransport{wrapped: srv.Client().Transport},
		}

		got, err := CollectionDayDetail(t.Context(), "500 Queen Street")
		if err != nil {
			t.Fatalf("CollectionDayDetail() error = %v", err)
		}

		assert.NotNil(t, got)
		assert.Equal(t, testAddr, got.Address)
		// NOTE(review): 3 collections presumably matches the
		// taRsd1LuandaDrive fixture content — confirm against the fixture.
		assert.Len(t, got.Collections, 3)
	})
}

func TestCollectionDayDetailResult_NextRubbish(t *testing.T) {
type fields struct {
Collections []RubbishCollection
Expand Down