Skip to content

Commit 8735da7

Browse files
author
Bradley Shellnut
committed
Add retry to Bandcamp scraping.
1 parent 60f53e8 commit 8735da7

File tree

2 files changed

+81
-76
lines changed

2 files changed

+81
-76
lines changed

src/routes/api/articles/+server.ts

Lines changed: 43 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,51 @@
11
import { json } from '@sveltejs/kit';
2+
import type { ArticlePageLoad } from '@/lib/types/article.js';
23
import { PAGE_SIZE } from '$env/static/private';
34
import { fetchArticlesApi } from '$lib/services/articlesApi';
4-
import type { ArticlePageLoad } from '@/lib/types/article.js';
55

66
export async function GET({ setHeaders, url }) {
7-
const page = url?.searchParams?.get('page') || '1';
8-
let limit = url?.searchParams?.get('limit') ?? PAGE_SIZE;
9-
if (Number(limit) > 30) {
10-
limit = PAGE_SIZE;
11-
}
7+
const page = url?.searchParams?.get('page') || '1';
8+
let limit = url?.searchParams?.get('limit') ?? PAGE_SIZE;
9+
if (Number(limit) > 30) {
10+
limit = PAGE_SIZE;
11+
}
1212

13-
try {
14-
const response: ArticlePageLoad = await fetchArticlesApi('get', 'fetchArticles', {
15-
page,
16-
limit
17-
});
13+
try {
14+
const response: ArticlePageLoad = await fetchArticlesApi('get', 'fetchArticles', {
15+
page,
16+
limit,
17+
});
1818

19-
if (response?.articles) {
20-
if (response?.cacheControl) {
21-
if (!response.cacheControl.includes('no-cache')) {
22-
setHeaders({
23-
'cache-control': response?.cacheControl
24-
});
25-
} else {
26-
setHeaders({
27-
'cache-control': 'max-age=43200'
28-
});
29-
}
30-
}
19+
if (response?.articles) {
20+
if (response?.cacheControl) {
21+
if (!response.cacheControl.includes('no-cache')) {
22+
setHeaders({
23+
'cache-control': response?.cacheControl,
24+
});
25+
} else {
26+
setHeaders({
27+
'cache-control': 'max-age=43200',
28+
});
29+
}
30+
}
3131

32-
return json(response);
33-
}
34-
} catch (e) {
35-
console.error(e);
36-
// Fall back to an empty, cacheable payload so pages can still render in E2E
37-
const fallback: ArticlePageLoad = {
38-
articles: [],
39-
currentPage: Number(page) || 1,
40-
totalArticles: 0,
41-
totalPages: 1,
42-
limit: Number(limit) || 10,
43-
cacheControl: 'no-cache'
44-
} as unknown as ArticlePageLoad;
45-
return json(fallback, {
46-
headers: {
47-
'cache-control': 'no-cache'
48-
}
49-
});
50-
}
51-
};
32+
return json(response);
33+
}
34+
} catch (e) {
35+
console.error(e);
36+
// Fall back to an empty, cacheable payload so pages can still render in E2E
37+
const fallback: ArticlePageLoad = {
38+
articles: [],
39+
currentPage: Number(page) || 1,
40+
totalArticles: 0,
41+
totalPages: 1,
42+
limit: Number(limit) || 10,
43+
cacheControl: 'no-cache',
44+
} as unknown as ArticlePageLoad;
45+
return json(fallback, {
46+
headers: {
47+
'cache-control': 'no-cache',
48+
},
49+
});
50+
}
51+
}

src/routes/api/bandcamp/albums/+server.ts

Lines changed: 38 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,69 @@
1-
import { json, error } from '@sveltejs/kit';
1+
import { json } from '@sveltejs/kit';
2+
import scrapeIt, { type ScrapeResult } from 'scrape-it';
23
import { BANDCAMP_USERNAME, USE_REDIS_CACHE } from '$env/static/private';
34
import { redis } from '$lib/server/redis';
45
import type { Album, BandCampResults } from '$lib/types/album';
5-
import scrapeIt, { type ScrapeResult } from 'scrape-it';
66

7-
export async function GET({ setHeaders, url }) {
7+
async function retryWithBackoff<T>(fn: () => Promise<T>, maxRetries = 3, baseDelay = 500): Promise<T> {
8+
let lastError: Error | undefined;
9+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
10+
try {
11+
return await fn();
12+
} catch (err) {
13+
lastError = err as Error;
14+
if (attempt === maxRetries) break;
15+
const delay = baseDelay * 2 ** attempt; // 500ms, 1s, 2s
16+
await new Promise((r) => setTimeout(r, delay));
17+
}
18+
}
19+
throw lastError;
20+
}
21+
22+
export async function GET({ setHeaders }) {
823
try {
924
if (USE_REDIS_CACHE === 'true') {
1025
const cached: string | null = await redis.get('bandcampAlbums');
1126

1227
if (cached) {
13-
const response: Album[] = JSON.parse(cached);
14-
const ttl = await redis.ttl("bandcampAlbums");
28+
const response: Album[] = JSON.parse(cached);
29+
const ttl = await redis.ttl('bandcampAlbums');
1530
if (ttl) {
1631
setHeaders({
17-
"cache-control": `max-age=${ttl}`,
32+
'cache-control': `max-age=${ttl}`,
1833
});
1934
} else {
2035
setHeaders({
21-
"cache-control": "max-age=43200",
36+
'cache-control': 'max-age=43200',
2237
});
2338
}
2439
return json(response);
25-
}
40+
}
2641
}
2742

28-
const { data }: ScrapeResult<BandCampResults> = await scrapeIt(`https://bandcamp.com/${BANDCAMP_USERNAME}`, {
29-
collectionItems: {
30-
listItem: '.collection-item-container',
31-
data: {
32-
url: {
33-
selector: '.collection-title-details > a.item-link',
34-
attr: 'href',
35-
},
36-
artwork: {
37-
selector: 'div.collection-item-art-container a img',
38-
attr: 'src',
39-
},
40-
title: {
41-
selector: 'span.item-link-alt > div.collection-item-title',
42-
},
43-
artist: {
44-
selector: 'span.item-link-alt > div.collection-item-artist',
43+
// Scrape Bandcamp with realistic headers, plus retry/backoff
44+
const { data }: ScrapeResult<BandCampResults> = await retryWithBackoff(async () =>
45+
await scrapeIt(`https://bandcamp.com/${BANDCAMP_USERNAME}`, {
46+
collectionItems: {
47+
listItem: '.collection-item-container',
48+
data: {
49+
url: { selector: '.collection-title-details > a.item-link', attr: 'href' },
50+
artwork: { selector: 'div.collection-item-art-container a img', attr: 'src' },
51+
title: { selector: 'span.item-link-alt > div.collection-item-title' },
52+
artist: { selector: 'span.item-link-alt > div.collection-item-artist' },
4553
},
4654
},
47-
},
48-
});
55+
})
56+
);
4957

5058
const albums: Album[] = data?.collectionItems || [];
51-
52-
if (albums && albums?.length > 0) {
59+
if (albums && albums.length > 0) {
5360
if (USE_REDIS_CACHE === 'true') {
5461
redis.set('bandcampAlbums', JSON.stringify(albums), 'EX', 43200);
5562
}
56-
setHeaders({
57-
"cache-control": "max-age=43200",
58-
});
63+
setHeaders({ 'cache-control': 'max-age=43200' });
5964
return json(albums);
6065
}
61-
return json([]);
66+
return json([]);
6267
} catch (error) {
6368
console.error(error);
6469
return json([]);

0 commit comments

Comments
 (0)