From 9285116c5eb763d183f6c6001d5429512da8cfc3 Mon Sep 17 00:00:00 2001 From: GulSam00 Date: Wed, 7 Jan 2026 01:06:13 +0900 Subject: [PATCH 1/7] =?UTF-8?q?fix=20:=20yaml=20=ED=8C=8C=EC=9D=BC=20node?= =?UTF-8?q?=20=EB=B2=84=EC=A0=84=2018=20->=2020?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/crawl-recent-tj.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/crawl-recent-tj.yml b/.github/workflows/crawl-recent-tj.yml index 057a080..8e61263 100644 --- a/.github/workflows/crawl-recent-tj.yml +++ b/.github/workflows/crawl-recent-tj.yml @@ -12,10 +12,10 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Use Node.js 18 + - name: Use Node.js 20 uses: actions/setup-node@v4 with: - node-version: "18" + node-version: "20" - name: Install pnpm uses: pnpm/action-setup@v2 From fe565c999db3ce0935e8f8ab3e3da20aa1d43904 Mon Sep 17 00:00:00 2001 From: GulSam00 Date: Thu, 8 Jan 2026 00:21:25 +0900 Subject: [PATCH 2/7] =?UTF-8?q?fix=20:=20ky-youtube=20action=20=ED=8C=8C?= =?UTF-8?q?=EC=9D=BC=20=EC=9E=AC=EC=8B=9C=EB=8F=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/{temp => workflows}/update-ky-youtube.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename .github/{temp => workflows}/update-ky-youtube.yml (96%) diff --git a/.github/temp/update-ky-youtube.yml b/.github/workflows/update-ky-youtube.yml similarity index 96% rename from .github/temp/update-ky-youtube.yml rename to .github/workflows/update-ky-youtube.yml index 322a7e8..c607433 100644 --- a/.github/temp/update-ky-youtube.yml +++ b/.github/workflows/update-ky-youtube.yml @@ -20,10 +20,10 @@ jobs: ref: feat/songUpdate persist-credentials: false # 수동 인증으로 푸시 제어 - - name: Use Node.js 18 + - name: Use Node.js 20 uses: actions/setup-node@v4 with: - node-version: "18" + node-version: "20" - name: Install pnpm uses: pnpm/action-setup@v2 From 37baf233bf75a9132e77e125b529c0467ffb2715 Mon Sep 17 00:00:00 2001 From: GulSam00 Date: Thu, 8 Jan 2026 00:22:05 +0900 Subject: [PATCH 3/7] =?UTF-8?q?fix=20:=20gemini=20=ED=99=9C=EC=9A=A9=20cra?= =?UTF-8?q?wlYoutube=20=EB=A6=AC=ED=8E=99=ED=86=A0=EB=A7=81,=20=EC=BD=94?= =?UTF-8?q?=EB=93=9C=20=EA=B5=AC=EC=A1=B0=20=EB=B0=8F=20=EC=95=88=EC=A0=95?= =?UTF-8?q?=EC=84=B1=20=EA=B0=9C=EC=84=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../crawling/src/crawling/crawlYoutube.ts | 208 +++++++++++------- .../crawling/src/crawling/crawlYoutubeTemp.ts | 109 +++++++++ 2 files changed, 238 insertions(+), 79 deletions(-) create mode 100644 packages/crawling/src/crawling/crawlYoutubeTemp.ts diff --git a/packages/crawling/src/crawling/crawlYoutube.ts b/packages/crawling/src/crawling/crawlYoutube.ts index d4280c7..10f8f16 100644 --- a/packages/crawling/src/crawling/crawlYoutube.ts +++ b/packages/crawling/src/crawling/crawlYoutube.ts @@ -1,109 +1,159 @@ import * as cheerio from 'cheerio'; -import puppeteer from 'puppeteer'; +import puppeteer, { Browser, Page } from 'puppeteer'; import { getInvalidKYSongsDB, getSongsKyNullDB } from '@/supabase/getDB'; import { postInvalidKYSongsDB } from '@/supabase/postDB'; import { updateSongsKyDB } from '@/supabase/updateDB'; import { Song } from '@/types'; -import { saveCrawlYoutubeFailedKYSongs, updateDataLog } from '@/utils/logData'; +import { updateDataLog } from '@/utils/logData'; import { isValidKYExistNumber } from './isValidKYExistNumber'; -// youtube에서 KY 노래방 번호 크롤링 -// crawlYoutubeValid에서 진행하는 실제 사이트 검증도 포함 +// --- Constants --- +const BASE_YOUTUBE_SEARCH_URL = 'https://www.youtube.com/@KARAOKEKY/search'; -// action 우분투 환경에서의 호환을 위해 추가 -const browser = await puppeteer.launch({ - headless: true, -}); +// --- Helper Functions --- -const page = await browser.newPage(); - -const baseUrl = 'https://www.youtube.com/@KARAOKEKY/search'; - -const scrapeSongNumber = async (query: string) => { - const searchUrl = `${baseUrl}?query=${encodeURIComponent(query)}`; - - // page.goto의 waitUntil 문제였음! - await page.goto(searchUrl, { - waitUntil: 'networkidle2', - timeout: 0, - }); +/** + * 텍스트에서 KY 노래방 번호를 추출합니다. + */ +const extractKaraokeNumber = (title: string): string | null => { + const matchResult = title.match(/KY\.\s*(\d{2,5})\)/); + return matchResult ? matchResult[1] : null; +}; - const html = await page.content(); - const $ = cheerio.load(html); +/** + * 유튜브 검색 결과 페이지에서 노래 번호를 스크래핑합니다. + */ +const scrapeSongNumber = async (page: Page, query: string): Promise => { + const searchUrl = `${BASE_YOUTUBE_SEARCH_URL}?query=${encodeURIComponent(query)}`; - // id contents 의 첫번째 ytd-item-section-renderer 찾기 - // const firstItem = $("#contents ytd-item-section-renderer").first(); + try { + // waitUntil을 통해 네트워크가 안정될 때까지 대기 + // 30초 타임아웃 설정 (무한 대기 방지) + await page.goto(searchUrl, { + waitUntil: 'networkidle2', + // timeout: 0, + }); - const firstItem = $('ytd-video-renderer').first(); + const html = await page.content(); + const $ = cheerio.load(html); - // yt-formatted-string 찾기 - const title = firstItem.find('yt-formatted-string').first().text().trim(); + const firstItem = $('ytd-video-renderer').first(); - const karaokeNumber = extractKaraokeNumber(title); + // 검색 결과가 없는 경우 처리 + if (firstItem.length === 0) { + return null; + } - return karaokeNumber; + const title = firstItem.find('yt-formatted-string').first().text().trim(); + return extractKaraokeNumber(title); + } catch (error) { + console.warn(`[Scraping Failed] Query: ${query}`, error); + return null; + } }; -const extractKaraokeNumber = (title: string) => { - // KY. 찾고 ) 가 올때까지 찾기 - const matchResult = title.match(/KY\.\s*(\d{2,5})\)/); - const karaokeNumber = matchResult ? matchResult[1] : null; - return karaokeNumber; +/** + * 성공한 데이터를 DB에 업데이트하고 로그를 남깁니다. + */ +const handleSuccess = async (song: Song, kyNum: string) => { + const result = await updateSongsKyDB({ ...song, num_ky: kyNum }); + // console.log(`[Update Success] ${song.title}: ${kyNum}`, result); // 로그 너무 많으면 주석 처리 + updateDataLog(result.success, 'crawlYoutubeSuccess.txt'); }; -const updateData = async (data: Song) => { - const result = await updateSongsKyDB(data); - console.log(result); - updateDataLog(result.success, 'crawlYoutubeSuccess.txt'); - updateDataLog(result.failed, 'crawlYoutubeFailed.txt'); +/** + * 실패한 데이터를 Invalid DB에 저장하고 로그를 남깁니다. + */ +const handleFailure = async (song: Song) => { + await postInvalidKYSongsDB(song); + updateDataLog(false, 'crawlYoutubeFailed.txt'); // false 로그 처리 방식에 따라 수정 필요 }; -// failedSongs을 가져와서 실패한 노래를 건너뛰는 게 아니라 실패 시 update_date를 수정해 작업 순위를 뒤로 미룬다면? -const data = await getSongsKyNullDB(); -const failedSongs = await getInvalidKYSongsDB(); +// --- Main Logic --- -console.log('getSongsKyNullDB : ', data.length); -console.log('failedSongs : ', failedSongs.length); -let index = 0; -let successCount = 0; +const main = async () => { + console.log('🚀 크롤링 작업을 시작합니다...'); -for (const song of data) { - if (failedSongs.find(failedSong => failedSong.id === song.id)) { - continue; - } - const query = song.title + '-' + song.artist; + // 1. 브라우저 초기화 + const browser = await puppeteer.launch({ + headless: true, + args: ['--no-sandbox', '--disable-setuid-sandbox'], // 리눅스 환경 호환성 + }); - let resultKyNum = null; try { - resultKyNum = await scrapeSongNumber(query); - } catch (error) { - continue; - } - - if (resultKyNum) { - let isValid = true; - try { - isValid = await isValidKYExistNumber(page, resultKyNum, song.title, song.artist); - } catch (error) { - continue; + const page = await browser.newPage(); + + // 2. 데이터 가져오기 + // Promise.all로 병렬 요청하여 대기 시간 단축 + const [targetSongs, failedSongs] = await Promise.all([ + getSongsKyNullDB(), + getInvalidKYSongsDB(), + ]); + + console.log(`📊 처리 대상 곡: ${targetSongs.length}개`); + console.log(`🚫 이미 실패한 곡: ${failedSongs.length}개`); + + // 3. 최적화: 실패한 곡 ID를 Set으로 변환 (검색 속도 O(1)로 향상) + const failedSongIds = new Set(failedSongs.map(s => s.id)); + + let processedCount = 0; + let successCount = 0; + + // 4. 순차 처리 루프 + for (const song of targetSongs) { + processedCount++; + const query = `${song.title}-${song.artist}`; + + // 4-1. 이미 실패했던 곡은 스킵 + if (failedSongIds.has(song.id)) { + continue; + } + + console.log(`[${processedCount}/${targetSongs.length}] 검색 중: ${query}`); + + // 4-2. 스크래핑 시도 + const resultKyNum = await scrapeSongNumber(page, query); + + if (!resultKyNum) { + // 검색 결과 없음 -> 실패 처리 + await handleFailure(song); + continue; + } + + // 4-3. 번호 유효성 검증 (실제 존재하는 번호인지 2차 확인) + let isValid = false; + try { + isValid = await isValidKYExistNumber(page, resultKyNum, song.title, song.artist); + } catch (error) { + console.error(`❌ 검증 중 에러 발생: ${query}`, error); + // 검증 에러 시 일단 실패 처리하거나 continue + continue; + } + + if (isValid) { + // 성공 처리 + await handleSuccess(song, resultKyNum); + successCount++; + console.log(`✅ 업데이트 완료: ${resultKyNum}`); + } else { + // 유효하지 않은 번호 -> 실패 처리 + await handleFailure(song); + console.log(`⚠️ 유효하지 않은 번호: ${resultKyNum}`); + } } - if (!isValid) { - await postInvalidKYSongsDB(song); - continue; - } else { - await updateData({ ...song, num_ky: resultKyNum }); - console.log('update song : ', resultKyNum); - successCount++; - } - } else await postInvalidKYSongsDB(song); - - index++; - console.log(query); - console.log('scrapeSongNumber : ', index); - console.log('successCount : ', successCount); -} + console.log('------------------------------------------------'); + console.log(`🎉 모든 작업 완료! 총 성공: ${successCount}건`); + } catch (error) { + console.error('🔥 치명적인 에러 발생:', error); + } finally { + // 5. 종료 처리: 에러가 나든 안 나든 브라우저는 반드시 닫음 + await browser.close(); + console.log('🔒 브라우저 종료됨'); + } +}; -browser.close(); +// 스크립트 실행 +main(); diff --git a/packages/crawling/src/crawling/crawlYoutubeTemp.ts b/packages/crawling/src/crawling/crawlYoutubeTemp.ts new file mode 100644 index 0000000..2cf7075 --- /dev/null +++ b/packages/crawling/src/crawling/crawlYoutubeTemp.ts @@ -0,0 +1,109 @@ +import * as cheerio from 'cheerio'; +import puppeteer from 'puppeteer'; + +import { getInvalidKYSongsDB, getSongsKyNullDB } from '@/supabase/getDB'; +import { postInvalidKYSongsDB } from '@/supabase/postDB'; +import { updateSongsKyDB } from '@/supabase/updateDB'; +import { Song } from '@/types'; +import { saveCrawlYoutubeFailedKYSongs, updateDataLog } from '@/utils/logData'; + +import { isValidKYExistNumber } from './isValidKYExistNumber'; + +// youtube에서 KY 노래방 번호 크롤링 +// crawlYoutubeValid에서 진행하는 실제 사이트 검증도 포함 + +// action 우분투 환경에서의 호환을 위해 추가 +const browser = await puppeteer.launch({ + headless: true, +}); + +const page = await browser.newPage(); + +const baseUrl = 'https://www.youtube.com/@KARAOKEKY/search'; + +const scrapeSongNumber = async (query: string) => { + const searchUrl = `${baseUrl}?query=${encodeURIComponent(query)}`; + + // page.goto의 waitUntil 문제였음! + await page.goto(searchUrl, { + waitUntil: 'networkidle2', + // timeout: 0, + }); + + const html = await page.content(); + const $ = cheerio.load(html); + + // id contents 의 첫번째 ytd-item-section-renderer 찾기 + // const firstItem = $("#contents ytd-item-section-renderer").first(); + + const firstItem = $('ytd-video-renderer').first(); + + // yt-formatted-string 찾기 + const title = firstItem.find('yt-formatted-string').first().text().trim(); + + const karaokeNumber = extractKaraokeNumber(title); + + return karaokeNumber; +}; + +const extractKaraokeNumber = (title: string) => { + // KY. 찾고 ) 가 올때까지 찾기 + const matchResult = title.match(/KY\.\s*(\d{2,5})\)/); + const karaokeNumber = matchResult ? matchResult[1] : null; + return karaokeNumber; +}; + +const updateData = async (data: Song) => { + const result = await updateSongsKyDB(data); + console.log(result); + updateDataLog(result.success, 'crawlYoutubeSuccess.txt'); + updateDataLog(result.failed, 'crawlYoutubeFailed.txt'); +}; + +// failedSongs을 가져와서 실패한 노래를 건너뛰는 게 아니라 실패 시 update_date를 수정해 작업 순위를 뒤로 미룬다면? +const data = await getSongsKyNullDB(); +const failedSongs = await getInvalidKYSongsDB(); + +console.log('getSongsKyNullDB : ', data.length); +console.log('failedSongs : ', failedSongs.length); +let index = 0; +let successCount = 0; + +for (const song of data) { + if (failedSongs.find(failedSong => failedSong.id === song.id)) { + continue; + } + const query = song.title + '-' + song.artist; + + let resultKyNum = null; + try { + resultKyNum = await scrapeSongNumber(query); + } catch (error) { + continue; + } + + if (resultKyNum) { + let isValid = true; + try { + isValid = await isValidKYExistNumber(page, resultKyNum, song.title, song.artist); + } catch (error) { + continue; + } + + if (!isValid) { + await postInvalidKYSongsDB(song); + continue; + } else { + await updateData({ ...song, num_ky: resultKyNum }); + console.log('update song : ', resultKyNum); + successCount++; + } + } else await postInvalidKYSongsDB(song); + + index++; + console.log(query); + console.log('scrapeSongNumber : ', index); + console.log('successCount : ', successCount); +} + +browser.close(); From 2033809fc35d44a115f309548722433cf5366a53 Mon Sep 17 00:00:00 2001 From: GulSam00 Date: Thu, 8 Jan 2026 01:04:18 +0900 Subject: [PATCH 4/7] =?UTF-8?q?fix=20:=20=EC=A2=85=EB=A3=8C=20=ED=99=95?= =?UTF-8?q?=EC=9D=B8=20=EC=9A=A9=20batch=20=EC=A0=9C=ED=95=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/crawling/src/crawling/crawlYoutube.ts | 14 +++++++++----- .../crawling/src/crawling/isValidKYExistNumber.ts | 8 ++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/packages/crawling/src/crawling/crawlYoutube.ts b/packages/crawling/src/crawling/crawlYoutube.ts index 10f8f16..9ff4582 100644 --- a/packages/crawling/src/crawling/crawlYoutube.ts +++ b/packages/crawling/src/crawling/crawlYoutube.ts @@ -11,7 +11,7 @@ import { isValidKYExistNumber } from './isValidKYExistNumber'; // --- Constants --- const BASE_YOUTUBE_SEARCH_URL = 'https://www.youtube.com/@KARAOKEKY/search'; - +const BATCH_LIMIT = 100; // ✅ 한 번 실행 시 최대 처리 개수 제한 // --- Helper Functions --- /** @@ -92,8 +92,11 @@ const main = async () => { getInvalidKYSongsDB(), ]); - console.log(`📊 처리 대상 곡: ${targetSongs.length}개`); - console.log(`🚫 이미 실패한 곡: ${failedSongs.length}개`); + const targetBatchSongs = targetSongs.slice(0, BATCH_LIMIT); + + console.log(`📊 ky가 null인 대상 곡: ${targetSongs.length}개`); + console.log(`🎯 작업 대상 곡 개수: ${targetBatchSongs.length}개`); + console.log(`🚫 이미 실패한 곡(유효하지 않은 KY 노래방 번호): ${failedSongs.length}개`); // 3. 최적화: 실패한 곡 ID를 Set으로 변환 (검색 속도 O(1)로 향상) const failedSongIds = new Set(failedSongs.map(s => s.id)); @@ -102,7 +105,7 @@ const main = async () => { let successCount = 0; // 4. 순차 처리 루프 - for (const song of targetSongs) { + for (const song of targetBatchSongs) { processedCount++; const query = `${song.title}-${song.artist}`; @@ -111,13 +114,14 @@ const main = async () => { continue; } - console.log(`[${processedCount}/${targetSongs.length}] 검색 중: ${query}`); + console.log(`[${processedCount}/${targetBatchSongs.length}] 검색 중: ${query}`); // 4-2. 스크래핑 시도 const resultKyNum = await scrapeSongNumber(page, query); if (!resultKyNum) { // 검색 결과 없음 -> 실패 처리 + console.log(`❌ 검색 결과 없음: ${query}`); await handleFailure(song); continue; } diff --git a/packages/crawling/src/crawling/isValidKYExistNumber.ts b/packages/crawling/src/crawling/isValidKYExistNumber.ts index d78d9e6..3ba8446 100644 --- a/packages/crawling/src/crawling/isValidKYExistNumber.ts +++ b/packages/crawling/src/crawling/isValidKYExistNumber.ts @@ -44,11 +44,7 @@ export const isValidKYExistNumber = async ( return true; } - console.log(number, ' - 금영 노래방과 일치하지 않는 번호와 데이터'); - console.log('title : '); - console.log('검색 쿼리 : ', parsedTitle, ' | ', '번호 결과 : ', titleResult); - console.log('artist : '); - console.log('검색 쿼리 : ', parsedArtist, ' | ', '번호 결과 : ', artistResult); - + console.log('검색 쿼리 : ', parsedTitle, ' - ', parsedArtist); + console.log('KY 노래방 검색 결과 : ', titleResult, ' - ', artistResult); return false; }; From 72d6e323b63227f38e336a2b9a9e370dff66b3ff Mon Sep 17 00:00:00 2001 From: GulSam00 Date: Thu, 8 Jan 2026 01:09:51 +0900 Subject: [PATCH 5/7] =?UTF-8?q?fix=20:=20log=20=EC=A0=80=EC=9E=A5=20?= =?UTF-8?q?=EB=A1=9C=EC=A7=81=20=EC=82=AD=EC=A0=9C,=20git=20action=20yaml?= =?UTF-8?q?=20=ED=8C=8C=EC=9D=BC=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/temp/update-ky-youtube.yml | 63 +++++++++++++++++++ .github/workflows/update-ky-youtube.yml | 20 ------ .../crawling/src/crawling/crawlYoutube.ts | 7 +-- 3 files changed, 66 insertions(+), 24 deletions(-) create mode 100644 .github/temp/update-ky-youtube.yml diff --git a/.github/temp/update-ky-youtube.yml b/.github/temp/update-ky-youtube.yml new file mode 100644 index 0000000..c607433 --- /dev/null +++ b/.github/temp/update-ky-youtube.yml @@ -0,0 +1,63 @@ +name: Update ky by Youtube + +# 실행 일시 중지 +on: + schedule: + - cron: "0 14 * * *" # 한국 시간 23:00 실행 (UTC+9 → UTC 14:00) + workflow_dispatch: + +permissions: + contents: write # push 권한을 위해 필요 + +jobs: + run-npm-task: + runs-on: ubuntu-latest + + steps: + - name: Checkout branch + uses: actions/checkout@v4 + with: + ref: feat/songUpdate + persist-credentials: false # 수동 인증으로 푸시 제어 + + - name: Use Node.js 20 + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Install pnpm + uses: pnpm/action-setup@v2 + with: + version: 9 + run_install: false + + - name: Install dependencies + working-directory: packages/crawling + run: pnpm install + + - name: Create .env file + working-directory: packages/crawling + run: | + echo "SUPABASE_URL=${{ secrets.SUPABASE_URL }}" >> .env + echo "SUPABASE_KEY=${{ secrets.SUPABASE_KEY }}" >> .env + + - name: run update script - packages/crawling/crawlYoutube.ts + working-directory: packages/crawling + run: pnpm run ky-youtube + + - name: Commit and push changes to feat/songUpdate branch + run: | + git config --global user.name "github-actions[bot]" + git config --global user.email "github-actions[bot]@users.noreply.github.com" + + git checkout feat/songUpdate + + git add . + if git diff --cached --quiet; then + echo "✅ No changes to commit" + else + git commit -m "chore: update crawled TJ song data [skip ci]" + git push origin feat/songUpdate + fi + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/update-ky-youtube.yml b/.github/workflows/update-ky-youtube.yml index c607433..02735bc 100644 --- a/.github/workflows/update-ky-youtube.yml +++ b/.github/workflows/update-ky-youtube.yml @@ -16,9 +16,6 @@ jobs: steps: - name: Checkout branch uses: actions/checkout@v4 - with: - ref: feat/songUpdate - persist-credentials: false # 수동 인증으로 푸시 제어 - name: Use Node.js 20 uses: actions/setup-node@v4 @@ -44,20 +41,3 @@ jobs: - name: run update script - packages/crawling/crawlYoutube.ts working-directory: packages/crawling run: pnpm run ky-youtube - - - name: Commit and push changes to feat/songUpdate branch - run: | - git config --global user.name "github-actions[bot]" - git config --global user.email "github-actions[bot]@users.noreply.github.com" - - git checkout feat/songUpdate - - git add . - if git diff --cached --quiet; then - echo "✅ No changes to commit" - else - git commit -m "chore: update crawled TJ song data [skip ci]" - git push origin feat/songUpdate - fi - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/packages/crawling/src/crawling/crawlYoutube.ts b/packages/crawling/src/crawling/crawlYoutube.ts index 9ff4582..9986816 100644 --- a/packages/crawling/src/crawling/crawlYoutube.ts +++ b/packages/crawling/src/crawling/crawlYoutube.ts @@ -5,13 +5,12 @@ import { getInvalidKYSongsDB, getSongsKyNullDB } from '@/supabase/getDB'; import { postInvalidKYSongsDB } from '@/supabase/postDB'; import { updateSongsKyDB } from '@/supabase/updateDB'; import { Song } from '@/types'; -import { updateDataLog } from '@/utils/logData'; import { isValidKYExistNumber } from './isValidKYExistNumber'; // --- Constants --- const BASE_YOUTUBE_SEARCH_URL = 'https://www.youtube.com/@KARAOKEKY/search'; -const BATCH_LIMIT = 100; // ✅ 한 번 실행 시 최대 처리 개수 제한 +const BATCH_LIMIT = 1000; // ✅ 한 번 실행 시 최대 처리 개수 제한 // --- Helper Functions --- /** @@ -60,7 +59,7 @@ const scrapeSongNumber = async (page: Page, query: string): Promise { const result = await updateSongsKyDB({ ...song, num_ky: kyNum }); // console.log(`[Update Success] ${song.title}: ${kyNum}`, result); // 로그 너무 많으면 주석 처리 - updateDataLog(result.success, 'crawlYoutubeSuccess.txt'); + // updateDataLog(result.success, 'crawlYoutubeSuccess.txt'); }; /** @@ -68,7 +67,7 @@ const handleSuccess = async (song: Song, kyNum: string) => { */ const handleFailure = async (song: Song) => { await postInvalidKYSongsDB(song); - updateDataLog(false, 'crawlYoutubeFailed.txt'); // false 로그 처리 방식에 따라 수정 필요 + // updateDataLog(false, 'crawlYoutubeFailed.txt'); // false 로그 처리 방식에 따라 수정 필요 }; // --- Main Logic --- From 6accdafb3afc7a722b8a87ed48e0528197b1ad39 Mon Sep 17 00:00:00 2001 From: GulSam00 Date: Thu, 8 Jan 2026 01:16:05 +0900 Subject: [PATCH 6/7] =?UTF-8?q?fix=20:=20console.log=20=EC=A0=9C=EA=B1=B0?= =?UTF-8?q?=20=EB=B0=8F=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/crawling/src/crawling/crawlYoutube.ts | 9 +++------ packages/crawling/src/supabase/getDB.ts | 12 ------------ 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/packages/crawling/src/crawling/crawlYoutube.ts b/packages/crawling/src/crawling/crawlYoutube.ts index 9986816..9ce215e 100644 --- a/packages/crawling/src/crawling/crawlYoutube.ts +++ b/packages/crawling/src/crawling/crawlYoutube.ts @@ -10,7 +10,6 @@ import { isValidKYExistNumber } from './isValidKYExistNumber'; // --- Constants --- const BASE_YOUTUBE_SEARCH_URL = 'https://www.youtube.com/@KARAOKEKY/search'; -const BATCH_LIMIT = 1000; // ✅ 한 번 실행 시 최대 처리 개수 제한 // --- Helper Functions --- /** @@ -91,11 +90,9 @@ const main = async () => { getInvalidKYSongsDB(), ]); - const targetBatchSongs = targetSongs.slice(0, BATCH_LIMIT); - console.log(`📊 ky가 null인 대상 곡: ${targetSongs.length}개`); - console.log(`🎯 작업 대상 곡 개수: ${targetBatchSongs.length}개`); console.log(`🚫 이미 실패한 곡(유효하지 않은 KY 노래방 번호): ${failedSongs.length}개`); + console.log(`🎯 추가 가능한 최대 곡 개수: ${targetSongs.length - failedSongs.length}개`); // 3. 최적화: 실패한 곡 ID를 Set으로 변환 (검색 속도 O(1)로 향상) const failedSongIds = new Set(failedSongs.map(s => s.id)); @@ -104,7 +101,7 @@ const main = async () => { let successCount = 0; // 4. 순차 처리 루프 - for (const song of targetBatchSongs) { + for (const song of targetSongs) { processedCount++; const query = `${song.title}-${song.artist}`; @@ -113,7 +110,7 @@ const main = async () => { continue; } - console.log(`[${processedCount}/${targetBatchSongs.length}] 검색 중: ${query}`); + console.log(`[${processedCount}/${targetSongs.length}] 검색 중: ${query}`); // 4-2. 스크래핑 시도 const resultKyNum = await scrapeSongNumber(page, query); diff --git a/packages/crawling/src/supabase/getDB.ts b/packages/crawling/src/supabase/getDB.ts index dd8c68d..20906bf 100644 --- a/packages/crawling/src/supabase/getDB.ts +++ b/packages/crawling/src/supabase/getDB.ts @@ -45,19 +45,7 @@ export async function getSongsKyNullDB(max: number = 50000) { if (error) throw error; - console.log('data', data.length); - return data; - - // const isKYNULLData: Song[] = []; - - // data.forEach((song) => { - // if (song.num_ky === null) { - // isKYNULLData.push(song); - // } - // }); - - // return isKYNULLData.slice(0, max); } export async function getSongsKyNotNullDB(max: number = 50000) { From 7ed8004683eda7446d73956931e61fd0e9f94035 Mon Sep 17 00:00:00 2001 From: sham <72376700+GulSam00@users.noreply.github.com> Date: Sat, 10 Jan 2026 14:49:15 +0900 Subject: [PATCH 7/7] Uncomment authentication check for public paths --- apps/web/src/auth.tsx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/web/src/auth.tsx b/apps/web/src/auth.tsx index ec5dd91..35e0bef 100644 --- a/apps/web/src/auth.tsx +++ b/apps/web/src/auth.tsx @@ -30,10 +30,10 @@ export default function AuthProvider({ children }: { children: React.ReactNode } return; } - // if (isPublicPath) { - // setIsAuthChecked(true); - // return; - // } + if (isPublicPath) { + setIsAuthChecked(true); + return; + } // 이미 인증된 상태면 바로 통과 (하지만 체크는 수행) const handleAuth = async () => {