From e2f13b92c1c50bf565d344f54092cd13cbf16b07 Mon Sep 17 00:00:00 2001 From: bkellam Date: Fri, 30 Jan 2026 17:14:51 -0800 Subject: [PATCH 1/5] fix --- packages/web/package.json | 1 + .../web/src/features/search/fileSourceApi.ts | 117 ++++++++---------- packages/web/src/lib/languageDetection.ts | 38 ++++++ packages/web/src/lib/serviceError.ts | 2 +- yarn.lock | 8 ++ 5 files changed, 98 insertions(+), 68 deletions(-) create mode 100644 packages/web/src/lib/languageDetection.ts diff --git a/packages/web/package.json b/packages/web/package.json index 6856221b6..a0c202d8a 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -145,6 +145,7 @@ "input-otp": "^1.4.2", "langfuse": "^3.38.4", "langfuse-vercel": "^3.38.4", + "linguist-languages": "^9.3.1", "lucide-react": "^0.517.0", "micromatch": "^4.0.8", "next": "15.5.9", diff --git a/packages/web/src/features/search/fileSourceApi.ts b/packages/web/src/features/search/fileSourceApi.ts index c77f4927b..738ec16be 100644 --- a/packages/web/src/features/search/fileSourceApi.ts +++ b/packages/web/src/features/search/fileSourceApi.ts @@ -1,85 +1,68 @@ import 'server-only'; -import { fileNotFound, ServiceError, unexpectedError } from "../../lib/serviceError"; +import { fileNotFound, notFound, ServiceError, unexpectedError } from "../../lib/serviceError"; import { FileSourceRequest, FileSourceResponse } from "./types"; -import { isServiceError } from "../../lib/utils"; -import { search } from "./searchApi"; import { sew } from "@/actions"; import { withOptionalAuthV2 } from "@/withAuthV2"; -import { QueryIR } from './ir'; -import escapeStringRegexp from "escape-string-regexp"; +import { getRepoPath } from '@sourcebot/shared'; +import { simpleGit } from 'simple-git'; +import { detectLanguageFromFilename } from "@/lib/languageDetection"; +import { getBrowsePath } from "@/app/[domain]/browse/hooks/utils"; +import { getCodeHostBrowseFileAtBranchUrl } from "@/lib/utils"; +import { SINGLE_TENANT_ORG_DOMAIN } from 
"@/lib/constants"; -// @todo (bkellam) #574 : We should really be using `git show :` to fetch file contents here. -// This will allow us to support permalinks to files at a specific revision that may not be indexed -// by zoekt. We should also refactor this out of the /search folder. - -export const getFileSource = async ({ path, repo, ref }: FileSourceRequest): Promise => sew(() => - withOptionalAuthV2(async () => { - const query: QueryIR = { - and: { - children: [ - { - repo: { - regexp: `^${escapeStringRegexp(repo)}$`, - }, - }, - { - substring: { - pattern: path, - case_sensitive: true, - file_name: true, - content: false, - } - }, - ...(ref ? [{ - branch: { - pattern: ref, - exact: true, - }, - }]: []) - ] - } - } - - const searchResponse = await search({ - queryType: 'ir', - query, - options: { - matches: 1, - whole: true, - } +export const getFileSource = async ({ path: filePath, repo: repoName, ref }: FileSourceRequest): Promise => sew(() => + withOptionalAuthV2(async ({ org, prisma }) => { + const repo = await prisma.repo.findFirst({ + where: { name: repoName, orgId: org.id }, }); - - if (isServiceError(searchResponse)) { - return searchResponse; + if (!repo) { + return notFound(`Repository "${repoName}" not found.`); } - const files = searchResponse.files; - - if (!files || files.length === 0) { - return fileNotFound(path, repo); - } + const { path: repoPath } = getRepoPath(repo); + const git = simpleGit().cwd(repoPath); - const file = files[0]; - const source = file.content ?? ''; - const language = file.language; + const gitRef = ref ?? 'HEAD'; - const repoInfo = searchResponse.repositoryInfo.find((repo) => repo.id === file.repositoryId); - if (!repoInfo) { - // This should never happen. - return unexpectedError("Repository info not found"); + let source: string; + try { + source = await git.raw(['show', `${gitRef}:${filePath}`]); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + if (errorMessage.includes('does not exist') || errorMessage.includes('fatal: path')) { + return fileNotFound(filePath, repoName); + } + if (errorMessage.includes('unknown revision') || errorMessage.includes('bad revision')) { + return unexpectedError(`Invalid git reference: ${gitRef}`); + } + throw error; } + const language = detectLanguageFromFilename(filePath); + const webUrl = getBrowsePath({ + repoName: repo.name, + revisionName: ref, + path: filePath, + pathType: 'blob', + domain: SINGLE_TENANT_ORG_DOMAIN, + }); + const externalWebUrl = getCodeHostBrowseFileAtBranchUrl({ + webUrl: repo.webUrl, + codeHostType: repo.external_codeHostType, + branchName: gitRef, + filePath, + }); + return { source, language, - path, - repo, - repoCodeHostType: repoInfo.codeHostType, - repoDisplayName: repoInfo.displayName, - repoExternalWebUrl: repoInfo.webUrl, + path: filePath, + repo: repoName, + repoCodeHostType: repo.external_codeHostType, + repoDisplayName: repo.displayName ?? undefined, + repoExternalWebUrl: repo.webUrl ?? 
undefined, branch: ref, - webUrl: file.webUrl, - externalWebUrl: file.externalWebUrl, + webUrl, + externalWebUrl, } satisfies FileSourceResponse; - })); diff --git a/packages/web/src/lib/languageDetection.ts b/packages/web/src/lib/languageDetection.ts new file mode 100644 index 000000000..2124b4bbc --- /dev/null +++ b/packages/web/src/lib/languageDetection.ts @@ -0,0 +1,38 @@ +import * as linguistLanguages from 'linguist-languages'; +import path from 'path'; + +const extensionToLanguage = new Map(); + +for (const [languageName, languageData] of Object.entries(linguistLanguages)) { + if ('extensions' in languageData && languageData.extensions) { + for (const ext of languageData.extensions) { + if (!extensionToLanguage.has(ext)) { + extensionToLanguage.set(ext, languageName); + } + } + } + if ('filenames' in languageData && languageData.filenames) { + for (const filename of languageData.filenames) { + if (!extensionToLanguage.has(filename)) { + extensionToLanguage.set(filename, languageName); + } + } + } +} + +export const detectLanguageFromFilename = (filename: string): string => { + const basename = path.basename(filename); + + // Check for exact filename match (e.g., Makefile, Dockerfile) + if (extensionToLanguage.has(basename)) { + return extensionToLanguage.get(basename)!; + } + + // Check for extension match + const ext = path.extname(filename).toLowerCase(); + if (ext && extensionToLanguage.has(ext)) { + return extensionToLanguage.get(ext)!; + } + + return ''; +}; diff --git a/packages/web/src/lib/serviceError.ts b/packages/web/src/lib/serviceError.ts index 6d8a19f17..a016bcb41 100644 --- a/packages/web/src/lib/serviceError.ts +++ b/packages/web/src/lib/serviceError.ts @@ -72,7 +72,7 @@ export const invalidZoektResponse = async (zoektResponse: Response): Promise => { +export const fileNotFound = (fileName: string, repository: string): ServiceError => { return { statusCode: StatusCodes.NOT_FOUND, errorCode: ErrorCode.FILE_NOT_FOUND, diff --git a/yarn.lock 
b/yarn.lock index f1d908481..892f4f469 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8395,6 +8395,7 @@ __metadata: jsdom: "npm:^25.0.1" langfuse: "npm:^3.38.4" langfuse-vercel: "npm:^3.38.4" + linguist-languages: "npm:^9.3.1" lucide-react: "npm:^0.517.0" micromatch: "npm:^4.0.8" next: "npm:15.5.9" @@ -15018,6 +15019,13 @@ __metadata: languageName: node linkType: hard +"linguist-languages@npm:^9.3.1": + version: 9.3.1 + resolution: "linguist-languages@npm:9.3.1" + checksum: 10c0/41d5c16b9f7095310003598f4568254ac9736fc6f67daa1f62a11ae9aaf6acc847451675dbb8387b70ed8daaef75656dba8c8057ae93e07152304f3c27aa7440 + languageName: node + linkType: hard + "linkify-it@npm:^5.0.0": version: 5.0.0 resolution: "linkify-it@npm:5.0.0" From 2142512b769ceaab908d8b8bd9983e228aa5c401 Mon Sep 17 00:00:00 2001 From: bkellam Date: Fri, 30 Jan 2026 17:17:53 -0800 Subject: [PATCH 2/5] changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae02a8e91..da91d9e16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed +- Changed `/api/source` api to support fetching source code for any revision, not just revisions that are indexed by zoekt. 
[#829](https://github.com/sourcebot-dev/sourcebot/pull/829) + ## [4.10.20] - 2026-01-28 ### Fixed From 61dae1e650696716f899df7ac886319d36d0d4f4 Mon Sep 17 00:00:00 2001 From: bkellam Date: Fri, 30 Jan 2026 17:21:34 -0800 Subject: [PATCH 3/5] feedback --- packages/web/src/features/search/fileSourceApi.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/web/src/features/search/fileSourceApi.ts b/packages/web/src/features/search/fileSourceApi.ts index 738ec16be..700bc56ee 100644 --- a/packages/web/src/features/search/fileSourceApi.ts +++ b/packages/web/src/features/search/fileSourceApi.ts @@ -32,7 +32,7 @@ export const getFileSource = async ({ path: filePath, repo: repoName, ref }: Fil if (errorMessage.includes('does not exist') || errorMessage.includes('fatal: path')) { return fileNotFound(filePath, repoName); } - if (errorMessage.includes('unknown revision') || errorMessage.includes('bad revision')) { + if (errorMessage.includes('unknown revision') || errorMessage.includes('bad revision') || errorMessage.includes('invalid object name')) { return unexpectedError(`Invalid git reference: ${gitRef}`); } throw error; From 04c9c3df8154152a62f9c5b5b0950bc6c8cd41b9 Mon Sep 17 00:00:00 2001 From: bkellam Date: Fri, 30 Jan 2026 17:30:05 -0800 Subject: [PATCH 4/5] feedback --- packages/web/src/lib/languageDetection.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/web/src/lib/languageDetection.ts b/packages/web/src/lib/languageDetection.ts index 2124b4bbc..e39f38de7 100644 --- a/packages/web/src/lib/languageDetection.ts +++ b/packages/web/src/lib/languageDetection.ts @@ -6,8 +6,9 @@ const extensionToLanguage = new Map(); for (const [languageName, languageData] of Object.entries(linguistLanguages)) { if ('extensions' in languageData && languageData.extensions) { for (const ext of languageData.extensions) { - if (!extensionToLanguage.has(ext)) { - extensionToLanguage.set(ext, languageName); + const normalizedExt = 
ext.toLowerCase(); + if (!extensionToLanguage.has(normalizedExt)) { + extensionToLanguage.set(normalizedExt, languageName); } } } From 7f047b8b67a01a38d20870301cd8325104d1e1c7 Mon Sep 17 00:00:00 2001 From: bkellam Date: Fri, 30 Jan 2026 19:27:02 -0800 Subject: [PATCH 5/5] use default branch instead of head for git show operation --- packages/backend/src/github.ts | 1 + packages/backend/src/repoCompileUtils.test.ts | 1 + packages/backend/src/repoCompileUtils.ts | 20 +++++++++++++++++-- packages/backend/src/repoIndexManager.ts | 8 ++++++++ .../migration.sql | 2 ++ packages/db/prisma/schema.prisma | 1 + .../web/src/features/search/fileSourceApi.ts | 4 +++- 7 files changed, 34 insertions(+), 3 deletions(-) create mode 100644 packages/db/prisma/migrations/20260131013553_add_default_branch_to_repo_table/migration.sql diff --git a/packages/backend/src/github.ts b/packages/backend/src/github.ts index a43d81f0e..53f3a01b8 100644 --- a/packages/backend/src/github.ts +++ b/packages/backend/src/github.ts @@ -28,6 +28,7 @@ export type OctokitRepository = { stargazers_count?: number, watchers_count?: number, subscribers_count?: number, + default_branch?: string, forks_count?: number, archived?: boolean, topics?: string[], diff --git a/packages/backend/src/repoCompileUtils.test.ts b/packages/backend/src/repoCompileUtils.test.ts index 89fb61719..1411db338 100644 --- a/packages/backend/src/repoCompileUtils.test.ts +++ b/packages/backend/src/repoCompileUtils.test.ts @@ -6,6 +6,7 @@ vi.mock('./git.js', () => ({ isPathAValidGitRepoRoot: vi.fn(), getOriginUrl: vi.fn(), isUrlAValidGitRepo: vi.fn(), + getLocalDefaultBranch: vi.fn(), })); // Mock the glob module diff --git a/packages/backend/src/repoCompileUtils.ts b/packages/backend/src/repoCompileUtils.ts index 6bd23ff3b..e0a44c7e4 100644 --- a/packages/backend/src/repoCompileUtils.ts +++ b/packages/backend/src/repoCompileUtils.ts @@ -15,7 +15,7 @@ import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfi import 
{ ProjectVisibility } from "azure-devops-node-api/interfaces/CoreInterfaces.js"; import path from 'path'; import { glob } from 'glob'; -import { getOriginUrl, isPathAValidGitRepoRoot, isUrlAValidGitRepo } from './git.js'; +import { getLocalDefaultBranch, getOriginUrl, isPathAValidGitRepoRoot, isUrlAValidGitRepo } from './git.js'; import assert from 'assert'; import GitUrlParse from 'git-url-parse'; import { RepoMetadata } from '@sourcebot/shared'; @@ -118,6 +118,7 @@ export const createGitHubRepoRecord = ({ cloneUrl: cloneUrl.toString(), webUrl: repo.html_url, name: repoName, + defaultBranch: repo.default_branch, displayName: repoDisplayName, imageUrl: repo.owner.avatar_url, isFork: repo.fork, @@ -185,6 +186,7 @@ export const compileGitlabConfig = async ( cloneUrl: cloneUrl.toString(), webUrl: projectUrl, name: repoName, + defaultBranch: project.default_branch, displayName: repoDisplayName, imageUrl: avatarUrl, isFork: isFork, @@ -257,6 +259,7 @@ export const compileGiteaConfig = async ( webUrl: repo.html_url, name: repoName, displayName: repoDisplayName, + defaultBranch: repo.default_branch, imageUrl: repo.owner?.avatar_url, isFork: repo.fork!, isPublic: isPublic, @@ -339,6 +342,10 @@ export const compileGerritConfig = async ( webUrl: webUrl, name: repoName, displayName: repoDisplayName, + // @note: the gerrit api doesn't return the default branch (without a separate query). + // Instead, the default branch will be set once the repo is cloned. + // @see: repoIndexManager.ts + defaultBranch: undefined, isFork: false, isArchived: false, org: { @@ -444,6 +451,7 @@ export const compileBitbucketConfig = async ( const repoName = path.join(repoNameRoot, displayName); const cloneUrl = getCloneUrl(repo); const webUrl = getWebUrl(repo); + const defaultBranch = isServer ? 
(repo as BitbucketServerRepository).defaultBranch : (repo as BitbucketCloudRepository).mainbranch?.name; const record: RepoData = { external_id: externalId, @@ -453,6 +461,7 @@ export const compileBitbucketConfig = async ( webUrl: webUrl, name: repoName, displayName: displayName, + defaultBranch, isFork: isFork, isPublic: isPublic, isArchived: isArchived, @@ -557,6 +566,8 @@ export const compileGenericGitHostConfig_file = async ( const remoteUrl = GitUrlParse(origin); + const defaultBranch = await getLocalDefaultBranch({ path: repoPath }); + // @note: matches the naming here: // https://github.com/sourcebot-dev/zoekt/blob/main/gitindex/index.go#L293 // Go's url.URL.Host includes the port if present (even default ports like 443), @@ -573,6 +584,7 @@ export const compileGenericGitHostConfig_file = async ( cloneUrl: `file://${repoPath}`, name: repoName, displayName: repoName, + defaultBranch, isFork: false, isArchived: false, org: { @@ -612,7 +624,6 @@ export const compileGenericGitHostConfig_file = async ( } } - export const compileGenericGitHostConfig_url = async ( config: GenericGitHostConnectionConfig, connectionId: number, @@ -645,6 +656,10 @@ export const compileGenericGitHostConfig_url = async ( cloneUrl: remoteUrl.toString(), name: repoName, displayName: repoName, + // @note: we can't determine the default branch from the remote url. + // Instead, the default branch will be set once the repo is cloned. 
+ // @see: repoIndexManager.ts + defaultBranch: undefined, isFork: false, isArchived: false, org: { @@ -719,6 +734,7 @@ export const compileAzureDevOpsConfig = async ( webUrl: webUrl, name: repoName, displayName: repoDisplayName, + defaultBranch: repo.defaultBranch, imageUrl: null, isFork: !!repo.isFork, isArchived: false, diff --git a/packages/backend/src/repoIndexManager.ts b/packages/backend/src/repoIndexManager.ts index 154b3da26..fae26bdae 100644 --- a/packages/backend/src/repoIndexManager.ts +++ b/packages/backend/src/repoIndexManager.ts @@ -498,6 +498,7 @@ export class RepoIndexManager { }); const pushedAt = await getLatestCommitTimestamp({ path: repoPath }); + const defaultBranch = await getLocalDefaultBranch({ path: repoPath }); const jobMetadata = repoIndexingJobMetadataSchema.parse(jobData.metadata); @@ -511,6 +512,13 @@ export class RepoIndexManager { ...(jobData.repo.metadata as RepoMetadata), indexedRevisions: jobMetadata.indexedRevisions, } satisfies RepoMetadata, + // @note: always update the default branch. While this field can be set + // during connection syncing, by setting it here we ensure that a) the + // default branch is as up to date as possible (since repo indexing happens + // more frequently than connection syncing) and b) for hosts where it is + // impossible to determine the default branch from the host's API + // (e.g., generic git url), we still set the default branch here. 
+ defaultBranch: defaultBranch, } }); diff --git a/packages/db/prisma/migrations/20260131013553_add_default_branch_to_repo_table/migration.sql b/packages/db/prisma/migrations/20260131013553_add_default_branch_to_repo_table/migration.sql new file mode 100644 index 000000000..f93e8b1cb --- /dev/null +++ b/packages/db/prisma/migrations/20260131013553_add_default_branch_to_repo_table/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "Repo" ADD COLUMN "defaultBranch" TEXT; diff --git a/packages/db/prisma/schema.prisma b/packages/db/prisma/schema.prisma index 6c0affcce..423fb5960 100644 --- a/packages/db/prisma/schema.prisma +++ b/packages/db/prisma/schema.prisma @@ -59,6 +59,7 @@ model Repo { webUrl String? connections RepoToConnection[] imageUrl String? + defaultBranch String? permittedAccounts AccountToRepoPermission[] permissionSyncJobs RepoPermissionSyncJob[] diff --git a/packages/web/src/features/search/fileSourceApi.ts b/packages/web/src/features/search/fileSourceApi.ts index 700bc56ee..b7735958e 100644 --- a/packages/web/src/features/search/fileSourceApi.ts +++ b/packages/web/src/features/search/fileSourceApi.ts @@ -22,7 +22,9 @@ export const getFileSource = async ({ path: filePath, repo: repoName, ref }: Fil const { path: repoPath } = getRepoPath(repo); const git = simpleGit().cwd(repoPath); - const gitRef = ref ?? 'HEAD'; + const gitRef = ref ?? + repo.defaultBranch ?? + 'HEAD'; let source: string; try {