5 changes: 5 additions & 0 deletions .changeset/curvy-penguins-work.md
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

Add metadata filters for queries in the TypeScript templates
7 changes: 7 additions & 0 deletions helpers/typescript.ts
@@ -55,6 +55,12 @@ export const installTSTemplate = async ({
nextConfigJson.output = "export";
nextConfigJson.images = { unoptimized: true };
console.log("\nUsing static site generation\n");

// if there is a separate backend, overwrite next.config.mjs with next.config.simple.mjs
await fs.copyFile(
path.join(root, "next.config.simple.mjs"),
path.join(root, "next.config.mjs"),
);
} else {
if (vectorDb === "milvus") {
nextConfigJson.experimental.serverComponentsExternalPackages =
@@ -64,6 +70,7 @@ export const installTSTemplate = async ({
);
}
}
await fs.rm(path.join(root, "next.config.simple.mjs"));
await fs.writeFile(
nextConfigJsonFile,
JSON.stringify(nextConfigJson, null, 2) + os.EOL,
38 changes: 36 additions & 2 deletions templates/components/engines/typescript/agent/chat.ts
@@ -1,4 +1,9 @@
import { BaseToolWithCall, OpenAIAgent, QueryEngineTool } from "llamaindex";
import {
BaseToolWithCall,
MetadataFilters,
OpenAIAgent,
QueryEngineTool,
} from "llamaindex";
import fs from "node:fs/promises";
import path from "node:path";
import { getDataSource } from "./index";
@@ -14,7 +19,7 @@ export async function createChatEngine(documentIds?: string[]) {
tools.push(
new QueryEngineTool({
queryEngine: index.asQueryEngine({
preFilters: undefined, // TODO: Add filters once LITS supports it (getQueryFilters)
preFilters: generateFilters(documentIds || []),
}),
metadata: {
name: "data_query_engine",
@@ -41,3 +46,32 @@ export async function createChatEngine(documentIds?: string[]) {
systemPrompt: process.env.SYSTEM_PROMPT,
});
}

function generateFilters(documentIds: string[]): MetadataFilters | undefined {
if (!documentIds.length) {
return {
filters: [
{
key: "private",
value: ["true"],
operator: "nin",
},
],
};
}
return {
filters: [
{
key: "private",
value: "true",
operator: "!=",
},
{
key: "doc_id",
value: documentIds,
operator: "in",
},
],
condition: "or",
};
}
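
For context, a minimal sketch (not part of the diff) of the filter values this helper produces, assuming the MetadataFilters shape from llamaindex used above. Documents ingested by the generate scripts carry private: "false", while uploaded files carry private: "true"; the document IDs below are hypothetical.

// Sketch: values returned by generateFilters (mirrors the function above).

// No document IDs: exclude every document marked private.
generateFilters([]);
// => { filters: [{ key: "private", value: ["true"], operator: "nin" }] }

// With document IDs: match public documents OR the caller's own uploads.
generateFilters(["doc-1", "doc-2"]);
// => {
//      filters: [
//        { key: "private", value: "true", operator: "!=" },
//        { key: "doc_id", value: ["doc-1", "doc-2"], operator: "in" },
//      ],
//      condition: "or",
//    }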
@@ -18,8 +18,10 @@ export async function runPipeline(documents: Document[], filename: string) {
for (const document of documents) {
document.metadata = {
...document.metadata,
doc_id: document.id_,
file_name: filename,
private: "true", // to separate from other public documents
is_local_file: "true", // to distinguish from cloud data sources
};
}

@@ -35,9 +35,9 @@ export function retrieveDocumentIds(annotations?: JSONValue[]): string[] {
) {
const files = data.files as DocumentFile[];
for (const file of files) {
if (Array.isArray(file.content)) {
if (Array.isArray(file.content.value)) {
// the value is an array of document IDs
for (const id of file.content) {
for (const id of file.content.value) {
ids.push(id);
}
}
12 changes: 6 additions & 6 deletions templates/components/llamaindex/typescript/streaming/events.ts
@@ -77,17 +77,18 @@ export function createCallbackManager(stream: StreamData) {
const callbackManager = new CallbackManager();

callbackManager.on("retrieve-end", async (data) => {
const { nodes, query } = data.detail.payload;
const { nodes, query } = data.detail;
await appendSourceData(stream, nodes);
appendEventData(stream, `Retrieving context for query: '${query}'`);
appendEventData(
stream,
`Retrieved ${nodes.length} sources to use as context for the query`,
);
LLamaCloudFileService.downloadFiles(nodes); // don't await to avoid blocking chat streaming
});

callbackManager.on("llm-tool-call", (event) => {
const { name, input } = event.detail.payload.toolCall;
const { name, input } = event.detail.toolCall;
const inputString = Object.entries(input)
.map(([key, value]) => `${key}: ${value}`)
.join(", ");
@@ -98,7 +99,7 @@ export function createCallbackManager(stream: StreamData) {
});

callbackManager.on("llm-tool-result", (event) => {
const { toolCall, toolResult } = event.detail.payload;
const { toolCall, toolResult } = event.detail;
appendToolData(stream, toolCall, toolResult);
});

@@ -118,9 +119,8 @@ async function getNodeUrl(metadata: Metadata) {
const pipelineId = metadata["pipeline_id"];
if (pipelineId && !isLocalFile) {
// file is from LlamaCloud and was not ingested locally
// TODO trigger but don't await file download and just use convention to generate the URL (see Python code)
// return `${process.env.FILESERVER_URL_PREFIX}/output/llamacloud/${pipelineId}\$${fileName}`;
return await LLamaCloudFileService.getFileUrl(fileName, pipelineId);
const name = LLamaCloudFileService.toDownloadedName(pipelineId, fileName);
return `${process.env.FILESERVER_URL_PREFIX}/output/llamacloud/${name}`;
}
const isPrivate = metadata["private"] === "true";
const folder = isPrivate ? "output/uploaded" : "data";
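
As context (not part of the diff): getNodeUrl no longer awaits a LlamaCloud API lookup; it derives the URL from the pipeline id and file name using the "$" delimiter convention, while the actual download is triggered fire-and-forget in the retrieve-end callback. A minimal sketch with hypothetical values:

// Sketch: the downloaded-name convention used to build the file URL.
const pipelineId = "abc123"; // hypothetical pipeline id
const fileName = "report.pdf"; // hypothetical file name
const name = `${pipelineId}$${fileName}`; // same result as LLamaCloudFileService.toDownloadedName
const url = `${process.env.FILESERVER_URL_PREFIX}/output/llamacloud/${name}`;
// => "<FILESERVER_URL_PREFIX>/output/llamacloud/abc123$report.pdf"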
135 changes: 75 additions & 60 deletions templates/components/llamaindex/typescript/streaming/service.ts
@@ -1,86 +1,66 @@
import { Metadata, NodeWithScore } from "llamaindex";
import fs from "node:fs";
import https from "node:https";
import path from "node:path";

const LLAMA_CLOUD_OUTPUT_DIR = "output/llamacloud";
const LLAMA_CLOUD_BASE_URL = "https://cloud.llamaindex.ai/api/v1";
const FILE_DELIMITER = "$"; // delimiter between pipelineId and filename

export interface LlamaCloudFile {
interface LlamaCloudFile {
name: string;
file_id: string;
project_id: string;
}

export class LLamaCloudFileService {
static async getFiles(pipelineId: string): Promise<LlamaCloudFile[]> {
const url = `${LLAMA_CLOUD_BASE_URL}/pipelines/${pipelineId}/files`;
const headers = {
Accept: "application/json",
Authorization: `Bearer ${process.env.LLAMA_CLOUD_API_KEY}`,
};
const response = await fetch(url, { method: "GET", headers });
const data = await response.json();
return data;
public static async downloadFiles(nodes: NodeWithScore<Metadata>[]) {
const files = this.nodesToDownloadFiles(nodes);
if (!files.length) return;
console.log("Downloading files from LlamaCloud...");
for (const file of files) {
await this.downloadFile(file.pipelineId, file.fileName);
}
}

static async getFileDetail(
projectId: string,
fileId: string,
): Promise<{ url: string }> {
const url = `${LLAMA_CLOUD_BASE_URL}/files/${fileId}/content?project_id=${projectId}`;
const headers = {
Accept: "application/json",
Authorization: `Bearer ${process.env.LLAMA_CLOUD_API_KEY}`,
};
const response = await fetch(url, { method: "GET", headers });
const data = (await response.json()) as { url: string };
return data;
public static toDownloadedName(pipelineId: string, fileName: string) {
return `${pipelineId}${FILE_DELIMITER}${fileName}`;
}

static async getFileUrl(
name: string,
pipelineId: string,
): Promise<string | null> {
try {
const files = await this.getFiles(pipelineId);
for (const file of files) {
if (file.name === name) {
const fileId = file.file_id;
const projectId = file.project_id;
const fileDetail = await this.getFileDetail(projectId, fileId);
const localFileUrl = this.downloadFile(fileDetail.url, fileId, name);
return localFileUrl;
}
private static nodesToDownloadFiles(nodes: NodeWithScore<Metadata>[]) {
const downloadFiles: Array<{
pipelineId: string;
fileName: string;
}> = [];
for (const node of nodes) {
const isLocalFile = node.node.metadata["is_local_file"] === "true";
const pipelineId = node.node.metadata["pipeline_id"];
const fileName = node.node.metadata["file_name"];
if (isLocalFile || !pipelineId || !fileName) continue;
const isDuplicate = downloadFiles.some(
(f) => f.pipelineId === pipelineId && f.fileName === fileName,
);
if (!isDuplicate) {
downloadFiles.push({ pipelineId, fileName });
}
return null;
} catch (error) {
console.error("Error fetching file from LlamaCloud:", error);
return null;
}
return downloadFiles;
}

static downloadFile(url: string, fileId: string, filename: string) {
const FILE_DELIMITER = "$"; // delimiter between fileId and filename
const downloadedFileName = `${fileId}${FILE_DELIMITER}${filename}`;
const downloadedFilePath = path.join(
LLAMA_CLOUD_OUTPUT_DIR,
downloadedFileName,
);
const urlPrefix = `${process.env.FILESERVER_URL_PREFIX}/${LLAMA_CLOUD_OUTPUT_DIR}`;
const fileUrl = `${urlPrefix}/${downloadedFileName}`;

private static async downloadFile(pipelineId: string, fileName: string) {
try {
const downloadedName = this.toDownloadedName(pipelineId, fileName);
const downloadedPath = path.join(LLAMA_CLOUD_OUTPUT_DIR, downloadedName);

// Check if file already exists
if (fs.existsSync(downloadedFilePath)) return fileUrl;
if (fs.existsSync(downloadedPath)) return;

// Create directory if it doesn't exist
if (!fs.existsSync(LLAMA_CLOUD_OUTPUT_DIR)) {
fs.mkdirSync(LLAMA_CLOUD_OUTPUT_DIR, { recursive: true });
}
const urlToDownload = await this.getFileUrlByName(pipelineId, fileName);
if (!urlToDownload) throw new Error("File not found in LlamaCloud");

const file = fs.createWriteStream(downloadedFilePath);
const file = fs.createWriteStream(downloadedPath);
https
.get(url, (response) => {
.get(urlToDownload, (response) => {
response.pipe(file);
file.on("finish", () => {
file.close(() => {
@@ -89,15 +69,50 @@ export class LLamaCloudFileService {
});
})
.on("error", (err) => {
fs.unlink(downloadedFilePath, () => {
fs.unlink(downloadedPath, () => {
console.error("Error downloading file:", err);
throw err;
});
});

return fileUrl;
} catch (error) {
throw new Error(`Error downloading file from LlamaCloud: ${error}`);
}
}

private static async getFileUrlByName(
pipelineId: string,
name: string,
): Promise<string | null> {
const files = await this.getAllFiles(pipelineId);
const file = files.find((file) => file.name === name);
if (!file) return null;
return await this.getFileUrlById(file.project_id, file.file_id);
}

private static async getFileUrlById(
projectId: string,
fileId: string,
): Promise<string> {
const url = `${LLAMA_CLOUD_BASE_URL}/files/${fileId}/content?project_id=${projectId}`;
const headers = {
Accept: "application/json",
Authorization: `Bearer ${process.env.LLAMA_CLOUD_API_KEY}`,
};
const response = await fetch(url, { method: "GET", headers });
const data = (await response.json()) as { url: string };
return data.url;
}

private static async getAllFiles(
pipelineId: string,
): Promise<LlamaCloudFile[]> {
const url = `${LLAMA_CLOUD_BASE_URL}/pipelines/${pipelineId}/files`;
const headers = {
Accept: "application/json",
Authorization: `Bearer ${process.env.LLAMA_CLOUD_API_KEY}`,
};
const response = await fetch(url, { method: "GET", headers });
const data = await response.json();
return data;
}
}
2 changes: 2 additions & 0 deletions templates/components/vectordbs/python/llamacloud/generate.py
@@ -31,8 +31,10 @@ def generate_datasource():
documents = get_documents()

# Set is_local_file=true to distinguish locally ingested files from LlamaCloud files
# Set private=false to mark the document as public (required for filtering)
for doc in documents:
doc.metadata["is_local_file"] = "true"
doc.metadata["private"] = "false"

LlamaCloudIndex.from_documents(
documents=documents,
3 changes: 3 additions & 0 deletions templates/components/vectordbs/python/none/generate.py
@@ -21,6 +21,9 @@ def generate_datasource():
storage_dir = os.environ.get("STORAGE_DIR", "storage")
# load the documents and create the index
documents = get_documents()
# Set private=false to mark the document as public (required for filtering)
for doc in documents:
doc.metadata["private"] = "false"
index = VectorStoreIndex.from_documents(
documents,
)
@@ -10,10 +10,12 @@ dotenv.config();
async function loadAndIndex() {
const documents = await getDocuments();
// Set is_local_file=true to distinguish locally ingested files from LlamaCloud files
// Set private=false to mark the document as public (required for filtering)
for (const document of documents) {
document.metadata = {
...document.metadata,
is_local_file: "true",
private: "false",
};
}
await getDataSource();
5 changes: 5 additions & 0 deletions templates/components/vectordbs/typescript/none/generate.ts
@@ -25,6 +25,11 @@ async function generateDatasource() {
persistDir: STORAGE_CACHE_DIR,
});
const documents = await getDocuments();
// Set private=false to mark the document as public (required for filtering)
documents.forEach((doc) => {
doc.metadata["private"] = "false";
});

await VectorStoreIndex.fromDocuments(documents, {
storageContext,
});
2 changes: 1 addition & 1 deletion templates/types/streaming/express/package.json
@@ -20,7 +20,7 @@
"dotenv": "^16.3.1",
"duck-duck-scrape": "^2.2.5",
"express": "^4.18.2",
"llamaindex": "0.4.14",
"llamaindex": "0.5.6",
"pdf2json": "3.0.5",
"ajv": "^8.12.0",
"@e2b/code-interpreter": "^0.0.5",
3 changes: 2 additions & 1 deletion templates/types/streaming/nextjs/next.config.mjs
@@ -1,8 +1,9 @@
/** @type {import('next').NextConfig} */
import fs from "fs";
import withLlamaIndex from "llamaindex/next";
import webpack from "./webpack.config.mjs";

const nextConfig = JSON.parse(fs.readFileSync("./next.config.json", "utf-8"));
nextConfig.webpack = webpack;

export default nextConfig;
export default withLlamaIndex(nextConfig);
8 changes: 8 additions & 0 deletions templates/types/streaming/nextjs/next.config.simple.mjs
@@ -0,0 +1,8 @@
/** @type {import('next').NextConfig} */
import fs from "fs";
import webpack from "./webpack.config.mjs";

const nextConfig = JSON.parse(fs.readFileSync("./next.config.json", "utf-8"));
nextConfig.webpack = webpack;

export default nextConfig;