diff --git a/recipes/natural_language_processing/graph_rag/Makefile b/recipes/natural_language_processing/graph_rag/Makefile
new file mode 100644
index 00000000..0fa7f568
--- /dev/null
+++ b/recipes/natural_language_processing/graph_rag/Makefile
@@ -0,0 +1,67 @@
+SHELL := /bin/bash
+APP ?= grag
+PORT ?= 8501
+CHROMADB_PORT ?= 8000
+
+include ../../common/Makefile.common
+
+RECIPE_BINARIES_PATH := $(shell realpath ../../common/bin)
+RELATIVE_MODELS_PATH := ../../../models
+RELATIVE_TESTS_PATH := ../tests
+
+.PHONY: run-chromadb
+run-chromadb:
+	podman run -it -p $(CHROMADB_PORT):$(CHROMADB_PORT) -e CHROMADB_ENDPOINT=http://10.88.0.1:8000/v1 ${CHROMADB_IMAGE}
+
+# rag requires custom bootc because it uses an extra build-arg for CHROMADB_IMAGE (other apps use ../../common/Makefile.common target)
+.PHONY: bootc
+bootc: quadlet
+	"${CONTAINER_TOOL}" build \
+	  $(ARCH:%=--arch %) \
+	  $(BUILD_ARG_FILE:%=--build-arg-file=%) \
+	  $(FROM:%=--from %) \
+	  $(AUTH_JSON:%=-v %:/run/containers/0/auth.json) \
+	  --security-opt label=disable \
+	  --cap-add SYS_ADMIN \
+	  --build-arg MODEL_IMAGE=$(MODEL_IMAGE) \
+	  --build-arg APP_IMAGE=$(APP_IMAGE) \
+	  --build-arg CHROMADB_IMAGE=$(CHROMADB_IMAGE) \
+	  --build-arg SERVER_IMAGE=$(SERVER_IMAGE) \
+	  --build-arg "SSHPUBKEY=$(SSH_PUBKEY)" \
+	  -f bootc/$(CONTAINERFILE) \
+	  -t ${BOOTC_IMAGE} .
+	@echo ""
+	@echo "Successfully built bootc image '${BOOTC_IMAGE}'."
+	@echo "You may now convert the image into a disk image via bootc-image-builder"
+	@echo "or the Podman Desktop Bootc Extension. For more information, please refer to"
+	@echo " * https://github.com/osbuild/bootc-image-builder"
+	@echo " * https://github.com/containers/podman-desktop-extension-bootc"
+
+# rag requires custom quadlet target for CHROMADB_IMAGE substitution
+# (other apps use ../../common/Makefile.common target)
+.PHONY: quadlet
+quadlet:
+	# Modify quadlet files to match the server, model and app image
+	mkdir -p build
+	sed -e "s|SERVER_IMAGE|${SERVER_IMAGE}|" \
+	    -e "s|APP_IMAGE|${APP_IMAGE}|g" \
+	    -e "s|MODEL_IMAGE|${MODEL_IMAGE}|g" \
+	    -e "s|CHROMADB_IMAGE|${CHROMADB_IMAGE}|g" \
+	    -e "s|APP|${APP}|g" \
+	    quadlet/${APP}.image \
+	    > build/${APP}.image
+	sed -e "s|SERVER_IMAGE|${SERVER_IMAGE}|" \
+	    -e "s|APP_IMAGE|${APP_IMAGE}|g" \
+	    -e "s|MODEL_IMAGE|${MODEL_IMAGE}|g" \
+	    -e "s|CHROMADB_IMAGE|${CHROMADB_IMAGE}|g" \
+	    quadlet/${APP}.yaml \
+	    > build/${APP}.yaml
+	cp quadlet/${APP}.kube build/${APP}.kube
+
+# rag requires custom bootc-run because it uses an extra port for chromadb
+# (other apps use ../../common/Makefile.common target)
+.PHONY: bootc-run
+bootc-run:
+	podman run -d --rm --name $(APP)-bootc -p 8080:8501 -p 8090:8000 --privileged \
+	    $(AUTH_JSON:%=-v %:/run/containers/0/auth.json) \
+	    $(BOOTC_IMAGE) /sbin/init
diff --git a/recipes/natural_language_processing/graph_rag/README.md b/recipes/natural_language_processing/graph_rag/README.md
new file mode 100644
index 00000000..4c8ba3e9
--- /dev/null
+++ b/recipes/natural_language_processing/graph_rag/README.md
@@ -0,0 +1,211 @@
+# Graph RAG (Retrieval Augmented Generation) Chat Application
+
+.. THIS IS A WORK IN PROGRESS CURRENTLY. DO NOT USE YET ..
+
+This demo provides a simple recipe to help developers get started building their own custom Graph RAG (Graph Retrieval Augmented Generation) applications. It consists of three main components: the Model Service, the Graph Database and the AI Application.
+
+There are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).
+
+In order for the LLM to interact with our documents, we need them stored and available in such a manner that we can retrieve the small subset of them that is relevant to our query. To do this we employ a Graph Database alongside an embedding model. We convert the documents into a graph representation which is then stored in the Graph Database. This graph structure captures the semantics of the input documents better than basic RAG does, including the ability to extract logical entities and their relationships from the documents. The Graph Database also supports vector-based indexing of the graph structure, which allows it to be integrated with RAG prompt-chaining libraries. In this recipe we use [neo4j](https://neo4j.com/product/neo4j-graph-database/) as our Graph Database.
+
+Our AI Application will connect to our Model Service via its OpenAI compatible API. In this example we rely on [Langchain's](https://python.langchain.com/docs/get_started/introduction) Python package to simplify communication with our Model Service, and we use [Streamlit](https://streamlit.io/) for our UI layer. An example of the RAG application UI is shown below.
+
+![](/assets/rag_ui.png)
+
+
+## Try the RAG chat application
+
+_COMING SOON to AI LAB_
+The [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) will include this recipe among others once it is complete. To try it out, open `Recipes Catalog` -> `RAG Chatbot` and follow the instructions to start the application.
+
+If you prefer building and running the application from the terminal, please run the following commands from this directory.
+
+First, build the application's metadata and run the generated Kubernetes YAML, which will spin up a Pod along with a number of containers:
+```
+make quadlet
+podman kube play build/grag.yaml
+```
+
+The Pod is named `grag`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:
+```
+podman pod list
+podman ps
+```
+
+To stop and remove the Pod, run:
+```
+podman pod stop grag
+podman pod rm grag
+```
+
+Once the Pod is running, please refer to the section below to [interact with the RAG chatbot application](#interact-with-the-ai-application).
+
+# Build the Application
+
+In order to build this application we will need two models (an LLM and an embedding model), a Graph Database, a Model Service and an AI Application.
+
+* [Download models](#download-models)
+* [Deploy the Graph Database](#deploy-the-graph-database)
+* [Build the Model Service](#build-the-model-service)
+* [Deploy the Model Service](#deploy-the-model-service)
+* [Build the AI Application](#build-the-ai-application)
+* [Deploy the AI Application](#deploy-the-ai-application)
+* [Interact with the AI Application](#interact-with-the-ai-application)
+
+### Download models
+
+If you are just getting started, we recommend using [Granite-7B-Lab](https://huggingface.co/instructlab/granite-7b-lab-GGUF).
+This is a performant, mid-sized model with an Apache-2.0 license that has been quantized and converted to the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md).
+
+The recommended model can be downloaded using the code snippet below:
+
+```bash
+cd ../../../models
+curl -sLO https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf
+cd ../recipes/natural_language_processing/graph_rag
+```
+
+_A full list of supported open models is forthcoming._
+
+In addition to the LLM, RAG applications also require an embedding model to convert documents between natural language and vector representations. For this demo we will use [`BAAI/bge-base-en-v1.5`](https://huggingface.co/BAAI/bge-base-en-v1.5); it is a fairly standard model for this use case and has an MIT license.
+
+The code snippet below can be used to pull a copy of the `BAAI/bge-base-en-v1.5` embedding model and store it in your `models/` directory.
+
+```python
+from huggingface_hub import snapshot_download
+snapshot_download(repo_id="BAAI/bge-base-en-v1.5",
+                  cache_dir="models/",
+                  local_files_only=False)
+```
+
+### Deploy the Graph Database
+
+To deploy the Graph Database service locally, simply use the existing Neo4j image. The Graph Database is ephemeral and will need to be re-populated each time the container restarts. When implementing RAG/Graph RAG in production, you will want a long-running, backed-up Graph Database.
+
+
+#### Neo4j
+```bash
+podman run \
+    --restart always \
+    --publish=7474:7474 --publish=7687:7687 --env NEO4J_AUTH=none \
+    neo4j
+```
+
+### Build the Model Service
+
+The complete instructions for building and deploying the Model Service can be found in the [llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).
+
+The Model Service can be built from the root of this repository with the following code snippet:
+
+```bash
+cd model_servers/llamacpp_python
+podman build -t llamacppserver -f ./base/Containerfile .
+```
+
+
+### Deploy the Model Service
+
+The complete instructions for building and deploying the Model Service can be found in the [llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).
+
+The local Model Service relies on a volume mount to the localhost to access the model files. You can start your local Model Service using the following Podman command:
+```
+podman run --rm -it \
+    -p 8001:8001 \
+    -v Local/path/to/locallm/models:/locallm/models \
+    -e MODEL_PATH=models/ \
+    -e HOST=0.0.0.0 \
+    -e PORT=8001 \
+    llamacppserver
+```
+
+### Build the AI Application
+
+Now that the Model Service is running we want to build and deploy our AI Application. Use the provided Containerfile to build the AI Application image from this recipe's directory (`graph_rag/`).
+
+```bash
+cd recipes/natural_language_processing/graph_rag
+make APP_IMAGE=grag build
+```
+
+### Deploy the AI Application
+
+Make sure the Model Service and the Graph Database are up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine, so we need to provide it with the appropriate address, `10.88.0.1`. The same goes for the Graph Database. Make sure the `GRAPHDB_HOST` is correctly set to `10.88.0.1` for communication within the Podman virtual machine.
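+
+Before starting the AI Application container, you can optionally confirm that both back ends are reachable from the Podman machine. The snippet below is just a quick sanity check and assumes the default ports used in this recipe (`8001` for the Model Service and `7687` for Neo4j's bolt protocol); adjust it if you changed them.
+
+```bash
+# should return a small JSON document listing the served model
+curl -s http://10.88.0.1:8001/v1/models
+
+# should print "neo4j reachable" if the bolt port is open
+nc -z 10.88.0.1 7687 && echo "neo4j reachable"
+```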
+
+There also needs to be a volume mount into the `models/` directory so that the application can access the embedding model, as well as a volume mount into the `data/` directory from which it can pull documents to populate the Graph Database.
+
+The following Podman command can be used to run your AI Application:
+
+```bash
+podman run --rm -it -p 8501:8501 \
+-e MODEL_ENDPOINT=http://10.88.0.1:8001 \
+-e GRAPHDB_HOST=10.88.0.1 \
+-v Local/path/to/locallm/models/:/rag/models \
+grag
+```
+
+### Interact with the AI Application
+
+Everything should now be up and running, with the Graph RAG application available at [`http://localhost:8501`](http://localhost:8501). By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM-enabled Graph RAG applications.
+
+### Embed the AI Application in a Bootable Container Image
+
+To build a bootable container image that includes this sample RAG chatbot workload as a service that starts when a system is booted, cd into this folder
+and run:
+
+```
+make BOOTC_IMAGE=quay.io/your/rag-bootc:latest bootc
+```
+
+To substitute a different base image for the bootc/Containerfile `FROM`, use the Makefile `FROM` option:
+
+```
+make FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 BOOTC_IMAGE=quay.io/your/rag-bootc:latest bootc
+```
+
+The magic happens when you have a bootc-enabled system running. If you do, and you'd like to update the operating system to the OS you just built
+with the RAG chatbot application, it's as simple as ssh-ing into the bootc system and running:
+
+```
+bootc switch quay.io/your/rag-bootc:latest
+```
+
+Upon a reboot, you'll see that the RAG chatbot service is running on the system.
+
+Check on the service with:
+
+```
+ssh user@bootc-system-ip
+sudo systemctl status rag
+```
+
+#### What are bootable containers?
+
+What's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?
+
+That's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than
+at _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.
+Bootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization
+tools. Might I suggest [podman](https://podman.io/)?
+
+Once installed, a bootc-enabled system can be updated by providing an updated bootable OCI image from any OCI
+image registry with a single `bootc` command. This works especially well for fleets of devices that have fixed workloads - think
+factories or appliances. Who doesn't want to add a little AI to their appliance, am I right?
+
+Bootable images lend themselves to immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!
+
+##### Creating bootable disk images
+
+You can convert a bootc image to a bootable disk image using the
+[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.
+
+This container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.
+
+The disk image type can be set via the `DISK_TYPE` Makefile variable:
+ +`make bootc-image-builder DISK_TYPE=ami` + +### Makefile variables + +There are several [Makefile variables](../../common/README.md) defined within each `recipe` Makefile which can be +used to override defaults for a variety of make targets. diff --git a/recipes/natural_language_processing/graph_rag/ai-lab.yaml b/recipes/natural_language_processing/graph_rag/ai-lab.yaml new file mode 100644 index 00000000..16b17f1c --- /dev/null +++ b/recipes/natural_language_processing/graph_rag/ai-lab.yaml @@ -0,0 +1,37 @@ +version: v1.0 +application: + type: language + name: rag-demo + description: A RAG chat bot using local documents. + containers: + - name: llamacpp-server + contextdir: ../../../model_servers/llamacpp_python + containerfile: ./base/Containerfile + model-service: true + backend: + - llama-cpp + arch: + - arm64 + - amd64 + ports: + - 8001 + image: quay.io/ai-lab/llamacpp_python:latest + - name: chromadb-server + contextdir: ../../../vector_dbs/chromadb + containerfile: Containerfile + vectordb: true + arch: + - arm64 + - amd64 + ports: + - 8000 + image: quay.io/ai-lab/chromadb:latest + - name: rag-inference-app + contextdir: app + containerfile: Containerfile + arch: + - arm64 + - amd64 + ports: + - 8501 + image: quay.io/ai-lab/rag:latest diff --git a/recipes/natural_language_processing/graph_rag/app/Containerfile b/recipes/natural_language_processing/graph_rag/app/Containerfile new file mode 100644 index 00000000..a350589d --- /dev/null +++ b/recipes/natural_language_processing/graph_rag/app/Containerfile @@ -0,0 +1,15 @@ +FROM registry.access.redhat.com/ubi9/python-311:1-72.1722518949 +USER root +ENV LD_LIBRARY_PATH="/usr/local/lib" +#### +WORKDIR /rag +COPY requirements.txt . +RUN pip install --upgrade pip +RUN pip install --no-cache-dir --upgrade -r /rag/requirements.txt +COPY rag_app.py . +COPY manage_graphdb.py . 
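+# Expose the Streamlit UI port and point the Hugging Face cache at /rag/models/,
+# which is made group-writable so the embedding model can be fetched at runtime by a non-root UID.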
+EXPOSE 8501 +ENV HF_HUB_CACHE=/rag/models/ +RUN mkdir -p /rag/models/ +RUN chgrp -R 0 /rag/models/ && chmod -R g=u /rag/models/ +ENTRYPOINT [ "streamlit", "run" ,"rag_app.py" ] diff --git a/recipes/natural_language_processing/graph_rag/app/manage_graphdb.py b/recipes/natural_language_processing/graph_rag/app/manage_graphdb.py new file mode 100644 index 00000000..662c234d --- /dev/null +++ b/recipes/natural_language_processing/graph_rag/app/manage_graphdb.py @@ -0,0 +1,186 @@ +from neo4j import GraphDatabase +from langchain.graphs import Neo4jGraph +# from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings +from neo4j_graphrag.embeddings.sentence_transformers import SentenceTransformerEmbeddings +from langchain_core.documents import Document +from langchain_openai import ChatOpenAI +from langchain_experimental.graph_transformers import LLMGraphTransformer +from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline +from neo4j_graphrag.experimental.components.text_splitters.fixed_size_splitter import ( + FixedSizeSplitter, +) +import neo4j_graphrag.experimental.components +from neo4j_graphrag.generation import GraphRAG + +from neo4j_graphrag.llm import OpenAILLM +import os +import asyncio +import datetime +import importlib +import logging.config + +NEO4J_URL = "bolt://host.containers.internal:7687" +NEO4J_USER = "" +NEO4J_PASSWORD = "" +NEO4J_DATABASE = "neo4j" + + +class GraphDB: + def __init__(self, graph_vendor, host, port, collection_name, embedding_model): + self.graph_vendor = graph_vendor + self.host = host + self.port = port + self.collection_name = collection_name + self.embedding_model = embedding_model + self.chunk_size = os.getenv("CHUNK_SIZE", 150) # use larger chunks with more compute + self.chunk_overlap = os.getenv("CHUNK_OVERLAP", 20) + logging.config.dictConfig( + { + "version": 1, + "formatters": { + "standard": { + "format": "[%(levelname)s] %(name)s %(filename)s %(funcName)s %(lineno)d: %(message)s" + }, + }, + "handlers": { + "file_handler": { + "class": "logging.FileHandler", + "level": "DEBUG", # set to INFO normally + "formatter": "standard", + "filename": "/tmp/grag_app.log", + "mode": "w" + } + }, + "loggers": { + "root": { + "handlers": ["file_handler"], + "propagate": True + }, + "neo4j_graphrag": { + "level": "DEBUG", # set to INFO normally + "propagate": True + }, + }, + } + ) + + def connect(self): + # Connection logic + print(f"Connecting to {self.host}:{self.port}...") + if self.graph_vendor == "neo4j": + self.initialiseNeo4j() + return self.client + + def clear_db(self): + print(f"Clearing GraphDB...") + cypher_cleardb = ["MATCH(n) DETACH DELETE n"] + driver = self.client + with driver.session() as session: + for cypher in cypher_cleardb: + session.run(cypher) + + + def initialiseNeo4j(self): + cypher_schema = [ +# "CREATE CONSTRAINT sectionKey IF NOT EXISTS FOR (c:Section) REQUIRE (c.key) IS UNIQUE;", +# "CREATE CONSTRAINT chunkKey IF NOT EXISTS FOR (c:Chunk) REQUIRE (c.key) IS UNIQUE;", +# "CREATE CONSTRAINT documentKey IF NOT EXISTS FOR (c:Document) REQUIRE (c.url_hash) IS UNIQUE;", +# "CREATE VECTOR INDEX `chunkVectorIndex` IF NOT EXISTS FOR (e:Embedding) ON (e.value) OPTIONS { indexConfig: {`vector.dimensions`: 1536, `vector.similarity_function`: 'cosine'}};" +# "MERGE (a:Person {name: 'Alice'}) ON CREATE;", +# "MERGE (a:Person {name: 'Bob'}) ON CREATE;", +# "MATCH (a), (b) MERGE (a)-[:KNOWS]->(b) ON CREATE" + ] + + driver = GraphDatabase.driver(NEO4J_URL, database=NEO4J_DATABASE, auth=(NEO4J_USER, 
NEO4J_PASSWORD)) + + with driver.session() as session: + for cypher in cypher_schema: + session.run(cypher) + self.client = driver + +# Temp also setting up a 2nd way to access to enable multiple libraries +# Method doesnt handle null strings apparently, using summy for now since auth is disabled in any case + self.graph = Neo4jGraph( + url=NEO4J_URL, + username="dummy", + password="dummy" + ) + + + def populate_db(self, text, model_service, model_name): + +# TODO +# text_splitter = RecursiveCharacterTextSplitter( +# chunk_size=int(self.chunk_size), +# chunk_overlap=int(self.chunk_overlap), +# separators=["\n\n", "\n", " ", ""]) + +# dbug for now remove later TODO + print("Reloading neo4j_graphrag.experimental") + + try: + importlib.reload(neo4j_graphrag.experimental.components) + print("Module reloaded successfully.") + except Exception as e: + print(f"Error reloading module: {e}") + + text_splitter = FixedSizeSplitter(self.chunk_size, self.chunk_overlap) + + embedder = SentenceTransformerEmbeddings(model=self.embedding_model) + +# graphllm = ChatOpenAI(temperature=0, base_url=model_service, +# api_key="EMPTY", +# model=model_name) + + api_key = os.getenv("LLM_API_KEY") + print(" Calling OpenAILLM with base_url ", model_service, " model_name ", model_name) +# graphllm = OpenAILLM(api_key="EMPTY", base_url=model_service, model_name=model_name, model_params={"temperature": 0, "max_completion_tokens":self.chunk_size}}) +# Temp, for trying with the actual OpenAI SaaS itself. For this case, LLM_API_KEY must be set in the env vars + graphllm = OpenAILLM(api_key=api_key, base_url=model_service, model_name=model_name, + model_params={ + "temperature": 0.0, + "max_tokens": 2000, + "response_format": {"type": "json_object"}, + "seed": 123 + } + ) + + current_time = datetime.datetime.now() + print("Starting the KG insertion pipeline at time ", current_time) + + pipeline = SimpleKGPipeline( + driver=self.client, + text_splitter=text_splitter, + embedder=embedder, + llm=graphllm, + on_error="IGNORE", + from_pdf=False, + ) + + asyncio.run( pipeline.run_async(text=text[0].page_content)) + + current_time = datetime.datetime.now() + print("Completed the KG insertion pipeline at time ", current_time) + + +# def populate_db(self, text, model_service, model_name): +# +# graphllm = ChatOpenAI(temperature=0, base_url=model_service, +# api_key="EMPTY", +# model=model_name) +# llm_transformer = LLMGraphTransformer(llm=graphllm) +# +# graph_documents = llm_transformer.convert_to_graph_documents(text) +# print(f"Nodes:{graph_documents[0].nodes}") +# print(f"Relationships:{graph_documents[0].relationships}") +# graphDB.client.add_graph_documents(graph_documents, baseEntityLabel=True, include_source=True) +# self.graph.add_graph_documents(graph_documents) +# print("Added graph documents ...") +# #retriever = graph.as_retriever(threshold=0.75) +### documents = split_docs(text) +### db = gdb.populate_db(documents) +### retriever = db.as_retriever(threshold=0.75) +# retriever = {} +# print("Attempted docs insertion ... 
(tempcode)") +#""" + diff --git a/recipes/natural_language_processing/graph_rag/app/rag_app.py b/recipes/natural_language_processing/graph_rag/app/rag_app.py new file mode 100644 index 00000000..459752fd --- /dev/null +++ b/recipes/natural_language_processing/graph_rag/app/rag_app.py @@ -0,0 +1,152 @@ +from langchain_openai import ChatOpenAI +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.runnables import RunnablePassthrough +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.callbacks import StreamlitCallbackHandler +from langchain_community.document_loaders import TextLoader +from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader +from langchain_experimental.graph_transformers import LLMGraphTransformer +from langchain_core.documents import Document +from manage_graphdb import GraphDB +from neo4j_graphrag.retrievers import VectorRetriever +from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline +import tempfile +import streamlit as st +import os +from dotenv import load_dotenv +import pprint + + +def split_docs(raw_documents): + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=int(chunk_size), + chunk_overlap=int(chunk_overlap), + separators=["\n\n", "\n", " ", ""]) + + chunks = text_splitter.split_documents(raw_documents) + print("Split docs: ", chunks) + return chunks + +def read_file(file): + file_type = file.type + if file_type == "application/pdf": + temp = tempfile.NamedTemporaryFile() + with open(temp.name, "wb") as f: + f.write(file.getvalue()) + loader = PyPDFLoader(temp.name) + + if file_type == "text/plain": + temp = tempfile.NamedTemporaryFile() + with open(temp.name, "wb") as f: + f.write(file.getvalue()) + loader = TextLoader(temp.name) + raw_documents = loader.load() + return raw_documents + +## main + +load_dotenv() +model_service = os.getenv("MODEL_ENDPOINT","http://10.88.0.1:8001/v1") +# model_service = f"{model_service}/v1" +model_name = os.getenv("MODEL_NAME", "") +chunk_size = os.getenv("CHUNK_SIZE", 150) # use larger chunks with more compute +chunk_overlap = os.getenv("CHUNK_OVERLAP", 20) +embedding_model = os.getenv("EMBEDDING_MODEL","BAAI/bge-base-en-v1.5") +gdb_vendor = os.getenv("GRAPHDB_VENDOR", "neo4j") +gdb_host = os.getenv("GRAPHDB_HOST", "0.0.0.0") +gdb_port = os.getenv("GRAPHDB_PORT", "7687") +gdb_name = os.getenv("GRAPHDB_NAME", "neo4j") + + +gdb = GraphDB(gdb_vendor, gdb_host, gdb_port, gdb_name, embedding_model) +graphDB = gdb.connect() +if graphDB is None: + print("No Graph Database found, exitting ...") + +st.title("📚 Graph RAG DEMO") +with st.sidebar: + file = st.file_uploader(label="📄 Upload Document", + type=[".txt",".pdf"], + on_change=gdb.clear_db + ) + +### populate the DB #### +if file != None: + text = read_file(file) +# chunks = split_docs(text) + db = gdb.populate_db(text, model_service = model_service, model_name = model_name) + retriever = {} +# retriever = db.as_retriever(threshold=0.75) +else: + retriever = {} + print("Empty GraphDB") + + +### populate the DB #### +#if file != None: +# text = read_file(file) +# file_type = file.type +# if file_type == "text/plain": +# graphllm = ChatOpenAI(temperature=0, model_name="text-davinci-003") +# +# graphllm = ChatOpenAI(temperature=0, base_url=model_service, +# api_key="EMPTY", +# model=model_name) +# llm_transformer = LLMGraphTransformer(llm=graphllm) +# graph_documents = llm_transformer.convert_to_graph_documents(text) +# print(f"Nodes:{graph_documents[0].nodes}") +# 
print(f"Relationships:{graph_documents[0].relationships}") +# graphDB.client.add_graph_documents(graph_documents, baseEntityLabel=True, include_source=True) +# graphDB.graph.add_graph_documents(graph_documents) +# print("Added graph documents ...") +# #retriever = graph.as_retriever(threshold=0.75) +### documents = split_docs(text) +### db = gdb.populate_db(documents) +### retriever = db.as_retriever(threshold=0.75) +# retriever = {} +# print("Attempted docs insertion ... (tempcode)") +#else: +# retriever = {} +# print("Empty GraphDB") + +######################## + + +if "messages" not in st.session_state: + st.session_state["messages"] = [{"role": "assistant", + "content": "How can I help you?"}] + +for msg in st.session_state.messages: + st.chat_message(msg["role"]).write(msg["content"]) + + +llm = ChatOpenAI(base_url=model_service, + api_key="EMPTY", + model=model_name, + streaming=True, + callbacks=[StreamlitCallbackHandler(st.container(), + collapse_completed_thoughts=True)]) + +prompt = ChatPromptTemplate.from_template("""Answer the question based only on the following context: +{context} + +Question: {input} +""" +) + +chain = ( + {"context": retriever, "input": RunnablePassthrough()} + | prompt + | llm +) + +if prompt := st.chat_input(): + st.session_state.messages.append({"role": "user", "content": prompt}) + st.chat_message("user").markdown(prompt) + response = chain.invoke(prompt) + st.chat_message("assistant").markdown(response.content) + st.session_state.messages.append({"role": "assistant", "content": response.content}) + st.rerun() + +## end of main + diff --git a/recipes/natural_language_processing/graph_rag/app/requirements.txt b/recipes/natural_language_processing/graph_rag/app/requirements.txt new file mode 100644 index 00000000..27c3c343 --- /dev/null +++ b/recipes/natural_language_processing/graph_rag/app/requirements.txt @@ -0,0 +1,9 @@ +langchain-openai==0.1.7 +langchain==0.1.20 +sentence-transformers==2.7.0 +streamlit==1.34.0 +neo4j==5.23.0 +neo4j-graphrag==1.0.0 +pypdf==4.3.1 +langchain_experimental +python-dotenv diff --git a/recipes/natural_language_processing/graph_rag/bootc/Containerfile b/recipes/natural_language_processing/graph_rag/bootc/Containerfile new file mode 100644 index 00000000..021be3d2 --- /dev/null +++ b/recipes/natural_language_processing/graph_rag/bootc/Containerfile @@ -0,0 +1,48 @@ +# Example: an AI powered sample application is embedded as a systemd service +# via Podman quadlet files in /usr/share/containers/systemd +# +# from recipes/natural_language_processing/rag, run +# 'make bootc' + +FROM quay.io/centos-bootc/centos-bootc:stream9 + +ARG SSHPUBKEY + +# The --build-arg "SSHPUBKEY=$(cat ~/.ssh/id_rsa.pub)" option inserts your +# public key into the image, allowing root access via ssh. 
+RUN set -eu; mkdir -p /usr/ssh && \
+    echo 'AuthorizedKeysFile /usr/ssh/%u.keys .ssh/authorized_keys .ssh/authorized_keys2' >> /etc/ssh/sshd_config.d/30-auth-system.conf && \
+    echo ${SSHPUBKEY} > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys
+
+ARG RECIPE=rag
+ARG MODEL_IMAGE=quay.io/ai-lab/granite-7b-lab:latest
+ARG APP_IMAGE=quay.io/ai-lab/${RECIPE}:latest
+ARG SERVER_IMAGE=quay.io/ai-lab/llamacpp_python:latest
+ARG CHROMADB_IMAGE=quay.io/ai-lab/chromadb
+ARG TARGETARCH
+
+# Add quadlet files to setup system to automatically run AI application on boot
+COPY build/${RECIPE}.kube build/${RECIPE}.yaml /usr/share/containers/systemd
+
+# Because images are prepulled, no need for .image quadlet
+# If commenting out the pulls below, uncomment this to track the images
+# so the systemd service will wait for the images with the service startup
+# COPY build/${RECIPE}.image /usr/share/containers/systemd
+
+# Setup /usr/lib/containers/storage as an additional store for images.
+# Remove once the base images have this set by default.
+RUN sed -i -e '/additionalimage.*/a "/usr/lib/containers/storage",' \
+        /etc/containers/storage.conf
+
+# Added for running as an OCI Container to prevent Overlay on Overlay issues.
+VOLUME /var/lib/containers
+
+# Prepull the model, model_server & application images to populate the system.
+# Comment the pull commands to keep bootc image smaller.
+# The quadlet .image file added above pulls following images with service startup
+RUN podman pull --arch=${TARGETARCH} --root /usr/lib/containers/storage ${SERVER_IMAGE}
+RUN podman pull --arch=${TARGETARCH} --root /usr/lib/containers/storage ${APP_IMAGE}
+RUN podman pull --arch=${TARGETARCH} --root /usr/lib/containers/storage ${MODEL_IMAGE}
+RUN podman pull --arch=${TARGETARCH} --root /usr/lib/containers/storage ${CHROMADB_IMAGE}
+
+RUN podman system reset --force 2>/dev/null
diff --git a/recipes/natural_language_processing/graph_rag/bootc/Containerfile.nocache b/recipes/natural_language_processing/graph_rag/bootc/Containerfile.nocache
new file mode 100644
index 00000000..126286fd
--- /dev/null
+++ b/recipes/natural_language_processing/graph_rag/bootc/Containerfile.nocache
@@ -0,0 +1,22 @@
+# Example: an AI powered sample application is embedded as a systemd service
+# via Podman quadlet files in /usr/share/containers/systemd
+#
+# from recipes/natural_language_processing/graph_rag, run
+# 'make bootc'
+
+FROM quay.io/centos-bootc/centos-bootc:stream9
+ARG SSHPUBKEY
+
+# The --build-arg "SSHPUBKEY=$(cat ~/.ssh/id_rsa.pub)" option inserts your
+# public key into the image, allowing root access via ssh.
+RUN set -eu; mkdir -p /usr/ssh && \
+    echo 'AuthorizedKeysFile /usr/ssh/%u.keys .ssh/authorized_keys .ssh/authorized_keys2' >> /etc/ssh/sshd_config.d/30-auth-system.conf && \
+    echo ${SSHPUBKEY} > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys
+
+ARG RECIPE=rag
+
+# Add quadlet files to setup system to automatically run AI application on boot
+COPY build/${RECIPE}.image build/${RECIPE}.kube build/${RECIPE}.yaml /usr/share/containers/systemd
+
+# Added for running as an OCI Container to prevent Overlay on Overlay issues.
+VOLUME /var/lib/containers
diff --git a/recipes/natural_language_processing/graph_rag/bootc/README.md b/recipes/natural_language_processing/graph_rag/bootc/README.md
new file mode 100644
index 00000000..2f4531a6
--- /dev/null
+++ b/recipes/natural_language_processing/graph_rag/bootc/README.md
@@ -0,0 +1,94 @@
+## Embed workload (AI sample applications) in a bootable container image
+
+### Create a custom centos-bootc:stream9 image
+
+* [Containerfile](./Containerfile) - embeds an LLM-powered sample chat application.
+
+Details on the application can be found [in the graph_rag/README.md](../README.md). By default, this Containerfile includes a model-server
+that is meant to run with CPU - no additional GPU drivers or toolkits are embedded. You can substitute the llamacpp_python model-server image
+for one that has GPU drivers and toolkits with additional build-args. The `FROM` must be replaced with a base image that has the necessary
+kernel drivers and toolkits if building for GPU-enabled systems. For an example of an NVIDIA/CUDA base image,
+see the [NVIDIA bootable image example](https://gitlab.com/bootc-org/examples/-/tree/main/nvidia?ref_type=heads).
+
+In order to pre-pull the workload images, you need to build from the same architecture you're building for.
+If not pre-pulling the workload images, you can cross build (i.e., build from a Mac for an x86_64 system).
+To build the derived bootc image for the x86_64 architecture, run the following:
+
+```bash
+cd recipes/natural_language_processing/graph_rag
+
+# for CPU powered sample LLM application
+# to switch to an alternate platform like aarch64, pass --platform linux/arm64
+# the --cap-add SYS_ADMIN switch is needed when you are embedding Podman
+# commands within the container build. If the registry you are pulling images
+# from requires authentication, then you will need to volume mount the
+# auth_json file with SELinux separation disabled.
+podman login --auth-file auth.json quay.io/yourrepo
+podman build --build-arg "SSHPUBKEY=$(cat ~/.ssh/id_rsa.pub)" \
+    --security-opt label=disable \
+    -v ./auth.json:/run/containers/0/auth.json \
+    --cap-add SYS_ADMIN \
+    -t quay.io/yourrepo/youros:tag .
+
+# for GPU powered sample LLM application with llamacpp cuda model server
+podman build --build-arg "SSHPUBKEY=$(cat ~/.ssh/id_rsa.pub)" \
+    --build-arg "SERVER_IMAGE=quay.io/ai-lab/llamacpp-python-cuda:latest" \
+    --from <gpu-enabled-bootc-base-image> \
+    --cap-add SYS_ADMIN \
+    --platform linux/amd64 \
+    -t quay.io/yourrepo/youros:tag .
+
+podman push quay.io/yourrepo/youros:tag
+```
+
+### Update a bootc-enabled system with the new derived image
+
+To build a disk image from an OCI bootable image, you can refer to [bootc-org/examples](https://gitlab.com/bootc-org/examples).
+For this example, we will assume a bootc-enabled system is already running.
+If already running a bootc-enabled OS, `bootc switch` can be used to update the system to target a new bootable OCI image with embedded workloads.
+
+SSH into the bootc-enabled system and run:
+
+```bash
+bootc switch quay.io/yourrepo/youros:tag
+```
+
+The necessary image layers will be downloaded from the OCI registry, and the system will prompt you to reboot into the new operating system.
+From this point, with any subsequent modifications and pushes to the `quay.io/yourrepo/youros:tag` OCI image, your OS can be updated with:
+
+```bash
+bootc upgrade
+```
+
+### Accessing the embedded workloads
+
+The Graph RAG application can be accessed by visiting port `8501` of the running bootc system.
+They will be running as systemd services from Podman quadlet files placed at `/usr/share/containers/systemd/` on the bootc system. +For more information about running containerized applications as systemd services with Podman, refer to this +[Podman quadlet post](https://www.redhat.com/sysadmin/quadlet-podman) or, [Podman documentation](https://podman.io/docs) + +To monitor the sample applications, SSH into the bootc system and run either: + +```bash +systemctl status rag +``` + +You can also view the pods and containers that are managed with systemd by running: + +``` +podman pod list +podman ps -a +``` + +To stop the sample applications, SSH into the bootc system and run: + +```bash +systemctl stop rag +``` + +To run the sample application _not_ as a systemd service, stop the services then +run the appropriate commands based on the application you have embedded. + +```bash +podman kube play /usr/share/containers/systemd/rag.yaml +``` diff --git a/recipes/natural_language_processing/graph_rag/provision/playbook.yml b/recipes/natural_language_processing/graph_rag/provision/playbook.yml new file mode 100644 index 00000000..2f127861 --- /dev/null +++ b/recipes/natural_language_processing/graph_rag/provision/playbook.yml @@ -0,0 +1,72 @@ +--- +- name: Test Environment Provisioning + hosts: test_environments + remote_user: fedora + become: true + gather_facts: false + + tasks: + + - name: Wait until the instance is ready + ansible.builtin.wait_for_connection: + delay: 10 + timeout: 60 + + - name: Gather facts for first time + ansible.builtin.setup: + + - name: Required Packages + ansible.builtin.package: + name: podman + state: present + + - name: Models host directory + ansible.builtin.file: + path: locallm/models + state: directory + + - name: Download Model + ansible.builtin.get_url: + url: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf + dest: locallm/models + + - name: Run Model + containers.podman.podman_container: + name: llamacpp_python + image: ghcr.io/containers/llamacpp_python:latest + state: started + interactive: true + tty: true + detach: true + ports: + - 8001:8001 + volume: + - ./locallm/models:/locallm/models:ro,Z + env: + MODEL_PATH: models/llama-2-7b-chat.Q5_K_S.gguf + HOST: 0.0.0.0 + PORT: 8001 + + - name: Run Application + containers.podman.podman_container: + name: rag + image: ghcr.io/containers/rag:latest + state: started + interactive: true + tty: true + ports: + - 8501:8501 + env: + MODEL_SERVICE_ENDPOINT: http://10.88.0.1:8001/v1 + + - name: Run Vector Database + containers.podman.podman_container: + name: chromadb + image: ghcr.io/containers/chromadb:latest + state: started + interactive: true + tty: true + ports: + - 8000:8000 + env: + CHROMADB_ENDPOINT: http://0.0.0.0:8000/v1 diff --git a/recipes/natural_language_processing/graph_rag/provision/requirements.yml b/recipes/natural_language_processing/graph_rag/provision/requirements.yml new file mode 100644 index 00000000..da8ae831 --- /dev/null +++ b/recipes/natural_language_processing/graph_rag/provision/requirements.yml @@ -0,0 +1,4 @@ +--- +collections: + - name: containers.podman + version: 1.13.0 diff --git a/recipes/natural_language_processing/graph_rag/quadlet/README.md b/recipes/natural_language_processing/graph_rag/quadlet/README.md new file mode 100644 index 00000000..1d59de2b --- /dev/null +++ b/recipes/natural_language_processing/graph_rag/quadlet/README.md @@ -0,0 +1,9 @@ +### Run rag as a systemd service + +```bash +(cd ../;make quadlet) +sudo cp 
../build/rag.yaml ../build/rag.kube ../build/rag.image /usr/share/containers/systemd/
+sudo /usr/libexec/podman/quadlet --dryrun #optional
+sudo systemctl daemon-reload
+sudo systemctl start rag
+```
diff --git a/recipes/natural_language_processing/graph_rag/quadlet/rag.image b/recipes/natural_language_processing/graph_rag/quadlet/rag.image
new file mode 100644
index 00000000..2eddbfc9
--- /dev/null
+++ b/recipes/natural_language_processing/graph_rag/quadlet/rag.image
@@ -0,0 +1,10 @@
+[Install]
+WantedBy=APP.service
+
+[Service]
+TimeoutStartSec=infinity
+
+[Image]
+Image=APP_IMAGE
+Image=MODEL_IMAGE
+Image=SERVER_IMAGE
diff --git a/recipes/natural_language_processing/graph_rag/quadlet/rag.kube b/recipes/natural_language_processing/graph_rag/quadlet/rag.kube
new file mode 100644
index 00000000..82dadf82
--- /dev/null
+++ b/recipes/natural_language_processing/graph_rag/quadlet/rag.kube
@@ -0,0 +1,16 @@
+[Unit]
+Description=Kubernetes YAML file used to do RAG inferencing
+Documentation=man:podman-generate-systemd(1)
+Wants=network-online.target
+After=network-online.target
+RequiresMountsFor=%t/containers
+
+[Kube]
+# Point to the yaml file in the same directory
+Yaml=rag.yaml
+
+[Service]
+Restart=always
+
+[Install]
+WantedBy=default.target
diff --git a/recipes/natural_language_processing/graph_rag/quadlet/rag.yaml b/recipes/natural_language_processing/graph_rag/quadlet/rag.yaml
new file mode 100644
index 00000000..3e9ff41b
--- /dev/null
+++ b/recipes/natural_language_processing/graph_rag/quadlet/rag.yaml
@@ -0,0 +1,55 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  labels:
+    app: rag
+  name: rag
+spec:
+  initContainers:
+  - name: model-file
+    image: MODEL_IMAGE
+    command: ['/usr/bin/install', "/model/model.file", "/shared/"]
+    volumeMounts:
+    - name: model-file
+      mountPath: /shared
+  containers:
+  - env:
+    - name: MODEL_ENDPOINT
+      value: http://0.0.0.0:8001
+    image: APP_IMAGE
+    name: rag-inference
+    ports:
+    - containerPort: 8501
+      hostPort: 8501
+    securityContext:
+      runAsNonRoot: true
+  - env:
+    - name: CHROMADB_ENDPOINT
+      value: http://0.0.0.0:8000/v1
+    image: CHROMADB_IMAGE
+    name: rag-chromadb
+    ports:
+    - containerPort: 8000
+      hostPort: 8000
+    securityContext:
+      runAsNonRoot: true
+  - env:
+    - name: HOST
+      value: 0.0.0.0
+    - name: PORT
+      value: "8001"
+    - name: MODEL_PATH
+      value: /model/model.file
+    image: SERVER_IMAGE
+    name: rag-model-service
+    ports:
+    - containerPort: 8001
+      hostPort: 8001
+    securityContext:
+      runAsNonRoot: true
+    volumeMounts:
+    - name: model-file
+      mountPath: /model
+  volumes:
+  - name: model-file
+    emptyDir: {}
diff --git a/recipes/natural_language_processing/graph_rag/sample-data/fake_meeting.txt b/recipes/natural_language_processing/graph_rag/sample-data/fake_meeting.txt
new file mode 100644
index 00000000..c1a6463e
--- /dev/null
+++ b/recipes/natural_language_processing/graph_rag/sample-data/fake_meeting.txt
@@ -0,0 +1,29 @@
+[The scene is set in a luxurious conference room with the three executives seated around a large oak table. The room is well-lit and the atmosphere is professional and cordial.]
+Executive 1: "Good morning, everyone. Thank you for joining me today to discuss our exciting new AI business venture."
+Executive 2: "Of course, John. I'm thrilled to be here. This is a game-changer for our college and I can't wait to see it come to fruition."
+Executive 3: "Indeed.
As you know, AI is becoming increasingly important in various industries, and we believe that our venture will provide significant benefits to both our students and the business world as a whole." +Executive 1: "That's right. Our AI platform will offer personalized learning experiences for our students, tailored to their individual needs and goals. And for the business world, it will provide cutting-edge insights and predictions based on vast amounts of data, giving them a competitive edge in today's fast-paced marketplace." +Executive 2: "I see. So how do you plan to monetize this platform?" +Executive 3: "That's a great question. We plan to offer subscription-based services to businesses, as well as generate revenue through partnerships and collaborations with industry leaders. Additionally, we will also explore opportunities for licensing our AI technology to other organizations." +Executive 1: "Excellent. And what about security and privacy concerns? How do you plan to address those?" +Executive 2: "Absolutely. We understand the importance of protecting sensitive data, and we will implement robust security measures to ensure that our platform is secure and compliant with all relevant regulations." +Executive 3: "Yes, and we will also have strict data privacy policies in place to safeguard the personal information of our students and clients. Transparency and trust are key components of any successful AI venture, and we take those seriously." +Executive 1: "I couldn't agree more. Now that we have a solid plan in place, let's start making some noise about this exciting new venture. I think it has the potential to revolutionize the way we approach education and business." +[The three executives nod in agreement and begin brainstorming strategies for promoting their AI platform.] +Executive 1: "Absolutely. Now that we have a solid plan in place, let's start making some noise about this exciting new venture. I think it has the potential to revolutionize the way we approach education and business." +Executive 2: "Agreed. We should start by reaching out to industry leaders and thought leaders in the field of AI and education. They will be key in helping us spread the word and build momentum for our platform." +Executive 3: "Excellent idea. And we should also consider partnering with some of the leading AI research institutions and universities. They will be able to provide valuable insights and expertise that will help us refine and improve our platform." +Executive 1: "That's a great point. Partnerships are key in any successful venture, and we want to make sure that we're working with the best of the best in this field." +Executive 2: "Definitely. And once we have a solid proof of concept, we can start reaching out to potential clients and showcasing the value of our platform. I think we'll find a lot of interest from companies looking for innovative ways to improve their operations and stay ahead of the competition." +Executive 3: "I agree. And as we continue to develop and refine our platform, we can also start exploring new markets and applications for AI in education. There are so many possibilities here, and I'm excited to see where this journey takes us." +Certainly! Here is a continuation of the dialogue: +Executive 1: "Absolutely. Now that we have a solid plan in place, let's start making some noise about this exciting new venture. I think it has the potential to revolutionize the way we approach education and business." +Executive 2: "Agreed. 
We should start by reaching out to industry leaders and thought leaders in the field of AI and education. They will be key in helping us spread the word and build momentum for our platform." +Executive 3: "Excellent idea. And we should also consider partnering with some of the leading AI research institutions and universities. They will be able to provide valuable insights and expertise that will help us refine and improve our platform." +Executive 1: "That's a great point. Partnerships are key in any successful venture, and we want to make sure that we're working with the best of the best in this field." +Executive 2: "Definitely. And once we have a solid proof of concept, we can start reaching out to potential clients and showcasing the value of our platform. I think we'll find a lot of interest from companies looking for innovative ways to improve their operations and stay ahead of the competition." +Executive 3: "I agree. And as we continue to develop and refine our platform, we can also start exploring new markets and applications for AI in education. There are so many possibilities here, and I'm excited to see where this journey takes us." +Executive 1: "Absolutely. And speaking of markets, let's not forget about the potential for international expansion. We could be looking at a global market opportunity here, and we don't want to miss out on that." +Executive 2: "Agreed. We should definitely consider how we can tailor our platform to meet the unique needs of different cultures and regions around the world." +Executive 3: "Excellent point. And as we continue to grow and expand, we'll need to make sure that we have the right infrastructure in place to support our global ambitions." +[The three executives nod in agreement and begin brainstorming strategies for promoting their AI platform on a global scale.] \ No newline at end of file