Commit 7e58573

Merge pull request #48 from mohit-sheth/rag

add BYOK RAG for BugZooka

2 parents: 3d4e942 + 7ee4a1f
17 files changed: +344 additions, -18 deletions

.dockerignore (1 addition, 1 deletion)

```diff
@@ -72,4 +72,4 @@ logs/
 *.tmp
 *.temp
 .tmp/
-.temp/
+.temp/
```

Dockerfile (1 addition)

```diff
@@ -50,6 +50,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     curl \
     jq \
     ca-certificates \
+    vim \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
```

Makefile (14 additions, 2 deletions)

```diff
@@ -1,4 +1,4 @@
-.PHONY: help install dev-install test lint format check clean
+.PHONY: help install dev-install test lint format check clean deploy
 
 help: ## Show this help
 	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
@@ -31,7 +31,7 @@ clean: ## Clean cache and temporary files
 	rm -rf .ruff_cache/
 	rm -rf .mypy_cache/
 
-run: ## Run BugZooka (requires --product and --ci arguments)
+run: ## Run BugZooka (if RAG_IMAGE set in env/.env, apply sidecar overlay first)
 	PYTHONPATH=. python bugzooka/entrypoint.py $(ARGS)
 
 podman-build: ## Build podman image
@@ -46,3 +46,15 @@ podman-run: ## Run podman container
 		-e GEMINI_VERIFY_SSL=false \
 		-v ./.env:/app/.env:Z \
 		bugzooka:latest
+
+deploy: ## Deploy to OpenShift (uses overlays/rag if RAG_IMAGE is set in .env)
+	@set -a; \
+	if [ -f .env ]; then . ./.env; fi; \
+	set +a; \
+	if [ -n "$$RAG_IMAGE" ]; then \
+		echo "Deploying with RAG overlay (RAG_IMAGE=$$RAG_IMAGE)"; \
+		kustomize build --load-restrictor=LoadRestrictionsNone ./kustomize/overlays/rag | envsubst | oc apply -f -; \
+	else \
+		echo "Deploying base kustomize (no RAG_IMAGE)"; \
+		kustomize build --load-restrictor=LoadRestrictionsNone ./kustomize/base | envsubst | oc apply -f -; \
+	fi
```
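The branch in the new `deploy` target can be sketched in Python to make the selection rule explicit. `choose_kustomize_path` is a hypothetical helper for illustration only, not part of the repository:

```python
def choose_kustomize_path(env: dict) -> str:
    """Mirror the deploy target's branch: use the RAG overlay only when
    RAG_IMAGE is set to a non-empty value, otherwise the base kustomization."""
    if env.get("RAG_IMAGE"):
        return "./kustomize/overlays/rag"
    return "./kustomize/base"

# The Makefile sources .env with `set -a` so RAG_IMAGE lands in the process
# environment; a plain dict keeps this sketch self-contained.
print(choose_kustomize_path({"RAG_IMAGE": "quay.io/example/rag:latest"}))  # ./kustomize/overlays/rag
print(choose_kustomize_path({}))                                           # ./kustomize/base
```

Note that an empty `RAG_IMAGE=` in `.env` also falls through to the base path, matching the `-n "$$RAG_IMAGE"` test.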

README.md (48 additions)

````diff
@@ -144,6 +144,54 @@ Along with secrets, prompts are configurable using a `prompts.json` in the root
 }
 ```
+
+### **Historical Failure Summary (summarize)**
+
+- What it does:
+  - Scans channel history within the specified lookback window
+  - Counts total jobs and failures, groups failures by type
+  - Optionally breaks down by OpenShift version and includes representative messages
+
+- How to run:
+  - Ensure BugZooka is running
+  - In Slack:
+    - `summarize 20m`
+    - `summarize 7d verbose`
+
+- Behavior:
+  - All summary output is threaded under the triggering `summarize` message to avoid channel noise
+  - Large sections are chunked to fit Slack limits
+
+- Notes:
+  - Only CI job notifications that clearly indicate a failure are included
+  - No persistent state; summaries read from channel history at request time
+
+### **RAG-Augmented Analysis (Optional)**
+BugZooka can optionally enrich its "Implications to understand" output with Retrieval-Augmented Generation (RAG) context when a local vector store is available.
+
+- What it does:
+  - Detects RAG data under `RAG_DB_PATH` (default: `/rag`).
+  - Retrieves top-k relevant chunks via the local FAISS index.
+  - Uses `RAG_AWARE_PROMPT` to ask the inference API for context-aware insights.
+  - Appends a "RAG-Informed Insights" section beneath the standard implications.
+
+- Enable via deployment overlay:
+  - Build your BYOK RAG image following the BYOK tooling HOWTO and set it as `RAG_IMAGE` in your `.env`:
+    - [BYOK Tooling HOWTO](https://github.com/openshift/lightspeed-rag-content/tree/main/byok#byok-tooling-howto)
+  - Run `make deploy`. The Makefile will apply the RAG overlay and mount a shared volume at `/rag`.
+  - Note: the BYOK image is intended to run as an initContainer that prepares the vector store; the overlay in this repository runs it as a sidecar, and both patterns work for preparing/serving `/rag`.
+  - For local testing without a cluster, place your RAG content under `/rag`; BugZooka will auto-detect it.
+
+- Behavior and fallback:
+  - If no RAG artifacts are detected, analysis proceeds unchanged.
+
+- Files of interest:
+  - `bugzooka/integrations/rag_client_util.py`: retrieves top-k chunks from FAISS
+  - `bugzooka/analysis/prompts.py`: `RAG_AWARE_PROMPT`
+  - `bugzooka/integrations/slack_fetcher.py`: integrates RAG into implications when available
+  - `kustomize/overlays/rag/*`: RAG sidecar overlay and volume wiring
+
+
 ### **MCP Servers**
 MCP servers can be integrated by adding a simple configuration in `mcp_config.json` file in the root directory.
````
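The auto-detection the README describes checks for JSON artifacts under `RAG_DB_PATH`. A minimal standalone sketch of that check (mirroring `_is_rag_enabled` in the `slack_fetcher.py` diff), exercised against a throwaway directory instead of the real `/rag` mount:

```python
import os
import tempfile

def rag_data_present(rag_dir: str) -> bool:
    """RAG is considered enabled when the directory exists and contains
    at least one JSON index/store artifact."""
    if not os.path.isdir(rag_dir):
        return False
    return any(f.name.endswith(".json") for f in os.scandir(rag_dir))

with tempfile.TemporaryDirectory() as d:
    print(rag_data_present(d))  # False: directory exists but is empty
    open(os.path.join(d, "index_store.json"), "w").close()
    print(rag_data_present(d))  # True: a JSON artifact now exists
```

A missing directory and an empty directory both report `False`, which is why analysis proceeds unchanged when no RAG volume is mounted.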

bugzooka/analysis/prompts.py (14 additions)

```diff
@@ -37,3 +37,17 @@
     "- <Code fixes or configuration updates>\n"
     "- <Relevant logs, metrics, or monitoring tools>",
 }
+
+RAG_AWARE_PROMPT = {
+    "system": "You are an AI assistant specializing in analyzing logs to detect failures. "
+    "When provided with additional contextual knowledge (from RAG), use it to refine your analysis "
+    "and improve accuracy of diagnostics.",
+    "user": (
+        "You have access to external knowledge retrieved from a vector store (RAG). "
+        "Use this RAG context to better interpret the following log data.\n\n"
+        "RAG Context:\n{rag_context}\n\n"
+        "Log Data:\n{error_list}\n\n"
+        "Using both, detect anomalies, identify key failures, and summarize the most critical issues."
+    ),
+    "assistant": "Here is a context-aware analysis of the most relevant failures:",
+}
```
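`RAG_AWARE_PROMPT` is consumed by formatting the `user` template and assembling a three-message chat payload, as the `slack_fetcher.py` diff does inline. A self-contained sketch with an abridged copy of the prompt (the real dict lives in `bugzooka/analysis/prompts.py`):

```python
# Abridged copy for a self-contained demo; field names match the real dict.
RAG_AWARE_PROMPT = {
    "system": "You are an AI assistant specializing in analyzing logs to detect failures.",
    "user": (
        "RAG Context:\n{rag_context}\n\n"
        "Log Data:\n{error_list}\n\n"
        "Using both, detect anomalies, identify key failures, and summarize the most critical issues."
    ),
    "assistant": "Here is a context-aware analysis of the most relevant failures:",
}

def build_rag_messages(rag_context: str, error_list: str) -> list:
    """Fill the user template and return the chat-style message list."""
    return [
        {"role": "system", "content": RAG_AWARE_PROMPT["system"]},
        {"role": "user", "content": RAG_AWARE_PROMPT["user"].format(
            rag_context=rag_context, error_list=error_list)},
        {"role": "assistant", "content": RAG_AWARE_PROMPT["assistant"]},
    ]

msgs = build_rag_messages("chunk text here", "ERROR: pod CrashLoopBackOff")
print([m["role"] for m in msgs])  # ['system', 'user', 'assistant']
```

The trailing assistant message primes the model's reply style; the retrieved chunks and the raw error summary travel together in the single user turn.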
bugzooka/integrations/rag_client_util.py (new file, 58 additions; filename per the README and the import in slack_fetcher.py)

```diff
@@ -0,0 +1,58 @@
+import os
+from typing import Optional
+
+from dotenv import load_dotenv
+from llama_index.core import Settings, load_index_from_storage
+from llama_index.core.llms.utils import resolve_llm
+from llama_index.core.storage.storage_context import StorageContext
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.vector_stores.faiss import FaissVectorStore
+
+# Fix cache permission issue for non-root containers
+os.environ.setdefault("HF_HOME", "/tmp/.cache")
+os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/.cache")
+os.environ.setdefault("LLAMA_INDEX_CACHE_DIR", "/tmp/.cache")
+os.makedirs("/tmp/.cache", exist_ok=True)
+
+
+def get_rag_context(query: str, top_k: Optional[int] = None) -> str:
+    """Return concatenated top-k chunks from the local FAISS store for a query.
+
+    Reads configuration from environment variables and optional .env files.
+    """
+    # Load env without overriding already-set variables
+    load_dotenv(dotenv_path=".env", override=False)
+    load_dotenv(dotenv_path="/app/.env", override=False)
+
+    db_path = os.getenv("RAG_DB_PATH", "/rag")
+    index_id = os.getenv("RAG_PRODUCT_INDEX", "vector_db_index")
+    embed_model_path = os.getenv(
+        "EMBEDDING_MODEL_PATH", "sentence-transformers/all-mpnet-base-v2"
+    )
+    k = int(os.getenv("RAG_TOP_K", str(top_k if top_k is not None else 5)))
+
+    os.environ.setdefault("TRANSFORMERS_CACHE", embed_model_path)
+    os.environ.setdefault("TRANSFORMERS_OFFLINE", "0")
+
+    Settings.embed_model = HuggingFaceEmbedding(model_name=embed_model_path)
+    Settings.llm = resolve_llm(None)
+
+    storage_context = StorageContext.from_defaults(
+        vector_store=FaissVectorStore.from_persist_dir(db_path), persist_dir=db_path
+    )
+    vector_index = load_index_from_storage(
+        storage_context=storage_context, index_id=index_id
+    )
+
+    retriever = vector_index.as_retriever(similarity_top_k=k)
+    nodes = retriever.retrieve(query)
+
+    seen_texts = set()
+    formatted_chunks = []
+    for i, node in enumerate(nodes, 1):
+        text = node.get_text().strip()
+        if text not in seen_texts:
+            seen_texts.add(text)
+            formatted_chunks.append(f"--- Chunk {i} ---\n{text}\n")
+
+    return "\n".join(formatted_chunks)
```
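The retrieval itself needs llama-index and a persisted FAISS store, but the final de-duplication/formatting loop is easy to exercise in isolation. One quirk worth noting: chunk labels come from the enumeration index, so skipping a duplicate leaves a gap in the numbering. A standalone copy of that loop (`format_chunks` is a name introduced here for illustration):

```python
def format_chunks(texts):
    """Same logic as the tail of get_rag_context: drop exact-duplicate
    chunks, label the kept ones by their original retrieval position."""
    seen, formatted = set(), []
    for i, text in enumerate(texts, 1):
        t = text.strip()
        if t not in seen:
            seen.add(t)
            formatted.append(f"--- Chunk {i} ---\n{t}\n")
    return "\n".join(formatted)

out = format_chunks(["alpha", "alpha", "beta"])
print(out)  # keeps "Chunk 1" and "Chunk 3"; the duplicate at position 2 is skipped
```

Preserving the original position in the label keeps the numbering traceable back to retrieval rank, at the cost of non-contiguous labels.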

bugzooka/integrations/slack_fetcher.py (53 additions, 3 deletions)

```diff
@@ -3,6 +3,7 @@
 import sys
 import time
 import re
+import os
 
 from slack_sdk import WebClient
 from slack_sdk.errors import SlackApiError
@@ -25,11 +26,13 @@
     classify_failure_type,
     build_summary_sections,
 )
-from bugzooka.core.utils import extract_job_details
+from bugzooka.analysis.prompts import RAG_AWARE_PROMPT
 from bugzooka.integrations.inference import (
     InferenceAPIUnavailableError,
     AgentAnalysisLimitExceededError,
+    ask_inference_api,
 )
+from bugzooka.integrations.rag_client_util import get_rag_context
 from bugzooka.core.utils import (
     to_job_history_url,
     fetch_job_history_stats,
@@ -164,6 +167,7 @@ def _chunk_text(self, text: str, limit: int = 11900) -> List[str]:
             start = split_at
 
         return chunks
+
     def _handle_job_history(
         self,
         thread_ts: str,
@@ -487,6 +491,14 @@ def _summarize_messages_in_range(
             version_type_messages,
         )
 
+    def _is_rag_enabled(self) -> bool:
+        """Check if RAG data exists under /rag."""
+        rag_dir = os.getenv("RAG_DB_PATH", "/rag")
+        if not os.path.isdir(rag_dir):
+            return False
+        # Check for expected RAG artifacts (JSON index/store files)
+        return any(f.name.endswith(".json") for f in os.scandir(rag_dir))
+
     def _process_message(
         self, msg, product, ci_system, product_config, enable_inference
     ):
@@ -556,8 +568,46 @@ def _process_message(
                 error_summary, product, product_config
             )
 
-            # Send final analysis
-            self._send_analysis_result(analysis_response, ts)
+            # Optionally augment with RAG-aware prompt when RAG_IMAGE is set
+            combined_response = analysis_response
+            try:
+                if self._is_rag_enabled():
+                    self.logger.info(
+                        "RAG data detected — augmenting analysis with RAG context."
+                    )
+                    rag_top_k = int(os.getenv("RAG_TOP_K", "3"))
+                    rag_query = f"Provide context relevant to the following errors:\n{error_summary}"
+                    rag_context = get_rag_context(rag_query, top_k=rag_top_k)
+                    if rag_context:
+                        rag_user = RAG_AWARE_PROMPT["user"].format(
+                            rag_context=rag_context,
+                            error_list=error_summary,
+                        )
+                        rag_messages = [
+                            {"role": "system", "content": RAG_AWARE_PROMPT["system"]},
+                            {"role": "user", "content": rag_user},
+                            {
+                                "role": "assistant",
+                                "content": RAG_AWARE_PROMPT["assistant"],
+                            },
+                        ]
+                        rag_resp = ask_inference_api(
+                            messages=rag_messages,
+                            url=product_config["endpoint"][product],
+                            api_token=product_config["token"][product],
+                            model=product_config["model"][product],
+                        )
+                        combined_response = (
+                            f"{analysis_response}\n\n"
+                            f"💡 **RAG-Informed Insights:**\n{rag_resp}"
+                        )
+                else:
+                    self.logger.info("No RAG data found — skipping RAG augmentation.")
+            except Exception as e:
+                self.logger.warning("RAG augmentation failed/skipped: %s", e)
+
+            # Send final analysis (possibly augmented)
+            self._send_analysis_result(combined_response, ts)
 
         except InferenceAPIUnavailableError as e:
             self.logger.warning(
```
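The augmentation path above always falls back to the base analysis: `combined_response` starts as `analysis_response` and is only replaced when the RAG call succeeds. That assembly can be sketched as a standalone helper (`combine_with_rag` is hypothetical; the diff does this inline):

```python
from typing import Optional

def combine_with_rag(analysis_response: str, rag_resp: Optional[str]) -> str:
    """Append the RAG section only when augmentation produced a response;
    any failure or empty result in the RAG path leaves the base analysis untouched."""
    if not rag_resp:
        return analysis_response
    return f"{analysis_response}\n\n💡 **RAG-Informed Insights:**\n{rag_resp}"

print(combine_with_rag("Base analysis.", None))
print(combine_with_rag("Base analysis.", "Context suggests a known flake."))
```

Wrapping the whole RAG branch in `try/except` plus this fallback means a broken or missing vector store degrades to the pre-RAG behavior rather than blocking the Slack reply.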
Kustomize deployment manifest (filename not captured in this view; 4 additions, 4 deletions)

```diff
@@ -51,11 +51,11 @@ spec:
             subPath: prompts.json
         resources:
           requests:
-            cpu: "100m"
-            memory: "128Mi"
+            cpu: "1"
+            memory: "1Gi"
          limits:
-            cpu: "500m"
-            memory: "512Mi"
+            cpu: "2"
+            memory: "2Gi"
      volumes:
      - name: prompts
        configMap:
```
