Skip to content

Commit 6004fc4

Browse files
committed
[DERCBOT-1609] Structuring the LLM response
1 parent 144c7c2 commit 6004fc4

File tree

10 files changed

+429
-229
lines changed

10 files changed

+429
-229
lines changed

bot/admin/web/src/app/rag/rag-settings/models/engines-configurations.ts

Lines changed: 91 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -27,35 +27,102 @@ import {
2727
PromptDefinitionFormatter
2828
} from '../../../shared/model/ai-settings';
2929

30-
export const QuestionCondensingDefaultPrompt: string = `Given a chat history and the latest user question which might reference context in the chat history, formulate a standalone question which can be understood without the chat history. Do NOT answer the question, just reformulate it if needed and otherwise return it as is.`;
30+
export const QuestionCondensingDefaultPrompt: string = `You are a helpful assistant that reformulates questions.
3131
32-
export const QuestionAnsweringDefaultPrompt: string = `# TOCK (The Open Conversation Kit) chatbot
33-
34-
## General context
35-
36-
You are a chatbot designed to provide short conversational messages in response to user queries.
37-
38-
## Guidelines
39-
40-
Incorporate any relevant details from the provided context into your answers, ensuring they are directly related to the user's query.
32+
You are given:
33+
- The conversation history between the user and the assistant
34+
- The most recent user question
4135
42-
## Style and format
36+
Your task:
37+
- Reformulate the user’s latest question into a clear, standalone query.
38+
- Incorporate relevant context from the conversation history.
39+
- Do NOT answer the question.
40+
- If the history does not provide additional context, keep the question as is.
4341
44-
Your tone is empathetic, informative and polite.
42+
Return only the reformulated question.`;
4543

46-
## Additional instructions
47-
48-
Use the following pieces of retrieved context to answer the question.
49-
If you dont know the answer, answer (exactly) with "{{no_answer}}".
50-
Answer in {{locale}}.
51-
52-
## Context
53-
54-
{{context}}
55-
56-
## Question
44+
export const QuestionAnsweringDefaultPrompt: string = `# TOCK (The Open Conversation Kit) chatbot
5745
58-
{{question}}
46+
## Instructions:
47+
You must answer STRICTLY in valid JSON format (no extra text, no explanations).
48+
Use only the following context and the rules below to answer the question.
49+
50+
### Rules for JSON output:
51+
52+
- If the answer is found in the context:
53+
- "status": "found_in_context"
54+
55+
- If the answer is NOT found in the context:
56+
- "status": "not_found_in_context"
57+
- "answer":
58+
- The "answer" must not be a generic refusal. Instead, generate a helpful and intelligent response:
59+
- If a similar or related element exists in the context (e.g., another product, service, or regulation with a close name, date, or wording), suggest it naturally in the answer.
60+
- If no similar element exists, politely acknowledge the lack of information while encouraging clarification or rephrasing.
61+
- Always ensure the response is phrased in a natural and user-friendly way, rather than a dry "not found in context".
62+
63+
- If the question matches a special case defined below:
64+
- "status": "<the corresponding case code>"
65+
66+
And for all cases (MANDATORY):
67+
- "answer": "<the best possible answer in {{ locale }}>"
68+
- "topic": "<exactly ONE topic chosen STRICTLY from the predefined list below. If no exact match is possible, set 'unknown'>"
69+
- "suggested_topics": ["<zero or more free-form suggestions if topic is unknown>"]
70+
71+
Exception: If the question is small talk (limited to conversational rituals such as greetings (e.g., “hello”, “hi”) and farewells or leave-takings (e.g., “goodbye”, “see you”)), you may ignore the context and generate a natural small-talk response in the "answer". In this case:
72+
- "status": "small_talk"
73+
- "topic": "<e.g., greetings>"
74+
- "suggested_topics": []
75+
- "context": []
76+
77+
### Context tracing requirements (MANDATORY):
78+
- You MUST include **every** chunk from the input context in the "context" array, in the same order they appear. **No chunk may be omitted**.
79+
- If explicit chunk identifiers are present in the context, use them; otherwise assign sequential numbers starting at 1.
80+
- For each chunk object:
81+
- "chunk": "<chunk_identifier_or_sequential_number>"
82+
- "sentences": ["<verbatim sentence(s) from this chunk used to answer the question>"] — leave empty \`[]\` if none.
83+
- "reason": null if the chunk contributed; otherwise a concise explanation of why this chunk is not relevant to the question (e.g., "general background only", "different product", "no data for the asked period", etc.).
84+
- If there are zero chunks in the context, return \`"context": []\`.
85+
86+
### Predefined list of topics (use EXACT spelling, no variations):
87+
88+
## Context:
89+
{{ context }}
90+
91+
## Conversation history
92+
{{ chat_history }}
93+
94+
## User question
95+
{{ question }}
96+
97+
## Output format (JSON only):
98+
Return your response in the following format:
99+
100+
{
101+
"status": "found_in_context" | "not_found_in_context" | "small_talk",
102+
"answer": "TEXTUAL_ANSWER",
103+
"topic": "EXACT_TOPIC_FROM_LIST_OR_UNKNOWN",
104+
"suggested_topics": [
105+
"SUGGESTED_TOPIC_1",
106+
"SUGGESTED_TOPIC_2"
107+
],
108+
"context": [
109+
{
110+
"chunk": "1",
111+
"sentences": ["SENTENCE_1", "SENTENCE_2"],
112+
"reason": null
113+
},
114+
{
115+
"chunk": "2",
116+
"sentences": [],
117+
"reason": "General description; no details related to the question."
118+
},
119+
{
120+
"chunk": "3",
121+
"sentences": ["SENTENCE_X"],
122+
"reason": null
123+
}
124+
]
125+
}
59126
`;
60127

61128
export const QuestionCondensing_prompt: ProvidersConfigurationParam[] = [

bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ data class BotRAGConfiguration(
3636
val llmSetting: LLMSetting? = null,
3737
val emSetting: EMSetting,
3838
val indexSessionId: String? = null,
39+
@Deprecated("Replaced by LLM answer status")
3940
val noAnswerSentence: String,
4041
val noAnswerStoryId: String? = null,
4142
val documentsRequired: Boolean = true,

bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ import ai.tock.bot.engine.action.SendSentenceWithFootnotes
3131
import ai.tock.bot.engine.dialog.Dialog
3232
import ai.tock.bot.engine.user.PlayerType
3333
import ai.tock.genai.orchestratorclient.requests.*
34+
import ai.tock.genai.orchestratorclient.responses.LLMAnswer
3435
import ai.tock.genai.orchestratorclient.responses.ObservabilityInfo
3536
import ai.tock.genai.orchestratorclient.responses.RAGResponse
36-
import ai.tock.genai.orchestratorclient.responses.TextWithFootnotes
3737
import ai.tock.genai.orchestratorclient.retrofit.GenAIOrchestratorBusinessError
3838
import ai.tock.genai.orchestratorclient.retrofit.GenAIOrchestratorValidationError
3939
import ai.tock.genai.orchestratorclient.services.RAGService
@@ -60,7 +60,7 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler {
6060
BotRepository.saveMetric(createMetric(MetricType.STORY_HANDLED))
6161

6262
// Call RAG Api - Gen AI Orchestrator
63-
val (answer, debug, noAnswerStory, observabilityInfo) = rag(this)
63+
val (answer, footnotes, debug, noAnswerStory, observabilityInfo) = rag(this)
6464

6565
// Add debug data if available and if debugging is enabled
6666
if (debug != null) {
@@ -75,14 +75,18 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler {
7575
val modifiedObservabilityInfo = observabilityInfo?.let { updateObservabilityInfo(this, it) }
7676

7777
send(
78-
SendSentenceWithFootnotes(
79-
botId, connectorId, userId, text = answer.text, footnotes = answer.footnotes.map {
78+
action = SendSentenceWithFootnotes(
79+
playerId = botId,
80+
applicationId = connectorId,
81+
recipientId = userId,
82+
text = answer.answer,
83+
footnotes = footnotes?.map {
8084
Footnote(
8185
it.identifier, it.title, it.url,
8286
if(action.metadata.sourceWithContent) it.content else null,
8387
it.score
8488
)
85-
}.toMutableList(),
89+
}?.toMutableList() ?: mutableListOf<Footnote>(),
8690
// modifiedObservabilityInfo includes the public langfuse URL if filled.
8791
metadata = ActionMetadata(isGenAiRagAnswer = true, observabilityInfo = modifiedObservabilityInfo)
8892
)
@@ -116,13 +120,13 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler {
116120
private fun ragStoryRedirection(botBus: BotBus, response: RAGResponse?): StoryDefinition? {
117121
return with(botBus) {
118122
botDefinition.ragConfiguration?.let { ragConfig ->
119-
if (response?.answer?.text.equals(ragConfig.noAnswerSentence, ignoreCase = true)) {
123+
if (response?.answer?.status.equals("not_found_in_context", ignoreCase = true)) {
120124
// Save no answer metric
121125
saveRagMetric(IndicatorValues.NO_ANSWER)
122126

123127
// Switch to no answer story if configured
124128
if (!ragConfig.noAnswerStoryId.isNullOrBlank()) {
125-
logger.info { "The RAG response is equal to the configured no-answer sentence, so switch to the no-answer story." }
129+
logger.info { "Switch to the no-answer RAG story." }
126130
getNoAnswerRAGStory(ragConfig)
127131
} else null
128132
} else {
@@ -221,7 +225,7 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler {
221225
)
222226

223227
// Handle RAG response
224-
return RAGResult(response?.answer, response?.debug, ragStoryRedirection(this, response), response?.observabilityInfo)
228+
return RAGResult(response?.answer, response?.footnotes, response?.debug, ragStoryRedirection(this, response), response?.observabilityInfo)
225229
} catch (exc: Exception) {
226230
logger.error { exc }
227231
// Save failure metric
@@ -232,7 +236,7 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler {
232236
RAGResult(noAnswerStory = getNoAnswerRAGStory(ragConfiguration))
233237
}
234238
else RAGResult(
235-
answer = TextWithFootnotes(text = technicalErrorMessage),
239+
answer = LLMAnswer(status="error", answer = technicalErrorMessage),
236240
debug = when(exc) {
237241
is GenAIOrchestratorBusinessError -> RAGError(exc.message, exc.error)
238242
is GenAIOrchestratorValidationError -> RAGError(exc.message, exc.detail)
@@ -282,7 +286,8 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler {
282286
* Aggregation of RAG answer, debug and the no answer Story.
283287
*/
284288
data class RAGResult(
285-
val answer: TextWithFootnotes? = null,
289+
val answer: LLMAnswer? = null,
290+
val footnotes: List<ai.tock.genai.orchestratorclient.responses.Footnote>? = null,
286291
val debug: Any? = null,
287292
val noAnswerStory: StoryDefinition? = null,
288293
val observabilityInfo: ObservabilityInfo? = null,

gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/responses/RAGResponse.kt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
package ai.tock.genai.orchestratorclient.responses
1818

1919
data class RAGResponse(
20-
val answer: TextWithFootnotes,
20+
val answer: LLMAnswer,
21+
val footnotes: List<Footnote> = emptyList(),
2122
val debug: Any? = null,
2223
val observabilityInfo: ObservabilityInfo? = null,
2324
)

gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/responses/models.kt

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,18 @@
1717
package ai.tock.genai.orchestratorclient.responses
1818

1919

20-
data class TextWithFootnotes(
21-
val text: String,
22-
val footnotes: List<Footnote> = emptyList(),
20+
data class ChunkSentences(
21+
val chunk: String? = null,
22+
val sentences: List<String>? = emptyList(),
23+
val reason: String? = null,
24+
)
25+
26+
data class LLMAnswer(
27+
val status: String,
28+
val answer: String,
29+
val topic: String? = null,
30+
val suggestedTopics: List<String>? = null,
31+
val context: List<ChunkSentences>? = null,
2332
)
2433

2534
data class Footnote(

gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OllamaLLMSetting.kt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,3 @@ data class OllamaLLMSetting<T>(
2828
}
2929
}
3030

31-
// TODO MASS : Check Compile + TU (car dernier commit)

gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,16 +52,55 @@ class Footnote(Source):
5252

5353
identifier: str = Field(description='Footnote identifier', examples=['1'])
5454

55+
class ChunkInfos(BaseModel):
56+
"""A model representing information about a chunk used in the RAG context."""
5557

56-
class TextWithFootnotes(BaseModel):
57-
"""Text with its footnotes. Used for RAG response"""
58-
59-
text: str = Field(
60-
description='Text with footnotes used to list outside sources',
61-
examples=['This is page content [1], and this is more content [2]'],
58+
chunk: Optional[str] = Field(
59+
description='Unique identifier of the chunk.',
60+
examples=['cd6d8221-ba9f-44da-86ee-0e25a3c9a5c7'],
61+
default=None
62+
)
63+
sentences: Optional[List[str]] = Field(
64+
description='List of verbatim sentences from the chunk that were used by the LLM.',
65+
default=None
6266
)
63-
footnotes: set[Footnote] = Field(description='Set of footnotes')
67+
reason: Optional[str] = Field(
68+
description='Reason why the chunk was not used (e.g., irrelevant, general background).',
69+
default=None
70+
)
71+
72+
73+
class LLMAnswer(BaseModel):
74+
"""
75+
A model representing the structured answer generated by the LLM
76+
in response to a user query, based on the provided RAG context.
77+
"""
6478

79+
status: Optional[str] = Field(
80+
description="The status of the answer generation. "
81+
"Possible values: 'found_in_context', 'not_found_in_context', 'small_talk', "
82+
"or other case-specific codes.",
83+
default=None
84+
)
85+
answer: Optional[str] = Field(
86+
description="The textual answer generated by the LLM, in the user's locale.",
87+
default=None
88+
)
89+
topic: Optional[str] = Field(
90+
description="The main topic assigned to the answer. Must be one of the predefined list "
91+
"of topics, or 'unknown' if no match is possible.",
92+
default=None
93+
)
94+
suggested_topics: Optional[List[str]] = Field(
95+
description="A list of suggested alternative or related topics, "
96+
"used when the main topic is 'unknown'.",
97+
default=None
98+
)
99+
context: Optional[List[ChunkInfos]] = Field(
100+
description="The list of chunks from the context that contributed to or were considered "
101+
"in the LLM's answer. Each entry contains identifiers, sentences, and reasons.",
102+
default=None
103+
)
65104

66105
@unique
67106
class ChatMessageType(str, Enum):
@@ -154,4 +193,4 @@ class RAGDebugData(QADebugData):
154193
'Question: Hello, how to plan a trip to Morocco ?. Answer in French.'
155194
],
156195
)
157-
answer: str = Field(description='The RAG answer.')
196+
answer: LLMAnswer = Field(description='The RAG answer.')

gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/responses/responses.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,8 @@
2525
ErrorInfo,
2626
)
2727
from gen_ai_orchestrator.models.llm.llm_provider import LLMProvider
28-
from gen_ai_orchestrator.models.rag.rag_models import Source, TextWithFootnotes
28+
from gen_ai_orchestrator.models.rag.rag_models import Source, LLMAnswer, Footnote
2929
from gen_ai_orchestrator.models.observability.observability_provider import ObservabilityProvider
30-
from gen_ai_orchestrator.models.rag.rag_models import TextWithFootnotes
3130
from gen_ai_orchestrator.models.vector_stores.vectore_store_provider import VectorStoreProvider
3231

3332

@@ -122,9 +121,10 @@ class ObservabilityInfo(BaseModel):
122121
class RAGResponse(BaseModel):
123122
"""The RAG response model"""
124123

125-
answer: TextWithFootnotes = Field(
126-
description='The RAG answer, with outside sources.'
124+
answer: Optional[LLMAnswer] = Field(
125+
description='The RAG answer'
127126
)
127+
footnotes: set[Footnote] = Field(description='Set of footnotes')
128128
debug: Optional[Any] = Field(
129129
description='Debug data',
130130
examples=[{'action': 'retrieve', 'result': 'OK', 'errors': []}],

0 commit comments

Comments
 (0)