diff --git a/rag_from_scratch_10_and_11.ipynb b/rag_from_scratch_10_and_11.ipynb index 2de0b9a..722d4eb 100644 --- a/rag_from_scratch_10_and_11.ipynb +++ b/rag_from_scratch_10_and_11.ipynb @@ -16,17 +16,17 @@ "\n", "## Enviornment\n", "\n", - "`(1) Packages`" + "`(1) Packages`\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "9bc509da-52b2-49fc-bc45-e5fd75ff5fed", "metadata": {}, "outputs": [], "source": [ - "! pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain youtube-transcript-api pytube" + "# ! pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain youtube-transcript-api pytube" ] }, { @@ -36,20 +36,20 @@ "source": [ "`(2) LangSmith`\n", "\n", - "https://docs.smith.langchain.com/" + "https://docs.smith.langchain.com/\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "3bc82800-5498-40be-86db-e1f6df8c86da", "metadata": {}, "outputs": [], "source": [ "import os\n", - "os.environ['LANGCHAIN_TRACING_V2'] = 'true'\n", - "os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'\n", - "os.environ['LANGCHAIN_API_KEY'] = " + "# os.environ['LANGCHAIN_TRACING_V2'] = 'true'\n", + "# os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'\n", + "# os.environ['LANGCHAIN_API_KEY'] = " ] }, { @@ -57,17 +57,28 @@ "id": "9acfd3be-d9d3-42f6-a13d-b936bb8cd6f4", "metadata": {}, "source": [ - "`(3) API Keys`" + "`(3) API Keys`\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "78e8ecbd-7ba1-4a2d-8ad5-4ce59e164d23", "metadata": {}, "outputs": [], "source": [ - "os.environ['OPENAI_API_KEY'] = " + "os.environ['GOOGLE_API_KEY'] = \n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "31e47f0f", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_cohere.chat_models import ChatCohere\n", + "cohere_llm = ChatCohere(cohere_api_key=\"\")\n" ] }, { @@ -80,40 +91,32 @@ "id": "45fd9558-c53f-4a6c-80f0-c31b3bfd55de", "metadata": {}, "source": [ - "## Part 10: Logical and Semantic routing \n", + "## Part 10: Logical and Semantic routing\n", "\n", "Use function-calling for classification.\n", "\n", - "Flow: \n", + "Flow:\n", "\n", "![Screenshot 2024-03-15 at 3.29.30 PM.png](attachment:b6699c4f-6188-4e0e-8ba4-21582dbca9ef.png)\n", "\n", "Docs:\n", "\n", - "https://python.langchain.com/docs/use_cases/query_analysis/techniques/routing#routing-to-multiple-indexes" + "https://python.langchain.com/docs/use_cases/query_analysis/techniques/routing#routing-to-multiple-indexes\n" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 19, "id": "04c2cf60-d636-4992-a021-1236f7688999", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/rlm/miniforge3/envs/llama2/lib/python3.11/site-packages/langchain_core/_api/beta_decorator.py:86: LangChainBetaWarning: The function `with_structured_output` is in beta. 
It is actively being worked on, so the API may change.\n", - " warn_beta(\n" - ] - } - ], + "outputs": [], "source": [ "from typing import Literal\n", "\n", "from langchain_core.prompts import ChatPromptTemplate\n", "from langchain_core.pydantic_v1 import BaseModel, Field\n", - "from langchain_openai import ChatOpenAI\n", + "from langchain_google_genai import ChatGoogleGenerativeAI\n", + "\n", "\n", "# Data model\n", "class RouteQuery(BaseModel):\n", @@ -125,13 +128,14 @@ " )\n", "\n", "# LLM with function call \n", - "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)\n", - "structured_llm = llm.with_structured_output(RouteQuery)\n", + "llm = ChatGoogleGenerativeAI(model=\"gemini-pro\",\n", + " temperature=0.7, top_p=0.85)\n", + "structured_llm = cohere_llm.with_structured_output(RouteQuery)\n", "\n", "# Prompt \n", "system = \"\"\"You are an expert at routing a user question to the appropriate data source.\n", "\n", - "Based on the programming language the question is referring to, route it to the relevant data source.\"\"\"\n", + "Based on the programming language the question is referring to, route it to the relevant data source. if the question is related to python give 'python_docs', if question is related to js give 'js_docs' else if it is related to golang 'golang_docs'\"\"\"\n", "\n", "prompt = ChatPromptTemplate.from_messages(\n", " [\n", @@ -156,12 +160,12 @@ "source": [ "Note: we used function calling to produce structured output.\n", "\n", - "![Screenshot 2024-03-16 at 12.38.23 PM.png](attachment:1c7e2e9e-e85f-490f-9591-883a4070bdb2.png)" + "![Screenshot 2024-03-16 at 12.38.23 PM.png](attachment:1c7e2e9e-e85f-490f-9591-883a4070bdb2.png)\n" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 20, "id": "cfc6febc-93df-49b4-9920-c93589ba021e", "metadata": {}, "outputs": [], @@ -179,40 +183,30 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 21, "id": "277536df-0904-4d99-92bb-652621afbdec", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "RouteQuery(datasource='python_docs')" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "result" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 23, "id": "636a43ae-50f3-43a1-a1b7-93266ea13bcd", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "'python_docs'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" + "ename": "AttributeError", + "evalue": "'NoneType' object has no attribute 'datasource'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[23], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mresult\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdatasource\u001b[49m\n", + "\u001b[1;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'datasource'" + ] } ], "source": [ @@ -226,12 +220,12 @@ "source": [ "Once we have this, it is trivial to define a branch that uses `result.datasource`\n", "\n", - "https://python.langchain.com/docs/expression_language/how_to/routing" + "https://python.langchain.com/docs/expression_language/how_to/routing\n" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "id": "01f15722-35c6-4456-ad1b-06463233db25", "metadata": {}, "outputs": [], @@ -254,7 +248,7 
@@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "id": "6af07b77-0537-4635-87ec-ad8f59d34e9b", "metadata": {}, "outputs": [ @@ -280,7 +274,7 @@ "source": [ "Trace:\n", "\n", - "https://smith.langchain.com/public/c2ca61b4-3810-45d0-a156-3d6a73e9ee2a/r" + "https://smith.langchain.com/public/c2ca61b4-3810-45d0-a156-3d6a73e9ee2a/r\n" ] }, { @@ -301,21 +295,663 @@ "\n", "Docs:\n", "\n", - "https://python.langchain.com/docs/expression_language/cookbook/embedding_router" + "https://python.langchain.com/docs/expression_language/cookbook/embedding_router\n" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "53cbfa72-c35a-4d1d-aa6d-08a570ab2170", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "--- Logging error ---\n", + "Traceback (most recent call last):\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 1998, in _configure\n", + " handler = LangChainTracer(\n", + " ^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\tracers\\langchain.py\", line 91, in __init__\n", + " self.client = client or get_client()\n", + " ^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\tracers\\langchain.py\", line 54, in get_client\n", + " _CLIENT = Client()\n", + " ^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langsmith\\client.py\", line 534, in __init__\n", + " _validate_api_key_if_hosted(self.api_url, self.api_key)\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langsmith\\client.py\", line 323, in _validate_api_key_if_hosted\n", + " raise ls_utils.LangSmithUserError(\n", + "langsmith.utils.LangSmithUserError: API key must be provided when using hosted LangSmith API\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 1110, in emit\n", + " msg = self.format(record)\n", + " ^^^^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 953, in format\n", + " return fmt.format(record)\n", + " ^^^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 687, in format\n", + " record.message = record.getMessage()\n", + " ^^^^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 377, in getMessage\n", + " msg = msg % self.args\n", + " ~~~~^~~~~~~~~~~\n", + "TypeError: not all arguments converted during string formatting\n", + "Call stack:\n", + " File \"\", line 198, in _run_module_as_main\n", + " File \"\", line 88, in _run_code\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel_launcher.py\", line 17, in \n", + " app.launch_new_instance()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\traitlets\\config\\application.py\", line 1041, in launch_instance\n", + " app.start()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelapp.py\", line 
711, in start\n", + " self.io_loop.start()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\tornado\\platform\\asyncio.py\", line 215, in start\n", + " self.asyncio_loop.run_forever()\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\base_events.py\", line 607, in run_forever\n", + " self._run_once()\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\base_events.py\", line 1922, in _run_once\n", + " handle._run()\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\events.py\", line 80, in _run\n", + " self._context.run(self._callback, *self._args)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 510, in dispatch_queue\n", + " await self.process_one()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 499, in process_one\n", + " await dispatch(*args)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 406, in dispatch_shell\n", + " await result\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 729, in execute_request\n", + " reply_content = await reply_content\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\ipkernel.py\", line 411, in do_execute\n", + " res = shell.run_cell(\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\zmqshell.py\", line 530, in run_cell\n", + " return super().run_cell(*args, **kwargs)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 2945, in run_cell\n", + " result = self._run_cell(\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3000, in _run_cell\n", + " return runner(coro)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\async_helpers.py\", line 129, in _pseudo_sync_runner\n", + " coro.send(None)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3203, in run_cell_async\n", + " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3382, in run_ast_nodes\n", + " if await self.run_code(code, result, async_=asy):\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3442, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"C:\\Users\\Acer\\AppData\\Local\\Temp\\ipykernel_30632\\4060515341.py\", line 47, in \n", + " print(chain.invoke(\"What's a black hole\"))\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\base.py\", line 2487, in invoke\n", + " callback_manager = get_callback_manager_for_config(config)\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\config.py\", line 402, in get_callback_manager_for_config\n", + " return CallbackManager.configure(\n", + " File 
\"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 1450, in configure\n", + " return _configure(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 2004, in _configure\n", + " logger.warning(\n", + "Message: 'Unable to load requested LangChainTracer. To disable this warning, unset the LANGCHAIN_TRACING_V2 environment variables.'\n", + "Arguments: (\"LangSmithUserError('API key must be provided when using hosted LangSmith API')\",)\n", + "--- Logging error ---\n", + "Traceback (most recent call last):\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 1998, in _configure\n", + " handler = LangChainTracer(\n", + " ^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\tracers\\langchain.py\", line 91, in __init__\n", + " self.client = client or get_client()\n", + " ^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\tracers\\langchain.py\", line 54, in get_client\n", + " _CLIENT = Client()\n", + " ^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langsmith\\client.py\", line 534, in __init__\n", + " _validate_api_key_if_hosted(self.api_url, self.api_key)\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langsmith\\client.py\", line 323, in _validate_api_key_if_hosted\n", + " raise ls_utils.LangSmithUserError(\n", + "langsmith.utils.LangSmithUserError: API key must be provided when using hosted LangSmith API\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 1110, in emit\n", + " msg = self.format(record)\n", + " ^^^^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 953, in format\n", + " return fmt.format(record)\n", + " ^^^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 687, in format\n", + " record.message = record.getMessage()\n", + " ^^^^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 377, in getMessage\n", + " msg = msg % self.args\n", + " ~~~~^~~~~~~~~~~\n", + "TypeError: not all arguments converted during string formatting\n", + "Call stack:\n", + " File \"\", line 198, in _run_module_as_main\n", + " File \"\", line 88, in _run_code\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel_launcher.py\", line 17, in \n", + " app.launch_new_instance()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\traitlets\\config\\application.py\", line 1041, in launch_instance\n", + " app.start()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelapp.py\", line 711, in start\n", + " self.io_loop.start()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\tornado\\platform\\asyncio.py\", line 215, in start\n", + " 
self.asyncio_loop.run_forever()\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\base_events.py\", line 607, in run_forever\n", + " self._run_once()\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\base_events.py\", line 1922, in _run_once\n", + " handle._run()\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\events.py\", line 80, in _run\n", + " self._context.run(self._callback, *self._args)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 510, in dispatch_queue\n", + " await self.process_one()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 499, in process_one\n", + " await dispatch(*args)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 406, in dispatch_shell\n", + " await result\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 729, in execute_request\n", + " reply_content = await reply_content\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\ipkernel.py\", line 411, in do_execute\n", + " res = shell.run_cell(\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\zmqshell.py\", line 530, in run_cell\n", + " return super().run_cell(*args, **kwargs)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 2945, in run_cell\n", + " result = self._run_cell(\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3000, in _run_cell\n", + " return runner(coro)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\async_helpers.py\", line 129, in _pseudo_sync_runner\n", + " coro.send(None)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3203, in run_cell_async\n", + " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3382, in run_ast_nodes\n", + " if await self.run_code(code, result, async_=asy):\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3442, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"C:\\Users\\Acer\\AppData\\Local\\Temp\\ipykernel_30632\\4060515341.py\", line 47, in \n", + " print(chain.invoke(\"What's a black hole\"))\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\base.py\", line 2499, in invoke\n", + " input = step.invoke(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\base.py\", line 3108, in invoke\n", + " callback_manager = CallbackManager.configure(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 1450, in configure\n", + " return _configure(\n", + " File 
\"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 2004, in _configure\n", + " logger.warning(\n", + "Message: 'Unable to load requested LangChainTracer. To disable this warning, unset the LANGCHAIN_TRACING_V2 environment variables.'\n", + "Arguments: (\"LangSmithUserError('API key must be provided when using hosted LangSmith API')\",)\n", + "--- Logging error ---\n", + "Traceback (most recent call last):\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 1998, in _configure\n", + " handler = LangChainTracer(\n", + " ^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\tracers\\langchain.py\", line 91, in __init__\n", + " self.client = client or get_client()\n", + " ^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\tracers\\langchain.py\", line 54, in get_client\n", + " _CLIENT = Client()\n", + " ^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langsmith\\client.py\", line 534, in __init__\n", + " _validate_api_key_if_hosted(self.api_url, self.api_key)\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langsmith\\client.py\", line 323, in _validate_api_key_if_hosted\n", + " raise ls_utils.LangSmithUserError(\n", + "langsmith.utils.LangSmithUserError: API key must be provided when using hosted LangSmith API\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 1110, in emit\n", + " msg = self.format(record)\n", + " ^^^^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 953, in format\n", + " return fmt.format(record)\n", + " ^^^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 687, in format\n", + " record.message = record.getMessage()\n", + " ^^^^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 377, in getMessage\n", + " msg = msg % self.args\n", + " ~~~~^~~~~~~~~~~\n", + "TypeError: not all arguments converted during string formatting\n", + "Call stack:\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\threading.py\", line 995, in _bootstrap\n", + " self._bootstrap_inner()\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\threading.py\", line 1038, in _bootstrap_inner\n", + " self.run()\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\threading.py\", line 975, in run\n", + " self._target(*self._args, **self._kwargs)\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\concurrent\\futures\\thread.py\", line 83, in _worker\n", + " work_item.run()\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\concurrent\\futures\\thread.py\", line 58, in run\n", + " result = self.fn(*self.args, **self.kwargs)\n", + " File 
\"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\passthrough.py\", line 217, in invoke\n", + " return self._call_with_config(identity, input, config)\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\base.py\", line 1612, in _call_with_config\n", + " callback_manager = get_callback_manager_for_config(config)\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\config.py\", line 402, in get_callback_manager_for_config\n", + " return CallbackManager.configure(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 1450, in configure\n", + " return _configure(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 2004, in _configure\n", + " logger.warning(\n", + "Message: 'Unable to load requested LangChainTracer. To disable this warning, unset the LANGCHAIN_TRACING_V2 environment variables.'\n", + "Arguments: (\"LangSmithUserError('API key must be provided when using hosted LangSmith API')\",)\n", + "--- Logging error ---\n", + "Traceback (most recent call last):\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 1998, in _configure\n", + " handler = LangChainTracer(\n", + " ^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\tracers\\langchain.py\", line 91, in __init__\n", + " self.client = client or get_client()\n", + " ^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\tracers\\langchain.py\", line 54, in get_client\n", + " _CLIENT = Client()\n", + " ^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langsmith\\client.py\", line 534, in __init__\n", + " _validate_api_key_if_hosted(self.api_url, self.api_key)\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langsmith\\client.py\", line 323, in _validate_api_key_if_hosted\n", + " raise ls_utils.LangSmithUserError(\n", + "langsmith.utils.LangSmithUserError: API key must be provided when using hosted LangSmith API\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 1110, in emit\n", + " msg = self.format(record)\n", + " ^^^^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 953, in format\n", + " return fmt.format(record)\n", + " ^^^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 687, in format\n", + " record.message = record.getMessage()\n", + " ^^^^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 377, in getMessage\n", + " msg = msg % self.args\n", + " ~~~~^~~~~~~~~~~\n", + "TypeError: not all arguments converted during string formatting\n", + "Call stack:\n", + " File \"\", line 198, in 
_run_module_as_main\n", + " File \"\", line 88, in _run_code\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel_launcher.py\", line 17, in \n", + " app.launch_new_instance()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\traitlets\\config\\application.py\", line 1041, in launch_instance\n", + " app.start()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelapp.py\", line 711, in start\n", + " self.io_loop.start()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\tornado\\platform\\asyncio.py\", line 215, in start\n", + " self.asyncio_loop.run_forever()\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\base_events.py\", line 607, in run_forever\n", + " self._run_once()\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\base_events.py\", line 1922, in _run_once\n", + " handle._run()\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\events.py\", line 80, in _run\n", + " self._context.run(self._callback, *self._args)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 510, in dispatch_queue\n", + " await self.process_one()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 499, in process_one\n", + " await dispatch(*args)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 406, in dispatch_shell\n", + " await result\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 729, in execute_request\n", + " reply_content = await reply_content\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\ipkernel.py\", line 411, in do_execute\n", + " res = shell.run_cell(\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\zmqshell.py\", line 530, in run_cell\n", + " return super().run_cell(*args, **kwargs)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 2945, in run_cell\n", + " result = self._run_cell(\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3000, in _run_cell\n", + " return runner(coro)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\async_helpers.py\", line 129, in _pseudo_sync_runner\n", + " coro.send(None)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3203, in run_cell_async\n", + " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3382, in run_ast_nodes\n", + " if await self.run_code(code, result, async_=asy):\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3442, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"C:\\Users\\Acer\\AppData\\Local\\Temp\\ipykernel_30632\\4060515341.py\", line 47, in \n", + " print(chain.invoke(\"What's a black 
hole\"))\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\base.py\", line 2499, in invoke\n", + " input = step.invoke(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\base.py\", line 3963, in invoke\n", + " return self._call_with_config(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\base.py\", line 1612, in _call_with_config\n", + " callback_manager = get_callback_manager_for_config(config)\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\config.py\", line 402, in get_callback_manager_for_config\n", + " return CallbackManager.configure(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 1450, in configure\n", + " return _configure(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 2004, in _configure\n", + " logger.warning(\n", + "Message: 'Unable to load requested LangChainTracer. To disable this warning, unset the LANGCHAIN_TRACING_V2 environment variables.'\n", + "Arguments: (\"LangSmithUserError('API key must be provided when using hosted LangSmith API')\",)\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "Using PHYSICS\n", - "A black hole is a region in space where the gravitational pull is so strong that nothing, not even light, can escape from it. It is formed when a massive star collapses in on itself. The boundary surrounding a black hole is called the event horizon. Beyond the event horizon, the gravitational pull is so intense that even time and space are distorted. 
Black holes are some of the most mysterious and fascinating objects in the universe.\n" + "Using MATH\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "--- Logging error ---\n", + "Traceback (most recent call last):\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 1998, in _configure\n", + " handler = LangChainTracer(\n", + " ^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\tracers\\langchain.py\", line 91, in __init__\n", + " self.client = client or get_client()\n", + " ^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\tracers\\langchain.py\", line 54, in get_client\n", + " _CLIENT = Client()\n", + " ^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langsmith\\client.py\", line 534, in __init__\n", + " _validate_api_key_if_hosted(self.api_url, self.api_key)\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langsmith\\client.py\", line 323, in _validate_api_key_if_hosted\n", + " raise ls_utils.LangSmithUserError(\n", + "langsmith.utils.LangSmithUserError: API key must be provided when using hosted LangSmith API\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 1110, in emit\n", + " msg = self.format(record)\n", + " ^^^^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 953, in format\n", + " return fmt.format(record)\n", + " ^^^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 687, in format\n", + " record.message = record.getMessage()\n", + " ^^^^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 377, in getMessage\n", + " msg = msg % self.args\n", + " ~~~~^~~~~~~~~~~\n", + "TypeError: not all arguments converted during string formatting\n", + "Call stack:\n", + " File \"\", line 198, in _run_module_as_main\n", + " File \"\", line 88, in _run_code\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel_launcher.py\", line 17, in \n", + " app.launch_new_instance()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\traitlets\\config\\application.py\", line 1041, in launch_instance\n", + " app.start()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelapp.py\", line 711, in start\n", + " self.io_loop.start()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\tornado\\platform\\asyncio.py\", line 215, in start\n", + " self.asyncio_loop.run_forever()\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\base_events.py\", line 607, in run_forever\n", + " self._run_once()\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\base_events.py\", line 1922, in _run_once\n", + " handle._run()\n", + " File 
\"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\events.py\", line 80, in _run\n", + " self._context.run(self._callback, *self._args)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 510, in dispatch_queue\n", + " await self.process_one()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 499, in process_one\n", + " await dispatch(*args)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 406, in dispatch_shell\n", + " await result\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 729, in execute_request\n", + " reply_content = await reply_content\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\ipkernel.py\", line 411, in do_execute\n", + " res = shell.run_cell(\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\zmqshell.py\", line 530, in run_cell\n", + " return super().run_cell(*args, **kwargs)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 2945, in run_cell\n", + " result = self._run_cell(\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3000, in _run_cell\n", + " return runner(coro)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\async_helpers.py\", line 129, in _pseudo_sync_runner\n", + " coro.send(None)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3203, in run_cell_async\n", + " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3382, in run_ast_nodes\n", + " if await self.run_code(code, result, async_=asy):\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3442, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"C:\\Users\\Acer\\AppData\\Local\\Temp\\ipykernel_30632\\4060515341.py\", line 47, in \n", + " print(chain.invoke(\"What's a black hole\"))\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\base.py\", line 2499, in invoke\n", + " input = step.invoke(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\base.py\", line 3963, in invoke\n", + " return self._call_with_config(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\base.py\", line 1626, in _call_with_config\n", + " context.run(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\config.py\", line 347, in call_func_with_variable_args\n", + " return func(input, **kwargs) # type: ignore[call-arg]\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\base.py\", line 3847, in _invoke\n", + " output = output.invoke(\n", + " File 
\"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\prompts\\base.py\", line 128, in invoke\n", + " return self._call_with_config(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\base.py\", line 1612, in _call_with_config\n", + " callback_manager = get_callback_manager_for_config(config)\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\config.py\", line 402, in get_callback_manager_for_config\n", + " return CallbackManager.configure(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 1450, in configure\n", + " return _configure(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 2004, in _configure\n", + " logger.warning(\n", + "Message: 'Unable to load requested LangChainTracer. To disable this warning, unset the LANGCHAIN_TRACING_V2 environment variables.'\n", + "Arguments: (\"LangSmithUserError('API key must be provided when using hosted LangSmith API')\",)\n", + "--- Logging error ---\n", + "Traceback (most recent call last):\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 1998, in _configure\n", + " handler = LangChainTracer(\n", + " ^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\tracers\\langchain.py\", line 91, in __init__\n", + " self.client = client or get_client()\n", + " ^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\tracers\\langchain.py\", line 54, in get_client\n", + " _CLIENT = Client()\n", + " ^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langsmith\\client.py\", line 534, in __init__\n", + " _validate_api_key_if_hosted(self.api_url, self.api_key)\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langsmith\\client.py\", line 323, in _validate_api_key_if_hosted\n", + " raise ls_utils.LangSmithUserError(\n", + "langsmith.utils.LangSmithUserError: API key must be provided when using hosted LangSmith API\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 1110, in emit\n", + " msg = self.format(record)\n", + " ^^^^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 953, in format\n", + " return fmt.format(record)\n", + " ^^^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 687, in format\n", + " record.message = record.getMessage()\n", + " ^^^^^^^^^^^^^^^^^^^\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\logging\\__init__.py\", line 377, in getMessage\n", + " msg = msg % self.args\n", + " ~~~~^~~~~~~~~~~\n", + "TypeError: not all arguments converted during string formatting\n", + "Call stack:\n", + " File \"\", line 198, in _run_module_as_main\n", + " File \"\", line 88, in 
_run_code\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel_launcher.py\", line 17, in \n", + " app.launch_new_instance()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\traitlets\\config\\application.py\", line 1041, in launch_instance\n", + " app.start()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelapp.py\", line 711, in start\n", + " self.io_loop.start()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\tornado\\platform\\asyncio.py\", line 215, in start\n", + " self.asyncio_loop.run_forever()\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\base_events.py\", line 607, in run_forever\n", + " self._run_once()\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\base_events.py\", line 1922, in _run_once\n", + " handle._run()\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\events.py\", line 80, in _run\n", + " self._context.run(self._callback, *self._args)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 510, in dispatch_queue\n", + " await self.process_one()\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 499, in process_one\n", + " await dispatch(*args)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 406, in dispatch_shell\n", + " await result\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py\", line 729, in execute_request\n", + " reply_content = await reply_content\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\ipkernel.py\", line 411, in do_execute\n", + " res = shell.run_cell(\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\zmqshell.py\", line 530, in run_cell\n", + " return super().run_cell(*args, **kwargs)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 2945, in run_cell\n", + " result = self._run_cell(\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3000, in _run_cell\n", + " return runner(coro)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\async_helpers.py\", line 129, in _pseudo_sync_runner\n", + " coro.send(None)\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3203, in run_cell_async\n", + " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3382, in run_ast_nodes\n", + " if await self.run_code(code, result, async_=asy):\n", + " File \"C:\\Users\\Acer\\AppData\\Roaming\\Python\\Python311\\site-packages\\IPython\\core\\interactiveshell.py\", line 3442, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"C:\\Users\\Acer\\AppData\\Local\\Temp\\ipykernel_30632\\4060515341.py\", line 47, in \n", + " print(chain.invoke(\"What's a black hole\"))\n", + " File 
\"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\base.py\", line 2499, in invoke\n", + " input = step.invoke(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\language_models\\chat_models.py\", line 158, in invoke\n", + " self.generate_prompt(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\language_models\\chat_models.py\", line 560, in generate_prompt\n", + " return self.generate(prompt_messages, stop=stop, callbacks=callbacks, **kwargs)\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\language_models\\chat_models.py\", line 389, in generate\n", + " callback_manager = CallbackManager.configure(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 1450, in configure\n", + " return _configure(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 2004, in _configure\n", + " logger.warning(\n", + "Message: 'Unable to load requested LangChainTracer. To disable this warning, unset the LANGCHAIN_TRACING_V2 environment variables.'\n", + "Arguments: (\"LangSmithUserError('API key must be provided when using hosted LangSmith API')\",)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "**A black hole** is a region of spacetime exhibiting such strong gravitational effects that nothing—not even light—can escape from inside it. The theory of general relativity predicts that a sufficiently compact mass can deform spacetime to form a black hole. The boundary of no escape is called the event horizon.\n", + "\n", + "**Characteristics of a Black Hole:**\n", + "\n", + "* **Gravitational Pull:** Black holes have extremely strong gravitational pull due to their compact mass. Anything that comes too close to a black hole, including light, will be pulled in and cannot escape.\n", + "\n", + "* **Event Horizon:** The event horizon is the boundary around a black hole from which nothing can escape. Once an object crosses the event horizon, it is effectively trapped within the black hole.\n", + "\n", + "* **Singularity:** At the center of a black hole is a point called a singularity. This is where the gravitational forces become infinite and the laws of physics as we know them break down.\n", + "\n", + "* **No Hair Theorem:** According to the no-hair theorem, all black holes are essentially the same and can be fully described by only three parameters: mass, angular momentum, and electric charge.\n", + "\n", + "**Formation of a Black Hole:**\n", + "\n", + "Black holes are formed when massive stars collapse at the end of their lives. When a star with a mass several times that of our sun runs out of fuel, it can no longer support its own weight against gravitational collapse. The core of the star collapses under its own gravity, creating a black hole.\n", + "\n", + "**Types of Black Holes:**\n", + "\n", + "* **Stellar Black Holes:** These are black holes formed from the collapse of massive stars. They typically have masses ranging from a few solar masses to tens of solar masses.\n", + "\n", + "* **Supermassive Black Holes:** These are black holes with masses millions or even billions of times that of our sun. 
They are found at the centers of most galaxies, including our own Milky Way galaxy.\n", + "\n", + "* **Intermediate-Mass Black Holes:** These are black holes with masses between stellar and supermassive black holes, ranging from hundreds to thousands of solar masses. Their existence is still a matter of debate.\n" ] }, + [stderr elided: one more copy of the same LangSmith logging-error traceback; only its final frames are kept below] + " File 
\"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 1450, in configure\n", + " return _configure(\n", + " File \"c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\callbacks\\manager.py\", line 2004, in _configure\n", + " logger.warning(\n", + "Message: 'Unable to load requested LangChainTracer. To disable this warning, unset the LANGCHAIN_TRACING_V2 environment variables.'\n", + "Arguments: (\"LangSmithUserError('API key must be provided when using hosted LangSmith API')\",)\n" ] } ], @@ -324,7 +960,7 @@ "from langchain_core.output_parsers import StrOutputParser\n", "from langchain_core.prompts import PromptTemplate\n", "from langchain_core.runnables import RunnableLambda, RunnablePassthrough\n", - "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", + "from langchain_google_genai import GoogleGenerativeAIEmbeddings\n", "\n", "# Two prompts\n", "physics_template = \"\"\"You are a very smart physics professor. \\\n", @@ -342,7 +978,7 @@ "{query}\"\"\"\n", "\n", "# Embed prompts\n", - "embeddings = OpenAIEmbeddings()\n", + "embeddings = GoogleGenerativeAIEmbeddings(model=\"models/embedding-001\")\n", "prompt_templates = [physics_template, math_template]\n", "prompt_embeddings = embeddings.embed_documents(prompt_templates)\n", "\n", @@ -361,7 +997,8 @@ "chain = (\n", " {\"query\": RunnablePassthrough()}\n", " | RunnableLambda(prompt_router)\n", - " | ChatOpenAI()\n", + " | ChatGoogleGenerativeAI(model=\"gemini-pro\",\n", + " temperature=0.7, top_p=0.85)\n", " | StrOutputParser()\n", ")\n", "\n", @@ -373,9 +1010,9 @@ "id": "e40eb434-97e2-497d-8241-914941594ffe", "metadata": {}, "source": [ - "Trace: \n", + "Trace:\n", "\n", - "https://smith.langchain.com/public/98c25405-2631-4de8-b12a-1891aded3359/r" + "https://smith.langchain.com/public/98c25405-2631-4de8-b12a-1891aded3359/r\n" ] }, { @@ -396,7 +1033,7 @@ "\n", "https://blog.langchain.dev/query-construction/\n", "\n", - "https://blog.langchain.dev/enhancing-rag-based-applications-accuracy-by-constructing-and-leveraging-knowledge-graphs/" + "https://blog.langchain.dev/enhancing-rag-based-applications-accuracy-by-constructing-and-leveraging-knowledge-graphs/\n" ] }, { @@ -415,7 +1052,7 @@ "\n", "![Screenshot 2024-03-16 at 1.12.10 PM.png](attachment:3d933538-e73d-4922-8fe6-dbd2fc2cf2f5.png)\n", "\n", - "Many vectorstores contain metadata fields. 
\n", + "Many vectorstores contain metadata fields.\n", "\n", "This makes it possible to filter for specific chunks based on metadata.\n", "\n", @@ -423,12 +1060,12 @@ "\n", "Docs:\n", "\n", - "https://python.langchain.com/docs/use_cases/query_analysis/techniques/structuring" + "https://python.langchain.com/docs/use_cases/query_analysis/techniques/structuring\n" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "b22eb666-a6c2-4b3f-81dd-93ece81f035d", "metadata": {}, "outputs": [ @@ -438,14 +1075,14 @@ "{'source': 'pbAd8O1Lvm4',\n", " 'title': 'Self-reflective RAG with LangGraph: Self-RAG and CRAG',\n", " 'description': 'Unknown',\n", - " 'view_count': 11922,\n", + " 'view_count': 15666,\n", " 'thumbnail_url': 'https://i.ytimg.com/vi/pbAd8O1Lvm4/hq720.jpg',\n", " 'publish_date': '2024-02-07 00:00:00',\n", " 'length': 1058,\n", " 'author': 'LangChain'}" ] }, - "execution_count": 11, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -472,12 +1109,12 @@ "\n", "We want to convert natural langugae into structured search queries.\n", "\n", - "We can define a schema for structured search queries." + "We can define a schema for structured search queries.\n" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "7731745b-accc-4cf1-8291-e12d1aa46361", "metadata": {}, "outputs": [], @@ -540,18 +1177,18 @@ "id": "6054bc98-0ae1-45e6-8f06-22c65ec47180", "metadata": {}, "source": [ - "Now, we prompt the LLM to produce queries." + "Now, we prompt the LLM to produce queries.\n" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "f699d9e7-468e-4574-bdba-f4be4a5779de", "metadata": {}, "outputs": [], "source": [ "from langchain_core.prompts import ChatPromptTemplate\n", - "from langchain_openai import ChatOpenAI\n", + "from langchain_google_genai import ChatGoogleGenerativeAI\n", "\n", "system = \"\"\"You are an expert at converting user questions into database queries. \\\n", "You have access to a database of tutorial videos about a software library for building LLM-powered applications. \\\n", @@ -564,14 +1201,15 @@ " (\"human\", \"{question}\"),\n", " ]\n", ")\n", - "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)\n", + "llm = ChatGoogleGenerativeAI(model=\"gemini-pro\",\n", + " temperature=0.7, top_p=0.85)\n", "structured_llm = llm.with_structured_output(TutorialSearch)\n", "query_analyzer = prompt | structured_llm" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "1b776858-a589-4fe5-a8a3-19530706075d", "metadata": {}, "outputs": [ @@ -590,7 +1228,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "65bfad12-1985-433e-a980-eb8c9da53f72", "metadata": {}, "outputs": [ @@ -613,7 +1251,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "99643372-01cc-49cc-a507-bebbed096247", "metadata": {}, "outputs": [ @@ -635,7 +1273,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "c26f2329-d091-4a47-995a-822e3f062ea1", "metadata": {}, "outputs": [ @@ -662,7 +1300,7 @@ "id": "90111ec1-784f-4b8c-bd91-6122edb7eb25", "metadata": {}, "source": [ - "To then connect this to various vectorstores, you can follow [here](https://python.langchain.com/docs/modules/data_connection/retrievers/self_query#constructing-from-scratch-with-lcel)." 
+ "To then connect this to various vectorstores, you can follow [here](https://python.langchain.com/docs/modules/data_connection/retrievers/self_query#constructing-from-scratch-with-lcel).\n" ] }, { @@ -690,7 +1328,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/rag_from_scratch_12_to_14.ipynb b/rag_from_scratch_12_to_14.ipynb index 26c65f7..7c239a5 100644 --- a/rag_from_scratch_12_to_14.ipynb +++ b/rag_from_scratch_12_to_14.ipynb @@ -24,7 +24,7 @@ "\n", "## Enviornment\n", "\n", - "`(1) Packages`" + "`(1) Packages`\n" ] }, { @@ -44,20 +44,20 @@ "source": [ "`(2) LangSmith`\n", "\n", - "https://docs.smith.langchain.com/" + "https://docs.smith.langchain.com/\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "6098b8bf-354d-4eea-ba25-25fe12ba6b6b", "metadata": {}, "outputs": [], "source": [ "import os\n", - "os.environ['LANGCHAIN_TRACING_V2'] = 'true'\n", - "os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'\n", - "os.environ['LANGCHAIN_API_KEY'] = " + "# os.environ['LANGCHAIN_TRACING_V2'] = 'true'\n", + "# os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'\n", + "# os.environ['LANGCHAIN_API_KEY'] = " ] }, { @@ -65,17 +65,17 @@ "id": "afbd20f1-af47-409e-bfbf-3a698b310e7e", "metadata": {}, "source": [ - "`(3) API Keys`" + "`(3) API Keys`\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "d966f427-1a9f-4bc8-b1fa-5df6078b1df6", "metadata": {}, "outputs": [], "source": [ - "os.environ['OPENAI_API_KEY'] = " + "os.environ['GOOGLE_API_KEY'] = " ] }, { @@ -90,9 +90,9 @@ "source": [ "## Part 12: Multi-representation Indexing\n", "\n", - "Flow: \n", + "Flow:\n", "\n", - " ![Screenshot 2024-03-16 at 5.54.55 PM.png](attachment:3eee1e62-6f49-4ca5-9d9b-16df2b6ffe06.png)\n", + "![Screenshot 2024-03-16 at 5.54.55 PM.png](attachment:3eee1e62-6f49-4ca5-9d9b-16df2b6ffe06.png)\n", "\n", "Docs:\n", "\n", @@ -102,12 +102,12 @@ "\n", "Paper:\n", "\n", - "https://arxiv.org/abs/2312.06648" + "https://arxiv.org/abs/2312.06648\n" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "id": "1bf368e7-ebf6-4469-bfa7-62466184afbb", "metadata": {}, "outputs": [], @@ -124,7 +124,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "id": "431c9506-c6c0-463b-af77-9291a63f1d26", "metadata": {}, "outputs": [], @@ -134,12 +134,13 @@ "from langchain_core.documents import Document\n", "from langchain_core.output_parsers import StrOutputParser\n", "from langchain_core.prompts import ChatPromptTemplate\n", - "from langchain_openai import ChatOpenAI\n", + "# from langchain_openai import ChatOpenAI\n", + "from langchain_google_genai import GoogleGenerativeAI\n", "\n", "chain = (\n", " {\"doc\": lambda x: x.page_content}\n", " | ChatPromptTemplate.from_template(\"Summarize the following document:\\n\\n{doc}\")\n", - " | ChatOpenAI(model=\"gpt-3.5-turbo\",max_retries=0)\n", + " | GoogleGenerativeAI(model=\"gemini-pro\",temperature=0.7)\n", " | StrOutputParser()\n", ")\n", "\n", @@ -148,19 +149,42 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, + "id": "a53e16f6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['**Summary**\\n\\nBuilding autonomous agents with a large language model (LLM) as the core controller is a promising concept. 
LLM-powered autonomous agents have demonstrated potential beyond generating text and code. They can be framed as powerful general problem solvers.\\n\\n**Agent System Overview**\\n\\nAn LLM-powered autonomous agent system typically comprises an LLM, memory components, and tool usage capabilities.\\n\\n**Memory Components**\\n\\n* **Short-term memory:** In-context learning, where the model retains information within a limited context window.\\n* **Long-term memory:** External vector store that allows the agent to retrieve and recall an infinite amount of information.\\n\\n**Tool Usage**\\n\\n* Agents can access external APIs for missing information, such as code execution, proprietary data sources, etc.\\n\\n**Planning**\\n\\n* **Task decomposition:** LLMs break down complex tasks into smaller, manageable subgoals using techniques like Chain of Thought (CoT) and Tree of Thoughts (ToT).\\n* **Self-reflection:** Agents can review past actions, learn from mistakes, and refine future steps using methods like ReAct, Reflexion, Chain of Hindsight (CoH), and Algorithm Distillation (AD).\\n\\n**Challenges**\\n\\n* Finite context length limits the inclusion of historical information and detailed instructions.\\n* Long-term planning and task decomposition remain challenging.\\n* Natural language interface can be unreliable due to model errors and inconsistent behavior.',\n", + " '**High-Quality Human Data for Model Training**\\n\\n**Introduction:**\\nHigh-quality data is crucial for training deep learning models. Human annotation is the primary source for task-specific labeled data.\\n\\n**Human Raters and Data Quality:**\\nData quality is influenced by factors such as:\\n\\n* Task design\\n* Rater selection and training\\n* Data collection and aggregation\\n\\n**The Wisdom of the Crowd:**\\nCrowdsourcing platforms like Amazon Mechanical Turk can provide non-expert annotations. However, quality control is essential, and methods like weighted voting, raw agreement, and Cohen\\'s Kappa are used to assess inter-rater agreement.\\n\\n**Rater Agreement:**\\nMultiple raters can provide labels for the same data. Probabilistic graph modeling techniques can estimate rater competence and predict true labels.\\n\\n**Rater Disagreement:**\\nDisagreement can arise due to subjective interpretations or innate diversity of opinions. 
Two paradigms exist:\\n\\n* **Descriptive:** Embraces subjectivity, capturing diverse perspectives.\\n* **Prescriptive:** Discourages subjectivity, aiming for consistency in applying one belief.\\n\\n**Data Quality for Model Training:**\\nMethods to identify mislabeled data include:\\n\\n* **Influence Functions:** Measure the impact of data points on model parameters and loss function.\\n* **Prediction Changes during Training:** Track model predictions during training to identify hard-to-learn or potentially mislabeled samples.\\n* **Area under the Margin (AUM):** Detects mislabeled samples based on the tension between generalization and (wrong) prediction in gradient updates.\\n* **Noisy Cross-Validation (NCV):** Identifies \"clean\" samples by matching labels with predictions from a model trained on a different dataset subset.']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "summaries" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "id": "dc5614c1-121c-4ad5-8609-cc0e4a633ee9", "metadata": {}, "outputs": [], "source": [ "from langchain.storage import InMemoryByteStore\n", - "from langchain_openai import OpenAIEmbeddings\n", + "# from langchain_openai import OpenAIEmbeddings\n", + "from langchain_google_genai import GoogleGenerativeAIEmbeddings\n", "from langchain_community.vectorstores import Chroma\n", "from langchain.retrievers.multi_vector import MultiVectorRetriever\n", "\n", "# The vectorstore to use to index the child chunks\n", "vectorstore = Chroma(collection_name=\"summaries\",\n", - " embedding_function=OpenAIEmbeddings())\n", + " embedding_function=GoogleGenerativeAIEmbeddings(model=\"models/embedding-001\"))\n", "\n", "# The storage layer for the parent documents\n", "store = InMemoryByteStore()\n", @@ -187,17 +211,17 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "id": "f111ca83-3e56-4785-bac3-99948cd8df1b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Document(page_content='The document discusses the concept of building autonomous agents powered by Large Language Models (LLMs) as their core controllers. It covers components such as planning, memory, and tool use, along with case studies and proof-of-concept examples like AutoGPT and GPT-Engineer. Challenges like finite context length, planning difficulties, and reliability of natural language interfaces are also highlighted. The document provides references to related research papers and offers a comprehensive overview of LLM-powered autonomous agents.', metadata={'doc_id': 'cf31524b-fe6a-4b28-a980-f5687c9460ea'})" + "Document(page_content='**Summary**\\n\\nBuilding autonomous agents with a large language model (LLM) as the core controller is a promising concept. LLM-powered autonomous agents have demonstrated potential beyond generating text and code. 
They can be framed as powerful general problem solvers.\\n\\n**Agent System Overview**\\n\\nAn LLM-powered autonomous agent system typically comprises an LLM, memory components, and tool usage capabilities.\\n\\n**Memory Components**\\n\\n* **Short-term memory:** In-context learning, where the model retains information within a limited context window.\\n* **Long-term memory:** External vector store that allows the agent to retrieve and recall an infinite amount of information.\\n\\n**Tool Usage**\\n\\n* Agents can access external APIs for missing information, such as code execution, proprietary data sources, etc.\\n\\n**Planning**\\n\\n* **Task decomposition:** LLMs break down complex tasks into smaller, manageable subgoals using techniques like Chain of Thought (CoT) and Tree of Thoughts (ToT).\\n* **Self-reflection:** Agents can review past actions, learn from mistakes, and refine future steps using methods like ReAct, Reflexion, Chain of Hindsight (CoH), and Algorithm Distillation (AD).\\n\\n**Challenges**\\n\\n* Finite context length limits the inclusion of historical information and detailed instructions.\\n* Long-term planning and task decomposition remain challenging.\\n* Natural language interface can be unreliable due to model errors and inconsistent behavior.', metadata={'doc_id': '64998995-a74e-4662-98b4-f27513a4a847'})" ] }, - "execution_count": 5, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -242,7 +266,7 @@ "id": "a52d8214-a997-4761-a8a3-0a29109410be", "metadata": {}, "source": [ - "Related idea is the [parent document retriever](https://python.langchain.com/docs/modules/data_connection/retrievers/parent_document_retriever)." + "Related idea is the [parent document retriever](https://python.langchain.com/docs/modules/data_connection/retrievers/parent_document_retriever).\n" ] }, { @@ -271,7 +295,7 @@ "\n", "Full code:\n", "\n", - "https://github.com/langchain-ai/langchain/blob/master/cookbook/RAPTOR.ipynb" + "https://github.com/langchain-ai/langchain/blob/master/cookbook/RAPTOR.ipynb\n" ] }, { @@ -281,15 +305,15 @@ "source": [ "## Part 14: ColBERT\n", "\n", - "RAGatouille makes it as simple to use ColBERT. \n", + "RAGatouille makes it as simple to use ColBERT.\n", "\n", - "ColBERT generates a contextually influenced vector for each token in the passages. \n", + "ColBERT generates a contextually influenced vector for each token in the passages.\n", "\n", "ColBERT similarly generates vectors for each token in the query.\n", "\n", "Then, the score of each document is the sum of the maximum similarity of each query embedding to any of the document embeddings:\n", "\n", - "See [here](https://hackernoon.com/how-colbert-helps-developers-overcome-the-limits-of-rag) and [here](https://python.langchain.com/docs/integrations/retrievers/ragatouille) and [here](https://til.simonwillison.net/llms/colbert-ragatouille)." 
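To make that scoring rule concrete, here is a minimal NumPy sketch (not part of the notebook) of ColBERT-style MaxSim scoring, assuming you already have one embedding vector per token for the query and for each passage:

```python
import numpy as np

def maxsim_score(query_embs: np.ndarray, doc_embs: np.ndarray) -> float:
    """ColBERT-style late interaction: for each query token embedding, take its
    maximum cosine similarity over all document token embeddings, then sum."""
    # Normalize rows so the dot product equals cosine similarity.
    q = query_embs / np.linalg.norm(query_embs, axis=1, keepdims=True)
    d = doc_embs / np.linalg.norm(doc_embs, axis=1, keepdims=True)
    sim = q @ d.T                         # shape: (num_query_tokens, num_doc_tokens)
    return float(sim.max(axis=1).sum())   # max over doc tokens, summed over query tokens

# Toy usage with random "token embeddings" (3 query tokens, 8 doc tokens, dim 128);
# in practice RAGatouille/ColBERT produces these embeddings for you.
rng = np.random.default_rng(0)
score = maxsim_score(rng.normal(size=(3, 128)), rng.normal(size=(8, 128)))
```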
+ "See [here](https://hackernoon.com/how-colbert-helps-developers-overcome-the-limits-of-rag) and [here](https://python.langchain.com/docs/integrations/retrievers/ragatouille) and [here](https://til.simonwillison.net/llms/colbert-ragatouille).\n" ] }, { @@ -682,7 +706,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/rag_from_scratch_15_to_18.ipynb b/rag_from_scratch_15_to_18.ipynb index bf0bdec..b69b602 100644 --- a/rag_from_scratch_15_to_18.ipynb +++ b/rag_from_scratch_15_to_18.ipynb @@ -12,7 +12,7 @@ "source": [ "# Rag From Scratch: Retrieval\n", "\n", - "![Screenshot 2024-03-25 at 8.23.58 PM.png](attachment:c6ee3569-ca87-49b5-8f2c-21193230f8d4.png)" + "![Screenshot 2024-03-25 at 8.23.58 PM.png](attachment:c6ee3569-ca87-49b5-8f2c-21193230f8d4.png)\n" ] }, { @@ -22,7 +22,7 @@ "source": [ "## Enviornment\n", "\n", - "`(1) Packages`" + "`(1) Packages`\n" ] }, { @@ -42,7 +42,7 @@ "source": [ "`(2) LangSmith`\n", "\n", - "https://docs.smith.langchain.com/" + "https://docs.smith.langchain.com/\n" ] }, { @@ -63,17 +63,26 @@ "id": "47a210bd-3869-401b-8157-e4a9c0711359", "metadata": {}, "source": [ - "`(3) API Keys`" + "`(3) API Keys`\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "aeb22812-822d-4320-be3c-ab0c52356914", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "invalid syntax (92338635.py, line 1)", + "output_type": "error", + "traceback": [ + "\u001b[1;36m Cell \u001b[1;32mIn[1], line 1\u001b[1;36m\u001b[0m\n\u001b[1;33m os.environ['GOOGLE_API_KEY'] = \u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n" + ] + } + ], "source": [ - "os.environ['OPENAI_API_KEY'] = \n", + "os.environ['GOOGLE_API_KEY'] = \n", "os.environ['COHERE_API_KEY'] = " ] }, @@ -91,7 +100,7 @@ "\n", "We showed this previously with RAG-fusion.\n", "\n", - "![Screenshot 2024-03-25 at 2.59.21 PM.png](attachment:f0d70de3-4427-4849-a35f-92ab0e5e91cf.png)" + "![Screenshot 2024-03-25 at 2.59.21 PM.png](attachment:f0d70de3-4427-4849-a35f-92ab0e5e91cf.png)\n" ] }, { @@ -289,11 +298,11 @@ "id": "a70968aa-52e8-41b9-96f1-7d811351512e", "metadata": {}, "source": [ - "We can also use [Cohere Re-Rank](https://python.langchain.com/docs/integrations/retrievers/cohere-reranker#doing-reranking-with-coherererank). 
\n", + "We can also use [Cohere Re-Rank](https://python.langchain.com/docs/integrations/retrievers/cohere-reranker#doing-reranking-with-coherererank).\n", "\n", "See [here](https://txt.cohere.com/rerank/):\n", "\n", - "![data-src-image-387e0861-93de-4823-84e0-7ae04f2be893.png](attachment:f46d29d0-e1a2-4c09-8b65-d7f5b675209d.png)" + "![data-src-image-387e0861-93de-4823-84e0-7ae04f2be893.png](attachment:f46d29d0-e1a2-4c09-8b65-d7f5b675209d.png)\n" ] }, { @@ -343,7 +352,7 @@ "\n", "https://github.com/langchain-ai/langgraph/blob/main/examples/rag/langgraph_crag.ipynb\n", "\n", - "https://github.com/langchain-ai/langgraph/blob/main/examples/rag/langgraph_crag_mistral.ipynb" + "https://github.com/langchain-ai/langgraph/blob/main/examples/rag/langgraph_crag_mistral.ipynb\n" ] }, { @@ -353,15 +362,13 @@ "source": [ "# Generation\n", "\n", - "\n", - "\n", "## 17 - Retrieval (Self-RAG)\n", - " \n", + "\n", "`Notebooks`\n", "\n", "https://github.com/langchain-ai/langgraph/tree/main/examples/rag\n", "\n", - "https://github.com/langchain-ai/langgraph/blob/main/examples/rag/langgraph_self_rag_mistral_nomic.ipynb" + "https://github.com/langchain-ai/langgraph/blob/main/examples/rag/langgraph_self_rag_mistral_nomic.ipynb\n" ] }, { @@ -369,7 +376,7 @@ "id": "48f233df-b2a3-438c-8d65-5e83bf6ace64", "metadata": {}, "source": [ - "## 18 - Impact of long context \n", + "## 18 - Impact of long context\n", "\n", "`Deep dive`\n", "\n", @@ -377,7 +384,7 @@ "\n", "`Slides`\n", "\n", - "https://docs.google.com/presentation/d/1mJUiPBdtf58NfuSEQ7pVSEQ2Oqmek7F1i4gBwR6JDss/edit#slide=id.g26c0cb8dc66_0_0" + "https://docs.google.com/presentation/d/1mJUiPBdtf58NfuSEQ7pVSEQ2Oqmek7F1i4gBwR6JDss/edit#slide=id.g26c0cb8dc66_0_0\n" ] } ], @@ -397,7 +404,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/rag_from_scratch_1_to_4.ipynb b/rag_from_scratch_1_to_4.ipynb index 85f91e2..4a80578 100644 --- a/rag_from_scratch_1_to_4.ipynb +++ b/rag_from_scratch_1_to_4.ipynb @@ -20,17 +20,17 @@ "\n", "## Enviornment\n", "\n", - "`(1) Packages`" + "`(1) Packages`\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "3a88555a-53a5-4ab8-ba3d-e6dd3a26c71a", "metadata": {}, "outputs": [], "source": [ - "! pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain" + "# ! 
pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain" ] }, { @@ -40,20 +40,30 @@ "source": [ "`(2) LangSmith`\n", "\n", - "https://docs.smith.langchain.com/" + "https://docs.smith.langchain.com/\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, + "id": "99480411", + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install --quiet langchain-google-genai\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, "id": "b76f68a8-4745-4377-8057-6090b87377d1", "metadata": {}, "outputs": [], "source": [ "import os\n", - "os.environ['LANGCHAIN_TRACING_V2'] = 'true'\n", - "os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'\n", - "os.environ['LANGCHAIN_API_KEY'] = " + "# os.environ['LANGCHAIN_TRACING_V2'] = 'true'\n", + "# os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'\n", + "# os.environ['LANGCHAIN_API_KEY'] = \"\"" ] }, { @@ -61,17 +71,18 @@ "id": "f8eb312d-8a07-4df3-8462-72ac526715f7", "metadata": {}, "source": [ - "`(3) API Keys`" + "`(3) API Keys`\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "df28175e-24b6-4939-8a3c-5a1f9511f51e", "metadata": {}, "outputs": [], "source": [ - "os.environ['OPENAI_API_KEY'] = " + "# os.environ['GOOGLE_API_KEY'] = \n", + "os.environ['GOOGLE_API_KEY'] = " ] }, { @@ -80,16 +91,34 @@ "metadata": {}, "source": [ "## Part 1: Overview\n", - " \n", - "[RAG quickstart](https://python.langchain.com/docs/use_cases/question_answering/quickstart)" + "\n", + "[RAG quickstart](https://python.langchain.com/docs/use_cases/question_answering/quickstart)\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "98070313-0c2f-4ba6-ae3e-79e2418ce4df", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Document(page_content='### my thoughts\\nas the holocust was for a religious reason, i used to think that prayers adn preaching of god would be common in concerntration. i am surprised as how little significance the religion had on the inmates. the religion didn\\'t give them meaning. they didn;t curse the god. teh writer must be an athiest. the religion plays no role at all in his suffering and overcoming it.\\n\\nA thought transfixed me: for the first time in my life I saw the truth as it is set into song by so many\\npoets, proclaimed as the final wisdom by so many thinkers. The truth —that love is the ultimate and\\nthe highest goal to which man can aspire. Then I grasped the meaning of the greatest secret that\\nhuman poetry and human thought and belief have to impart: The salvation of man is through love\\nand in love. I understood how a man who has nothing left in this world still may know bliss, be it only\\nfor a brief moment, in the contemplation of his beloved. In a position of utter desolation, when man\\ncannot express himself in positive action, when his only achievement may consist in enduring his\\nsufferings in the right way—an honourable way— in such a position man can, through loving\\ncontemplation of the image he carries of his beloved, achieve fulfilment. 
For the first time in my life I\\nwas able to understand the meaning of the words, \"The angels are lost in perpetual contemplation\\nof an infinite glory.\"\\n\\n- **“Emotion, which is suffering, ceases to be suffering as soon as we form a clear and precise picture of it.”**\\n\\n―\\xa0Baruch Spinoza,\\xa0Ethics\\n\\n- Emotions and feelings are two different things. The misconstrual happens when emotion transitions to feeling. Emotion happens before feeling, and the way you interpret your emotions shape how you feel.\\n- What you have experienced, no power on earth can take from you.) N\\n- These psychological stages reflect their responses to the extreme conditions they faced:\\n\\n1. **Shock**: Upon arrival at the camp, prisoners experienced a state of shock as they were confronted with the harsh realities of their situation. They clung to hope, often in the form of delusions, believing that their ordeal would not be as terrible as it seemed.\\n \\n2. **Detachment or Emotional Death**: After settling into camp life, prisoners entered a phase of emotional detachment. They became numb to the suffering around them, focusing solely on survival. This phase involved a struggle to maintain both physical and mental well-being amid extreme adversity.\\n \\n3. **Depersonalization**: Following liberation from the camps, prisoners faced a phase of depersonalization. They struggled to adjust to freedom, feeling disconnected from reality and grappling with feelings of bitterness and disillusionment. This phase marked a challenging transition back to normal life, as survivors struggled to reconcile their experiences with the outside world.\\n\\n## self discovery vs the self transcendence\\nBy declaring that man is responsible and must actualize the potential meaning of his life, I wish to\\nstress that the true meaning of life is to be discovered in the world rather than within man or his\\nown psyche, as though it were a closed system. I have termed this constitutive characteristic \"the\\nself-transcendence of human existence.\" It denotes the fact that being human always points, and is\\ndirected, to something, or someone, other than oneself—be it a meaning to fulfil or another human\\nbeing to encounter. The more one forgets himself—by giving himself to a cause to serve or another\\nperson to love—the more human he is and the more he actualizes himself. What is called selfactualization\\nis not an attainable aim at all, for the simple reason that the more one would strive for\\nit, the more he would miss it. In other words, self-actualization is possible only as a side-effect of\\nself-transcendence.\\n### three values\\n**Finding Meaning in Life: 3 Paths**\\n\\n- By performing a deed or creating something–taking action.\\n- By coming into contact with someone or experiencing something.\\n- By experiencing unavoidable suffering, and the attitude we take toward it.\\n\\n# frued \\n \\nSigmund Freud, the founder of psychoanalysis, developed a complex theory of personality that explores the dynamics of the human mind and its underlying structures. At the core of Freud\\'s theory are three distinct components of personality: the id, the ego, and the superego. Let\\'s delve into each one:\\n\\n1. **Id**: According to Freud, the id is the primitive and instinctual part of the mind that operates on the pleasure principle. It seeks immediate gratification of basic needs and desires, without considering the consequences or moral implications. 
The id operates at an unconscious level and is present from birth. It is driven by biological urges and impulses, such as hunger, thirst, and sexual desires. Freud likened the id to a wild and unruly child, seeking immediate satisfaction without regard for social norms or consequences.\\n \\n2. **Ego**: The ego, in Freud\\'s theory, is the part of the mind that mediates between the demands of the id, the constraints of reality, and the moral standards of the superego. The ego operates on the reality principle, seeking to satisfy the desires of the id in ways that are realistic and socially acceptable. It develops as a result of interaction with the external world and matures over time, gaining the ability to delay gratification and make rational decisions. The ego is conscious and rational, balancing the conflicting demands of the id and superego while navigating the complexities of everyday life.\\n \\n3. **Superego**: The superego represents the internalized moral standards and values of society, as well as the individual\\'s own sense of right and wrong. It acts as a conscience, imposing moral ideals, standards, and judgments on the ego\\'s actions. The superego develops during childhood through identification with parental figures and societal norms. It strives for perfection and moral purity, often leading to feelings of guilt or shame when its standards are not met. The superego operates at both conscious and unconscious levels, influencing behavior through feelings of moral obligation and the desire to conform to societal norms.\\n\\n# alfred adler\\n1. **Striving for Superiority**: Adler proposed that the primary motivating force behind human behavior is the \"striving for superiority.\" Unlike Freud, who focused on the pleasure principle, Adler believed that individuals are primarily driven by a desire to overcome feelings of inferiority and achieve a sense of mastery and competence. This striving for superiority is innate and universal, motivating individuals to strive for personal growth, success, and social contribution.\\n \\n2. **Inferiority Complex**: Central to Adler\\'s theory is the concept of the \"inferiority complex,\" which refers to feelings of inadequacy and inferiority that arise when individuals perceive themselves as unable to meet the challenges of life. Adler believed that everyone experiences feelings of inferiority at some point, often stemming from childhood experiences or perceived shortcomings. However, he emphasized that these feelings can be overcome through striving for superiority and developing a sense of self-efficacy.\\n \\n3. **Social Interest**: Adler proposed that humans are inherently social beings and that social relationships play a crucial role in shaping personality development. He introduced the concept of \"social interest,\" which refers to a person\\'s innate tendency to connect with others, cooperate, and contribute to the welfare of society. According to Adler, individuals with a strong sense of social interest are more likely to experience feelings of belongingness, empathy, and community, leading to greater psychological well-being.\\n \\n4. **Style of Life**: Adler believed that each individual develops a unique \"style of life\" or a consistent pattern of behaviors, thoughts, and feelings that characterizes their approach to life. This style of life is influenced by early experiences, family dynamics, and the individual\\'s efforts to overcome feelings of inferiority. 
Adler emphasized the importance of understanding an individual\\'s style of life in therapy, as it provides insight into their goals, motivations, and subjective experiences.\\n\\n# schopehaur\\n- Pessimism: Schopenhauer\\'s philosophy is often characterized by its pessimistic outlook on life. He believed that human existence is inherently characterized by suffering, frustration, and dissatisfaction, and that happiness is fleeting and ultimately elusive.\\n- Will to Live: Schopenhauer proposed that the fundamental driving force of human existence is the \"will to live,\" an unconscious and irrational force that compels individuals to seek survival, reproduction, and self-preservation. He viewed this will as the root cause of suffering and conflict in the world.\\n- The Illusion of Individuality: Schopenhauer argued that the sense of individual identity is illusory, as it is ultimately derived from the underlying unity of the will. He believed that true enlightenment and liberation come from recognizing the interconnectedness of all beings and transcending the ego.\\n- Aesthetics: Schopenhauer placed great importance on the arts as a means of accessing deeper truths about human experience. He viewed aesthetic experiences, such as music, as a direct expression of the will and a way to momentarily transcend the suffering of existence.\\n- Ethics: Schopenhauer\\'s ethical philosophy centered on compassion and empathy for others. He believed that the recognition of the suffering inherent in all life should lead to a sense of compassion toward all living beings, motivating ethical behavior and altruism.\\n- Asceticism: Schopenhauer advocated for a life of simplicity and renunciation of worldly desires as a means of mitigating suffering and achieving inner peace. He believed that detachment from material possessions and sensory pleasures could lead to a deeper understanding of reality and a reduction in suffering.\\n# kant\\n- **Categorical Imperative**: Kant\\'s ethical philosophy revolves around the concept of the categorical imperative, which is a universal moral law that applies to all rational beings. According to Kant, individuals should act according to maxims that they would want to be universally followed, regardless of the consequences. This principle emphasizes the importance of moral duty and rationality in ethical decision-making.\\n \\n- **Autonomy and Freedom**: Kant emphasized the importance of human autonomy and freedom. He argued that individuals possess inherent dignity and worth as rational beings and should be treated as ends in themselves, rather than as means to an end. Kant\\'s notion of freedom is closely tied to the idea of self-legislation, wherein individuals are bound by moral laws that they impose upon themselves through reason.\\n \\n- **A Priori Knowledge**: Kant distinguished between a priori knowledge, which is independent of experience and derived from pure reason, and a posteriori knowledge, which is derived from sensory experience. He argued that certain fundamental concepts, such as space, time, and causality, are innate to the human mind and provide the framework for understanding and interpreting sensory experience.\\n \\n- **Transcendental Idealism**: Kant\\'s metaphysical position, known as transcendental idealism, posits that the structure of reality is shaped by the cognitive faculties of the human mind. 
He argued that we can never know things as they are in themselves (noumena), but only as they appear to us through the categories of the understanding and the forms of intuition (phenomena).\\n \\n- **Critique of Pure Reason**: Kant\\'s major work, the \"Critique of Pure Reason,\" sought to reconcile the rationalist and empiricist traditions in philosophy by examining the limits and possibilities of human knowledge. He aimed to establish the conditions under which knowledge is possible and to critique the traditional metaphysical claims of previous philosophers.\\n \\n- **Aesthetic Judgment**: Kant\\'s aesthetics focused on the nature of beauty and aesthetic experience. He proposed that judgments of beauty are based on subjective feelings of pleasure that arise from the harmonious interplay of form and content. Kant distinguished between the free play of imagination and understanding in aesthetic judgment and emphasized the universality of aesthetic taste.\\n\\n# stoicism\\n## 1. Live According To Nature\\n\\nThe Stoics believed in living according to nature and accepting what happens naturally. This means not fighting against the inevitable or getting attached to things outside your control. By living in harmony with nature, you can learn to accept whatever happens with equanimity. Getting upset over things you can’t control is futile.\\n\\n## 2. Focus On What You Can Control\\n\\nStoicism teaches us to focus our energy on what we can control – our thoughts, attitudes, and actions. Realize that you can’t control external events or other people’s behaviors. But you have power over your perspective and response. Practice focusing on your own choice of thoughts and actions rather than worrying about things outside your control.\\n\\n## 3. Manage Your Expectations\\n\\nThe Stoics warned against building up unrealistic expectations about life and others. When you expect things to be a certain way, you often set yourself up for disappointment. Instead, focus on managing your expectations and not anticipating more than what is realistic. Accept that people and life will not always align with your plans or ideals. Let go of attachments to specific outcomes.\\n\\n## 4. Practice Negative Visualization\\n\\nImagine worst-case scenarios to prepare yourself mentally. Regularly reflect on what you fear losing- your job, reputation, possessions, or loved ones. Visualize it gone. This process can help you appreciate what you have and build the mental resilience to handle loss. If the worst were to happen, you would still carry on.\\n\\n## 5. Reframe Your Perspective\\n\\nHow you view events largely shapes your emotions. When something happens that upsets you, try reframing your perspective. Look at the situation objectively from different angles. Consider how you might view this years from now. Please put it in a broader context. Reframing can help you avoid getting stuck in a negative narrative.\\n\\n## 6. Accept What Comes Your Way\\n\\nLife constantly changes and brings unpredictable events, both good and bad. Accept that this is part of nature. When something you wish hadn’t happened, focus on moving forward constructively. Don’t fight against reality. Acceptance provides peace of mind.\\n\\n## 7. Remember You Will Die\\n\\nContemplating your mortality puts things in perspective. We all will die and can’t control when. Remembering this can help you let go of petty concerns and live more purposefully. Don’t worry about insignificant problems or things you can’t change. 
Focus on what truly matters most while you’re still alive.\\n', metadata={'source': './data.txt'})]\n" + ] + }, + { + "data": { + "text/plain": [ + "'I do not have enough information to answer this question.'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import bs4\n", "from langchain import hub\n", @@ -98,28 +127,53 @@ "from langchain_community.vectorstores import Chroma\n", "from langchain_core.output_parsers import StrOutputParser\n", "from langchain_core.runnables import RunnablePassthrough\n", - "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", + "# from langchain_google_genai import GoogleGenerativeAIEmbeddings\n", + "from langchain_google_genai import ChatGoogleGenerativeAI\n", "\n", "#### INDEXING ####\n", "\n", - "# Load Documents\n", - "loader = WebBaseLoader(\n", - " web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n", - " bs_kwargs=dict(\n", - " parse_only=bs4.SoupStrainer(\n", - " class_=(\"post-content\", \"post-title\", \"post-header\")\n", - " )\n", - " ),\n", - ")\n", + "# # Load Documents\n", + "# loader = WebBaseLoader(\n", + "# web_paths=(\"https://github.com/thenaivekid/thenaivekid/blob/main/text.txt\",),\n", + "# bs_kwargs=dict(\n", + "# parse_only=bs4.SoupStrainer(\n", + "# class_=(\"post-content\", \"post-title\", \"post-header\")\n", + "# )\n", + "# ),\n", + "# )\n", + "\n", + "# docs = loader.load()\n", + "\n", + "# loader1 = WebBaseLoader(\n", + "# web_paths=(\"https://github.com/thenaivekid/thenaivekid/blob/main/competitor.txt\",),\n", + "# )\n", + "from langchain.document_loaders import TextLoader \n", + "loader = TextLoader(\"./data.txt\", encoding=\"utf-8\")\n", "docs = loader.load()\n", + "docs\n", + "print(docs)\n", "\n", "# Split\n", "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", "splits = text_splitter.split_documents(docs)\n", - "\n", + "# print(splits)\n", "# Embed\n", - "vectorstore = Chroma.from_documents(documents=splits, \n", - " embedding=OpenAIEmbeddings())\n", + "# vectorstore = Chroma.from_documents(documents=splits, \n", + "# embedding=OpenAIEmbeddings())\n", + "from langchain_google_genai import GoogleGenerativeAIEmbeddings\n", + "\n", + "# If there is no environment variable set for the API key, you can pass the API\n", + "# key to the parameter `google_api_key` of the `GoogleGenerativeAIEmbeddings`\n", + "# function: `google_api_key = \"key\"`.\n", + "\n", + "gemini_embeddings = GoogleGenerativeAIEmbeddings(model=\"models/embedding-001\")\n", + "\n", + "# Save to disk\n", + "vectorstore = Chroma.from_documents(\n", + " documents=splits, # Data\n", + " embedding=gemini_embeddings, # Embedding model\n", + " persist_directory=\"./chroma_db\" # Directory to save data\n", + " )\n", "\n", "retriever = vectorstore.as_retriever()\n", "\n", @@ -129,7 +183,10 @@ "prompt = hub.pull(\"rlm/rag-prompt\")\n", "\n", "# LLM\n", - "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", + "# llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", + "\n", + "llm = ChatGoogleGenerativeAI(model=\"gemini-pro\",\n", + " temperature=0.7, top_p=0.85)\n", "\n", "# Post-processing\n", "def format_docs(docs):\n", @@ -144,7 +201,122 @@ ")\n", "\n", "# Question\n", - "rag_chain.invoke(\"What is Task Decomposition?\")" + "rag_chain.invoke(\"Are people talking about mr beast?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "3116ee50", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Search Results:\n", + "was able to understand the meaning of the words, \"The angels are lost in perpetual contemplation\n", + "of an infinite glory.\"\n", + "it, the more he would miss it. In other words, self-actualization is possible only as a side-effect of\n", + "self-transcendence.\n", + "### three values\n", + "**Finding Meaning in Life: 3 Paths**\n", + "### my thoughts\n", + "as the holocust was for a religious reason, i used to think that prayers adn preaching of god would be common in concerntration. i am surprised as how little significance the religion had on the inmates. the religion didn't give them meaning. they didn;t curse the god. teh writer must be an athiest. the religion plays no role at all in his suffering and overcoming it.\n", + "## 5. Reframe Your Perspective\n", + "\n", + "How you view events largely shapes your emotions. When something happens that upsets you, try reframing your perspective. Look at the situation objectively from different angles. Consider how you might view this years from now. Please put it in a broader context. Reframing can help you avoid getting stuck in a negative narrative.\n", + "\n", + "## 6. Accept What Comes Your Way\n", + "\n", + "Life constantly changes and brings unpredictable events, both good and bad. Accept that this is part of nature. When something you wish hadn’t happened, focus on moving forward constructively. Don’t fight against reality. Acceptance provides peace of mind.\n", + "\n", + "## 7. Remember You Will Die\n", + "\n", + "Contemplating your mortality puts things in perspective. We all will die and can’t control when. Remembering this can help you let go of petty concerns and live more purposefully. Don’t worry about insignificant problems or things you can’t change. 
Focus on what truly matters most while you’re still alive.\n" + ] + } + ], + "source": [ + "from langchain.docstore.document import Document\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from langchain_community.vectorstores import Chroma\n", + "from langchain_google_genai import GoogleGenerativeAIEmbeddings\n", + "\n", + "# Define function to load text file\n", + "def load_text_data(filename):\n", + " with open(filename, \"r\", encoding=\"utf-8\") as f:\n", + " return f.read()\n", + "\n", + "# Function to process and store data\n", + "def process_and_store(filename, model_name=\"models/embedding-001\", persist_dir=\"./my_db\"):\n", + " # Load text data\n", + " text = load_text_data(filename)\n", + "\n", + " # Create documents\n", + " doc = Document(page_content=text, metadata={\"source\": \"local\"})\n", + "\n", + "\n", + " # Split documents\n", + " text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", + " splits = text_splitter.split_documents(docs)\n", + "\n", + " # Create vector store with Gemini embeddings\n", + " my_db = Chroma.from_documents(\n", + " documents=splits,\n", + " collection_name=\"my_data\",\n", + " embedding=GoogleGenerativeAIEmbeddings(model=model_name),\n", + " # persist_directory=persist_dir\n", + " )\n", + "\n", + " # Return the vector store (retriever) for searching\n", + " return my_db.as_retriever()\n", + "\n", + "# Function to perform retrieval\n", + "def search_db(retriever, query):\n", + " # Perform retrieval based on the query\n", + " results = retriever.get_relevant_documents(query)\n", + "\n", + " # Process and return results (replace with your desired processing)\n", + " processed_results = [doc.page_content for doc in results]\n", + " return processed_results\n", + "\n", + "# Example usage\n", + "filename = \"text.txt\" # Replace with your actual filename\n", + "retriever = process_and_store(filename)\n", + "\n", + "# Example search query\n", + "query = \"delete comment\"\n", + "search_results = search_db(retriever, query)\n", + "\n", + "# Print the retrieved results (modify as needed)\n", + "print(\"Search Results:\")\n", + "for result in search_results:\n", + " print(result)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "510ed1f1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='pytest\\ndataclasses', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='pytest\\ndataclasses', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='pytest\\ndataclasses', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='pytest\\ndataclasses', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'})]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retriever.get_relevant_documents(\"deleting comments\")" ] }, { @@ -159,12 +331,12 @@ "source": [ "## Part 2: Indexing\n", "\n", - "![Screenshot 2024-02-12 at 1.36.56 PM.png](attachment:d1c0f19e-1f5f-4fc6-a860-16337c1910fa.png)" + "![Screenshot 2024-02-12 at 1.36.56 PM.png](attachment:d1c0f19e-1f5f-4fc6-a860-16337c1910fa.png)\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "edd7beeb-21fa-4f4b-b8fa-5a4f70489a16", "metadata": {}, "outputs": [], @@ -179,15 +351,26 @@ "id": 
"e0552ea4-935d-4dfa-bd2b-56d148e96304", "metadata": {}, "source": [ - "[Count tokens](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) considering [~4 char / token](https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them)" + "[Count tokens](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) considering [~4 char / token](https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them)\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "df119cca-1676-4caa-bad4-11805d69e616", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "8" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import tiktoken\n", "\n", @@ -205,18 +388,37 @@ "id": "4f04fd74-829f-472c-a1bc-ec6521a0529f", "metadata": {}, "source": [ - "[Text embedding models](https://python.langchain.com/docs/integrations/text_embedding/openai)" + "[Text embedding models](https://python.langchain.com/docs/integrations/text_embedding/openai)\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "6bd98786-755d-4d49-ba97-30c5a623b74e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "768" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "from langchain_openai import OpenAIEmbeddings\n", - "embd = OpenAIEmbeddings()\n", + "# from langchain_openai import OpenAIEmbeddings\n", + "# embd = OpenAIEmbeddings()\n", + "from langchain_google_genai import GoogleGenerativeAIEmbeddings\n", + "\n", + "# If there is no environment variable set for the API key, you can pass the API\n", + "# key to the parameter `google_api_key` of the `GoogleGenerativeAIEmbeddings`\n", + "# function: `google_api_key = \"key\"`.\n", + "\n", + "embd = GoogleGenerativeAIEmbeddings(model=\"models/embedding-001\")\n", + "\n", "query_result = embd.embed_query(question)\n", "document_result = embd.embed_query(document)\n", "len(query_result)" @@ -227,15 +429,23 @@ "id": "f5e0e35f-6861-4c5e-9301-04fd5408f8f8", "metadata": {}, "source": [ - "[Cosine similarity](https://platform.openai.com/docs/guides/embeddings/frequently-asked-questions) is reccomended (1 indicates identical) for OpenAI embeddings." 
+ "[Cosine similarity](https://platform.openai.com/docs/guides/embeddings/frequently-asked-questions) is reccomended (1 indicates identical) for OpenAI embeddings.\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "b8001998-b08c-4560-b124-bfa1fced8958", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cosine Similarity: 0.8932740049570456\n" + ] + } + ], "source": [ "import numpy as np\n", "\n", @@ -254,12 +464,12 @@ "id": "8aea73bc-98e3-4fdc-ba72-d190736bed20", "metadata": {}, "source": [ - "[Document Loaders](https://python.langchain.com/docs/integrations/document_loaders/)" + "[Document Loaders](https://python.langchain.com/docs/integrations/document_loaders/)\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "5778c31a-6138-4130-8865-31a08e82b9fb", "metadata": {}, "outputs": [], @@ -280,6 +490,27 @@ "blog_docs = loader.load()" ] }, + { + "cell_type": "code", + "execution_count": 23, + "id": "d862c6f8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='\\n\\n LLM Powered Autonomous Agents\\n \\nDate: June 23, 2023 | Estimated Reading Time: 31 min | Author: Lilian Weng\\n\\n\\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\\nAgent System Overview#\\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\\n\\nPlanning\\n\\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\\n\\n\\nMemory\\n\\nShort-term memory: I would consider all the in-context learning (See Prompt Engineering) as utilizing short-term memory of the model to learn.\\nLong-term memory: This provides the agent with the capability to retain and recall (infinite) information over extended periods, often by leveraging an external vector store and fast retrieval.\\n\\n\\nTool use\\n\\nThe agent learns to call external APIs for extra information that is missing from the model weights (often hard to change after pre-training), including current information, code execution capability, access to proprietary information sources and more.\\n\\n\\n\\n\\nFig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. 
It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.\\nAnother quite distinct approach, LLM+P (Liu et al. 2023), involves relying on an external classical planner to do long-horizon planning. This approach utilizes the Planning Domain Definition Language (PDDL) as an intermediate interface to describe the planning problem. In this process, LLM (1) translates the problem into “Problem PDDL”, then (2) requests a classical planner to generate a PDDL plan based on an existing “Domain PDDL”, and finally (3) translates the PDDL plan back into natural language. Essentially, the planning step is outsourced to an external tool, assuming the availability of domain-specific PDDL and a suitable planner which is common in certain robotic setups but not in many other domains.\\nSelf-Reflection#\\nSelf-reflection is a vital aspect that allows autonomous agents to improve iteratively by refining past action decisions and correcting previous mistakes. It plays a crucial role in real-world tasks where trial and error are inevitable.\\nReAct (Yao et al. 2023) integrates reasoning and acting within LLM by extending the action space to be a combination of task-specific discrete actions and the language space. The former enables LLM to interact with the environment (e.g. use Wikipedia search API), while the latter prompting LLM to generate reasoning traces in natural language.\\nThe ReAct prompt template incorporates explicit steps for LLM to think, roughly formatted as:\\nThought: ...\\nAction: ...\\nObservation: ...\\n... (Repeated many times)\\n\\nFig. 2. Examples of reasoning trajectories for knowledge-intensive tasks (e.g. HotpotQA, FEVER) and decision-making tasks (e.g. AlfWorld Env, WebShop). (Image source: Yao et al. 2023).\\nIn both experiments on knowledge-intensive tasks and decision-making tasks, ReAct works better than the Act-only baseline where Thought: … step is removed.\\nReflexion (Shinn & Labash 2023) is a framework to equips agents with dynamic memory and self-reflection capabilities to improve reasoning skills. Reflexion has a standard RL setup, in which the reward model provides a simple binary reward and the action space follows the setup in ReAct where the task-specific action space is augmented with language to enable complex reasoning steps. After each action $a_t$, the agent computes a heuristic $h_t$ and optionally may decide to reset the environment to start a new trial depending on the self-reflection results.\\n\\nFig. 3. Illustration of the Reflexion framework. (Image source: Shinn & Labash, 2023)\\nThe heuristic function determines when the trajectory is inefficient or contains hallucination and should be stopped. Inefficient planning refers to trajectories that take too long without success. 
Hallucination is defined as encountering a sequence of consecutive identical actions that lead to the same observation in the environment.\\nSelf-reflection is created by showing two-shot examples to LLM and each example is a pair of (failed trajectory, ideal reflection for guiding future changes in the plan). Then reflections are added into the agent’s working memory, up to three, to be used as context for querying LLM.\\n\\nFig. 4. Experiments on AlfWorld Env and HotpotQA. Hallucination is a more common failure than inefficient planning in AlfWorld. (Image source: Shinn & Labash, 2023)\\nChain of Hindsight (CoH; Liu et al. 2023) encourages the model to improve on its own outputs by explicitly presenting it with a sequence of past outputs, each annotated with feedback. Human feedback data is a collection of $D_h = \\\\{(x, y_i , r_i , z_i)\\\\}_{i=1}^n$, where $x$ is the prompt, each $y_i$ is a model completion, $r_i$ is the human rating of $y_i$, and $z_i$ is the corresponding human-provided hindsight feedback. Assume the feedback tuples are ranked by reward, $r_n \\\\geq r_{n-1} \\\\geq \\\\dots \\\\geq r_1$ The process is supervised fine-tuning where the data is a sequence in the form of $\\\\tau_h = (x, z_i, y_i, z_j, y_j, \\\\dots, z_n, y_n)$, where $\\\\leq i \\\\leq j \\\\leq n$. The model is finetuned to only predict $y_n$ where conditioned on the sequence prefix, such that the model can self-reflect to produce better output based on the feedback sequence. The model can optionally receive multiple rounds of instructions with human annotators at test time.\\nTo avoid overfitting, CoH adds a regularization term to maximize the log-likelihood of the pre-training dataset. To avoid shortcutting and copying (because there are many common words in feedback sequences), they randomly mask 0% - 5% of past tokens during training.\\nThe training dataset in their experiments is a combination of WebGPT comparisons, summarization from human feedback and human preference dataset.\\n\\nFig. 5. After fine-tuning with CoH, the model can follow instructions to produce outputs with incremental improvement in a sequence. (Image source: Liu et al. 2023)\\nThe idea of CoH is to present a history of sequentially improved outputs in context and train the model to take on the trend to produce better outputs. Algorithm Distillation (AD; Laskin et al. 2023) applies the same idea to cross-episode trajectories in reinforcement learning tasks, where an algorithm is encapsulated in a long history-conditioned policy. Considering that an agent interacts with the environment many times and in each episode the agent gets a little better, AD concatenates this learning history and feeds that into the model. Hence we should expect the next predicted action to lead to better performance than previous trials. The goal is to learn the process of RL instead of training a task-specific policy itself.\\n\\nFig. 6. Illustration of how Algorithm Distillation (AD) works. (Image source: Laskin et al. 2023).\\nThe paper hypothesizes that any algorithm that generates a set of learning histories can be distilled into a neural network by performing behavioral cloning over actions. The history data is generated by a set of source policies, each trained for a specific task. 
At the training stage, during each RL run, a random task is sampled and a subsequence of multi-episode history is used for training, such that the learned policy is task-agnostic.\\nIn reality, the model has limited context window length, so episodes should be short enough to construct multi-episode history. Multi-episodic contexts of 2-4 episodes are necessary to learn a near-optimal in-context RL algorithm. The emergence of in-context RL requires long enough context.\\nIn comparison with three baselines, including ED (expert distillation, behavior cloning with expert trajectories instead of learning history), source policy (used for generating trajectories for distillation by UCB), RL^2 (Duan et al. 2017; used as upper bound since it needs online RL), AD demonstrates in-context RL with performance getting close to RL^2 despite only using offline RL and learns much faster than other baselines. When conditioned on partial training history of the source policy, AD also improves much faster than ED baseline.\\n\\nFig. 7. Comparison of AD, ED, source policy and RL^2 on environments that require memory and exploration. Only binary reward is assigned. The source policies are trained with A3C for \"dark\" environments and DQN for watermaze.(Image source: Laskin et al. 2023)\\nComponent Two: Memory#\\n(Big thank you to ChatGPT for helping me draft this section. I’ve learned a lot about the human brain and data structure for fast MIPS in my conversations with ChatGPT.)\\nTypes of Memory#\\nMemory can be defined as the processes used to acquire, store, retain, and later retrieve information. There are several types of memory in human brains.\\n\\n\\nSensory Memory: This is the earliest stage of memory, providing the ability to retain impressions of sensory information (visual, auditory, etc) after the original stimuli have ended. Sensory memory typically only lasts for up to a few seconds. Subcategories include iconic memory (visual), echoic memory (auditory), and haptic memory (touch).\\n\\n\\nShort-Term Memory (STM) or Working Memory: It stores information that we are currently aware of and needed to carry out complex cognitive tasks such as learning and reasoning. Short-term memory is believed to have the capacity of about 7 items (Miller 1956) and lasts for 20-30 seconds.\\n\\n\\nLong-Term Memory (LTM): Long-term memory can store information for a remarkably long time, ranging from a few days to decades, with an essentially unlimited storage capacity. There are two subtypes of LTM:\\n\\nExplicit / declarative memory: This is memory of facts and events, and refers to those memories that can be consciously recalled, including episodic memory (events and experiences) and semantic memory (facts and concepts).\\nImplicit / procedural memory: This type of memory is unconscious and involves skills and routines that are performed automatically, like riding a bike or typing on a keyboard.\\n\\n\\n\\n\\nFig. 8. Categorization of human memory.\\nWe can roughly consider the following mappings:\\n\\nSensory memory as learning embedding representations for raw inputs, including text, image or other modalities;\\nShort-term memory as in-context learning. It is short and finite, as it is restricted by the finite context window length of Transformer.\\nLong-term memory as the external vector store that the agent can attend to at query time, accessible via fast retrieval.\\n\\nMaximum Inner Product Search (MIPS)#\\nThe external memory can alleviate the restriction of finite attention span. 
A standard practice is to save the embedding representation of information into a vector store database that can support fast maximum inner-product search (MIPS). To optimize the retrieval speed, the common choice is the approximate nearest neighbors (ANN)\\u200b algorithm to return approximately top k nearest neighbors to trade off a little accuracy lost for a huge speedup.\\nA couple common choices of ANN algorithms for fast MIPS:\\n\\nLSH (Locality-Sensitive Hashing): It introduces a hashing function such that similar input items are mapped to the same buckets with high probability, where the number of buckets is much smaller than the number of inputs.\\nANNOY (Approximate Nearest Neighbors Oh Yeah): The core data structure are random projection trees, a set of binary trees where each non-leaf node represents a hyperplane splitting the input space into half and each leaf stores one data point. Trees are built independently and at random, so to some extent, it mimics a hashing function. ANNOY search happens in all the trees to iteratively search through the half that is closest to the query and then aggregates the results. The idea is quite related to KD tree but a lot more scalable.\\nHNSW (Hierarchical Navigable Small World): It is inspired by the idea of small world networks where most nodes can be reached by any other nodes within a small number of steps; e.g. “six degrees of separation” feature of social networks. HNSW builds hierarchical layers of these small-world graphs, where the bottom layers contain the actual data points. The layers in the middle create shortcuts to speed up search. When performing a search, HNSW starts from a random node in the top layer and navigates towards the target. When it can’t get any closer, it moves down to the next layer, until it reaches the bottom layer. Each move in the upper layers can potentially cover a large distance in the data space, and each move in the lower layers refines the search quality.\\nFAISS (Facebook AI Similarity Search): It operates on the assumption that in high dimensional space, distances between nodes follow a Gaussian distribution and thus there should exist clustering of data points. FAISS applies vector quantization by partitioning the vector space into clusters and then refining the quantization within clusters. Search first looks for cluster candidates with coarse quantization and then further looks into each cluster with finer quantization.\\nScaNN (Scalable Nearest Neighbors): The main innovation in ScaNN is anisotropic vector quantization. It quantizes a data point $x_i$ to $\\\\tilde{x}_i$ such that the inner product $\\\\langle q, x_i \\\\rangle$ is as similar to the original distance of $\\\\angle q, \\\\tilde{x}_i$ as possible, instead of picking the closet quantization centroid points.\\n\\n\\nFig. 9. Comparison of MIPS algorithms, measured in recall@10. (Image source: Google Blog, 2020)\\nCheck more MIPS algorithms and performance comparison in ann-benchmarks.com.\\nComponent Three: Tool Use#\\nTool use is a remarkable and distinguishing characteristic of human beings. We create, modify and utilize external objects to do things that go beyond our physical and cognitive limits. Equipping LLMs with external tools can significantly extend the model capabilities.\\n\\nFig. 10. A picture of a sea otter using rock to crack open a seashell, while floating in the water. While some other animals can use tools, the complexity is not comparable with humans. (Image source: Animals using tools)\\nMRKL (Karpas et al. 
2022), short for “Modular Reasoning, Knowledge and Language”, is a neuro-symbolic architecture for autonomous agents. A MRKL system is proposed to contain a collection of “expert” modules and the general-purpose LLM works as a router to route inquiries to the best suitable expert module. These modules can be neural (e.g. deep learning models) or symbolic (e.g. math calculator, currency converter, weather API).\\nThey did an experiment on fine-tuning LLM to call a calculator, using arithmetic as a test case. Their experiments showed that it was harder to solve verbal math problems than explicitly stated math problems because LLMs (7B Jurassic1-large model) failed to extract the right arguments for the basic arithmetic reliably. The results highlight when the external symbolic tools can work reliably, knowing when to and how to use the tools are crucial, determined by the LLM capability.\\nBoth TALM (Tool Augmented Language Models; Parisi et al. 2022) and Toolformer (Schick et al. 2023) fine-tune a LM to learn to use external tool APIs. The dataset is expanded based on whether a newly added API call annotation can improve the quality of model outputs. See more details in the “External APIs” section of Prompt Engineering.\\nChatGPT Plugins and OpenAI API function calling are good examples of LLMs augmented with tool use capability working in practice. The collection of tool APIs can be provided by other developers (as in Plugins) or self-defined (as in function calls).\\nHuggingGPT (Shen et al. 2023) is a framework to use ChatGPT as the task planner to select models available in HuggingFace platform according to the model descriptions and summarize the response based on the execution results.\\n\\nFig. 11. Illustration of how HuggingGPT works. (Image source: Shen et al. 2023)\\nThe system comprises of 4 stages:\\n(1) Task planning: LLM works as the brain and parses the user requests into multiple tasks. There are four attributes associated with each task: task type, ID, dependencies, and arguments. They use few-shot examples to guide LLM to do task parsing and planning.\\nInstruction:\\n\\nThe AI assistant can parse user input to several tasks: [{\"task\": task, \"id\", task_id, \"dep\": dependency_task_ids, \"args\": {\"text\": text, \"image\": URL, \"audio\": URL, \"video\": URL}}]. The \"dep\" field denotes the id of the previous task which generates a new resource that the current task relies on. A special tag \"-task_id\" refers to the generated text image, audio and video in the dependency task with id as task_id. The task MUST be selected from the following options: {{ Available Task List }}. There is a logical relationship between tasks, please note their order. If the user input can\\'t be parsed, you need to reply empty JSON. Here are several cases for your reference: {{ Demonstrations }}. The chat history is recorded as {{ Chat History }}. From this chat history, you can find the path of the user-mentioned resources for your task planning.\\n\\n(2) Model selection: LLM distributes the tasks to expert models, where the request is framed as a multiple-choice question. LLM is presented with a list of models to choose from. Due to the limited context length, task type based filtration is needed.\\nInstruction:\\n\\nGiven the user request and the call command, the AI assistant helps the user to select a suitable model from a list of models to process the user request. The AI assistant merely outputs the model id of the most appropriate model. 
The output must be in a strict JSON format: \"id\": \"id\", \"reason\": \"your detail reason for the choice\". We have a list of models for you to choose from {{ Candidate Models }}. Please select one model from the list.\\n\\n(3) Task execution: Expert models execute on the specific tasks and log results.\\nInstruction:\\n\\nWith the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execution: {{ Predictions }}. You must first answer the user\\'s request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. If inference results contain a file path, must tell the user the complete file path.\\n\\n(4) Response generation: LLM receives the execution results and provides summarized results to users.\\nTo put HuggingGPT into real world usage, a couple challenges need to solve: (1) Efficiency improvement is needed as both LLM inference rounds and interactions with other models slow down the process; (2) It relies on a long context window to communicate over complicated task content; (3) Stability improvement of LLM outputs and external model services.\\nAPI-Bank (Li et al. 2023) is a benchmark for evaluating the performance of tool-augmented LLMs. It contains 53 commonly used API tools, a complete tool-augmented LLM workflow, and 264 annotated dialogues that involve 568 API calls. The selection of APIs is quite diverse, including search engines, calculator, calendar queries, smart home control, schedule management, health data management, account authentication workflow and more. Because there are a large number of APIs, LLM first has access to API search engine to find the right API to call and then uses the corresponding documentation to make a call.\\n\\nFig. 12. Pseudo code of how LLM makes an API call in API-Bank. (Image source: Li et al. 2023)\\nIn the API-Bank workflow, LLMs need to make a couple of decisions and at each step we can evaluate how accurate that decision is. Decisions include:\\n\\nWhether an API call is needed.\\nIdentify the right API to call: if not good enough, LLMs need to iteratively modify the API inputs (e.g. deciding search keywords for Search Engine API).\\nResponse based on the API results: the model can choose to refine and call again if results are not satisfied.\\n\\nThis benchmark evaluates the agent’s tool use capabilities at three levels:\\n\\nLevel-1 evaluates the ability to call the API. Given an API’s description, the model needs to determine whether to call a given API, call it correctly, and respond properly to API returns.\\nLevel-2 examines the ability to retrieve the API. The model needs to search for possible APIs that may solve the user’s requirement and learn how to use them by reading documentation.\\nLevel-3 assesses the ability to plan API beyond retrieve and call. Given unclear user requests (e.g. schedule group meetings, book flight/hotel/restaurant for a trip), the model may have to conduct multiple API calls to solve it.\\n\\nCase Studies#\\nScientific Discovery Agent#\\nChemCrow (Bran et al. 2023) is a domain-specific example in which LLM is augmented with 13 expert-designed tools to accomplish tasks across organic synthesis, drug discovery, and materials design. 
The workflow, implemented in LangChain, reflects what was previously described in the ReAct and MRKLs and combines CoT reasoning with tools relevant to the tasks:\\n\\nThe LLM is provided with a list of tool names, descriptions of their utility, and details about the expected input/output.\\nIt is then instructed to answer a user-given prompt using the tools provided when necessary. The instruction suggests the model to follow the ReAct format - Thought, Action, Action Input, Observation.\\n\\nOne interesting observation is that while the LLM-based evaluation concluded that GPT-4 and ChemCrow perform nearly equivalently, human evaluations with experts oriented towards the completion and chemical correctness of the solutions showed that ChemCrow outperforms GPT-4 by a large margin. This indicates a potential problem with using LLM to evaluate its own performance on domains that requires deep expertise. The lack of expertise may cause LLMs not knowing its flaws and thus cannot well judge the correctness of task results.\\nBoiko et al. (2023) also looked into LLM-empowered agents for scientific discovery, to handle autonomous design, planning, and performance of complex scientific experiments. This agent can use tools to browse the Internet, read documentation, execute code, call robotics experimentation APIs and leverage other LLMs.\\nFor example, when requested to \"develop a novel anticancer drug\", the model came up with the following reasoning steps:\\n\\ninquired about current trends in anticancer drug discovery;\\nselected a target;\\nrequested a scaffold targeting these compounds;\\nOnce the compound was identified, the model attempted its synthesis.\\n\\nThey also discussed the risks, especially with illicit drugs and bioweapons. They developed a test set containing a list of known chemical weapon agents and asked the agent to synthesize them. 4 out of 11 requests (36%) were accepted to obtain a synthesis solution and the agent attempted to consult documentation to execute the procedure. 7 out of 11 were rejected and among these 7 rejected cases, 5 happened after a Web search while 2 were rejected based on prompt only.\\nGenerative Agents Simulation#\\nGenerative Agents (Park, et al. 2023) is super fun experiment where 25 virtual characters, each controlled by a LLM-powered agent, are living and interacting in a sandbox environment, inspired by The Sims. Generative agents create believable simulacra of human behavior for interactive applications.\\nThe design of generative agents combines LLM with memory, planning and reflection mechanisms to enable agents to behave conditioned on past experience, as well as to interact with other agents.\\n\\nMemory stream: is a long-term memory module (external database) that records a comprehensive list of agents’ experience in natural language.\\n\\nEach element is an observation, an event directly provided by the agent.\\n- Inter-agent communication can trigger new natural language statements.\\n\\n\\nRetrieval model: surfaces the context to inform the agent’s behavior, according to relevance, recency and importance.\\n\\nRecency: recent events have higher scores\\nImportance: distinguish mundane from core memories. Ask LM directly.\\nRelevance: based on how related it is to the current situation / query.\\n\\n\\nReflection mechanism: synthesizes memories into higher level inferences over time and guides the agent’s future behavior. 
They are higher-level summaries of past events (<- note that this is a bit different from self-reflection above)\\n\\nPrompt LM with 100 most recent observations and to generate 3 most salient high-level questions given a set of observations/statements. Then ask LM to answer those questions.\\n\\n\\nPlanning & Reacting: translate the reflections and the environment information into actions\\n\\nPlanning is essentially in order to optimize believability at the moment vs in time.\\nPrompt template: {Intro of an agent X}. Here is X\\'s plan today in broad strokes: 1)\\nRelationships between agents and observations of one agent by another are all taken into consideration for planning and reacting.\\nEnvironment information is present in a tree structure.\\n\\n\\n\\n\\nFig. 13. The generative agent architecture. (Image source: Park et al. 2023)\\nThis fun simulation results in emergent social behavior, such as information diffusion, relationship memory (e.g. two agents continuing the conversation topic) and coordination of social events (e.g. host a party and invite many others).\\nProof-of-Concept Examples#\\nAutoGPT has drawn a lot of attention into the possibility of setting up autonomous agents with LLM as the main controller. It has quite a lot of reliability issues given the natural language interface, but nevertheless a cool proof-of-concept demo. A lot of code in AutoGPT is about format parsing.\\nHere is the system message used by AutoGPT, where {{...}} are user inputs:\\nYou are {{ai-name}}, {{user-provided AI bot description}}.\\nYour decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.\\n\\nGOALS:\\n\\n1. {{user-provided goal 1}}\\n2. {{user-provided goal 2}}\\n3. ...\\n4. ...\\n5. ...\\n\\nConstraints:\\n1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.\\n2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.\\n3. No user assistance\\n4. Exclusively use the commands listed in double quotes e.g. \"command name\"\\n5. Use subprocesses for commands that will not terminate within a few minutes\\n\\nCommands:\\n1. Google Search: \"google\", args: \"input\": \"\"\\n2. Browse Website: \"browse_website\", args: \"url\": \"\", \"question\": \"\"\\n3. Start GPT Agent: \"start_agent\", args: \"name\": \"\", \"task\": \"\", \"prompt\": \"\"\\n4. Message GPT Agent: \"message_agent\", args: \"key\": \"\", \"message\": \"\"\\n5. List GPT Agents: \"list_agents\", args:\\n6. Delete GPT Agent: \"delete_agent\", args: \"key\": \"\"\\n7. Clone Repository: \"clone_repository\", args: \"repository_url\": \"\", \"clone_path\": \"\"\\n8. Write to file: \"write_to_file\", args: \"file\": \"\", \"text\": \"\"\\n9. Read file: \"read_file\", args: \"file\": \"\"\\n10. Append to file: \"append_to_file\", args: \"file\": \"\", \"text\": \"\"\\n11. Delete file: \"delete_file\", args: \"file\": \"\"\\n12. Search Files: \"search_files\", args: \"directory\": \"\"\\n13. Analyze Code: \"analyze_code\", args: \"code\": \"\"\\n14. Get Improved Code: \"improve_code\", args: \"suggestions\": \"\", \"code\": \"\"\\n15. Write Tests: \"write_tests\", args: \"code\": \"\", \"focus\": \"\"\\n16. Execute Python File: \"execute_python_file\", args: \"file\": \"\"\\n17. Generate Image: \"generate_image\", args: \"prompt\": \"\"\\n18. 
Send Tweet: \"send_tweet\", args: \"text\": \"\"\\n19. Do Nothing: \"do_nothing\", args:\\n20. Task Complete (Shutdown): \"task_complete\", args: \"reason\": \"\"\\n\\nResources:\\n1. Internet access for searches and information gathering.\\n2. Long Term memory management.\\n3. GPT-3.5 powered Agents for delegation of simple tasks.\\n4. File output.\\n\\nPerformance Evaluation:\\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\\n2. Constructively self-criticize your big-picture behavior constantly.\\n3. Reflect on past decisions and strategies to refine your approach.\\n4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.\\n\\nYou should only respond in JSON format as described below\\nResponse Format:\\n{\\n \"thoughts\": {\\n \"text\": \"thought\",\\n \"reasoning\": \"reasoning\",\\n \"plan\": \"- short bulleted\\\\n- list that conveys\\\\n- long-term plan\",\\n \"criticism\": \"constructive self-criticism\",\\n \"speak\": \"thoughts summary to say to user\"\\n },\\n \"command\": {\\n \"name\": \"command name\",\\n \"args\": {\\n \"arg name\": \"value\"\\n }\\n }\\n}\\nEnsure the response can be parsed by Python json.loads\\nGPT-Engineer is another project to create a whole repository of code given a task specified in natural language. The GPT-Engineer is instructed to think over a list of smaller components to build and ask for user input to clarify questions as needed.\\nHere are a sample conversation for task clarification sent to OpenAI ChatCompletion endpoint used by GPT-Engineer. The user inputs are wrapped in {{user input text}}.\\n[\\n {\\n \"role\": \"system\",\\n \"content\": \"You will read instructions and not carry them out, only seek to clarify them.\\\\nSpecifically you will first summarise a list of super short bullets of areas that need clarification.\\\\nThen you will pick one clarifying question, and wait for an answer from the user.\\\\n\"\\n },\\n {\\n \"role\": \"user\",\\n \"content\": \"We are writing {{a Super Mario game in python. MVC components split in separate files. Keyboard control.}}\\\\n\"\\n },\\n {\\n \"role\": \"assistant\",\\n \"content\": \"Summary of areas that need clarification:\\\\n1. Specifics of the Super Mario game (e.g. level design, characters, gameplay mechanics)\\\\n2. Details about the MVC components (e.g. which components are in each file)\\\\n3. Keyboard control implementation (e.g. which keys to use, how to handle input)\\\\n\\\\nClarifying question:\\\\nCan you provide more details about the Super Mario game, such as level design, characters, and gameplay mechanics?\"\\n },\\n {\\n \"role\": \"user\",\\n \"content\": \"{{There are 10 levels in total. The main character is a plumber named Mario, who can walk and jump. It is a classical platform game just like Super Mario. The main character moves from left to right, trying to get to the destination, where there are many obstacles and attacks from enemies in the process.}}\\\\n\\\\nIs anything else unclear? 
If yes, only answer in the form:\\\\n{remaining unclear areas} remaining questions.\\\\n{Next question}\\\\nIf everything is sufficiently clear, only answer \\\\\"Nothing more to clarify.\\\\\".\"\\n },\\n {\\n \"role\": \"assistant\",\\n \"content\": \"Remaining unclear areas: 2 remaining questions.\\\\nCan you provide more information about how the MVC components are split into separate files?\"\\n },\\n {\\n \"role\": \"user\",\\n \"content\": \"{{Make your own assumptions and state them explicitly before starting}}\"\\n }\\n]\\nThen after these clarification, the agent moved into the code writing mode with a different system message.\\nSystem message:\\n\\nYou will get instructions for code to write.\\nYou will write a very long answer. Make sure that every detail of the architecture is, in the end, implemented as code.\\nMake sure that every detail of the architecture is, in the end, implemented as code.\\nThink step by step and reason yourself to the right decisions to make sure we get it right.\\nYou will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.\\nThen you will output the content of each file including ALL code.\\nEach file must strictly follow a markdown code block format, where the following tokens must be replaced such that\\nFILENAME is the lowercase file name including the file extension,\\nLANG is the markup code block language for the code’s language, and CODE is the code:\\nFILENAME\\nCODE\\nYou will start with the “entrypoint” file, then go to the ones that are imported by that file, and so on.\\nPlease note that the code should be fully functional. No placeholders.\\nFollow a language and framework appropriate best practice file naming convention.\\nMake sure that files contain all imports, types etc. Make sure that code in different files are compatible with each other.\\nEnsure to implement all code, if you are unsure, write a plausible implementation.\\nInclude module dependency or package manager dependency definition file.\\nBefore you finish, double check that all parts of the architecture is present in the files.\\nUseful to know:\\nYou almost always put different classes in different files.\\nFor Python, you always create an appropriate requirements.txt file.\\nFor NodeJS, you always create an appropriate package.json file.\\nYou always add a comment briefly describing the purpose of the function definition.\\nYou try to add comments explaining very complex bits of logic.\\nYou always follow the best practices for the requested languages in terms of describing the code written as a defined\\npackage/project.\\nPython toolbelt preferences:\\n\\npytest\\ndataclasses\\n\\n\\nConversatin samples:\\n[\\n {\\n \"role\": \"system\",\\n \"content\": \"You will get instructions for code to write.\\\\nYou will write a very long answer. 
Make sure that every detail of the architecture is, in the end, implemented as code.\\\\nMake sure that every detail of the architecture is, in the end, implemented as code.\\\\n\\\\nThink step by step and reason yourself to the right decisions to make sure we get it right.\\\\nYou will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.\\\\n\\\\nThen you will output the content of each file including ALL code.\\\\nEach file must strictly follow a markdown code block format, where the following tokens must be replaced such that\\\\nFILENAME is the lowercase file name including the file extension,\\\\nLANG is the markup code block language for the code\\'s language, and CODE is the code:\\\\n\\\\nFILENAME\\\\n```LANG\\\\nCODE\\\\n```\\\\n\\\\nYou will start with the \\\\\"entrypoint\\\\\" file, then go to the ones that are imported by that file, and so on.\\\\nPlease note that the code should be fully functional. No placeholders.\\\\n\\\\nFollow a language and framework appropriate best practice file naming convention.\\\\nMake sure that files contain all imports, types etc. Make sure that code in different files are compatible with each other.\\\\nEnsure to implement all code, if you are unsure, write a plausible implementation.\\\\nInclude module dependency or package manager dependency definition file.\\\\nBefore you finish, double check that all parts of the architecture is present in the files.\\\\n\\\\nUseful to know:\\\\nYou almost always put different classes in different files.\\\\nFor Python, you always create an appropriate requirements.txt file.\\\\nFor NodeJS, you always create an appropriate package.json file.\\\\nYou always add a comment briefly describing the purpose of the function definition.\\\\nYou try to add comments explaining very complex bits of logic.\\\\nYou always follow the best practices for the requested languages in terms of describing the code written as a defined\\\\npackage/project.\\\\n\\\\n\\\\nPython toolbelt preferences:\\\\n- pytest\\\\n- dataclasses\\\\n\"\\n },\\n # … same conversation as earlier, ended with \"Make your own assumptions and state them explicitly before starting\".\\n {\\n \"role\": \"assistant\",\\n \"content\": \"Assumptions:\\\\n1. Model: The model will contain the game\\'s data, such as level information, character states, and enemy positions.\\\\n2. View: The view will handle the game\\'s visuals, including rendering the game objects, backgrounds, and updating the display.\\\\n3. 
Controller: The controller will manage user input, such as keyboard controls, and update the model accordingly.\\\\n\\\\nWith these assumptions in mind, is there any specific aspect of the keyboard control implementation you would like me to clarify?\"\\n },\\n {\\n \"role\": \"user\",\\n \"content\": \"Please now remember the steps:\\\\n\\\\nThink step by step and reason yourself to the right decisions to make sure we get it right.\\\\nFirst lay out the names of the core classes, functions, methods that will be necessary, As well as a quick comment on their purpose.\\\\n\\\\nThen you will output the content of each file including ALL code.\\\\nEach file must strictly follow a markdown code block format, where the following tokens must be replaced such that\\\\nFILENAME is the lowercase file name including the file extension,\\\\nLANG is the markup code block language for the code\\'s language, and CODE is the code:\\\\n\\\\nFILENAME\\\\n```LANG\\\\nCODE\\\\n```\\\\n\\\\nPlease note that the code should be fully functional. No placeholders.\\\\n\\\\nYou will start with the \\\\\"entrypoint\\\\\" file, then go to the ones that are imported by that file, and so on.\\\\nFollow a language and framework appropriate best practice file naming convention.\\\\nMake sure that files contain all imports, types etc. The code should be fully functional. Make sure that code in different files are compatible with each other.\\\\nBefore you finish, double check that all parts of the architecture is present in the files.\\\\n\"\\n }\\n]\\nChallenges#\\nAfter going through key ideas and demos of building LLM-centered agents, I start to see a couple common limitations:\\n\\n\\nFinite context length: The restricted context capacity limits the inclusion of historical information, detailed instructions, API call context, and responses. The design of the system has to work with this limited communication bandwidth, while mechanisms like self-reflection to learn from past mistakes would benefit a lot from long or infinite context windows. Although vector stores and retrieval can provide access to a larger knowledge pool, their representation power is not as powerful as full attention.\\n\\n\\nChallenges in long-term planning and task decomposition: Planning over a lengthy history and effectively exploring the solution space remain challenging. LLMs struggle to adjust plans when faced with unexpected errors, making them less robust compared to humans who learn from trial and error.\\n\\n\\nReliability of natural language interface: Current agent system relies on natural language as an interface between LLMs and external components such as memory and tools. However, the reliability of model outputs is questionable, as LLMs may make formatting errors and occasionally exhibit rebellious behavior (e.g. refuse to follow an instruction). Consequently, much of the agent demo code focuses on parsing model output.\\n\\n\\nCitation#\\nCited as:\\n\\nWeng, Lilian. (Jun 2023). “LLM-powered Autonomous Agents”. Lil’Log. https://lilianweng.github.io/posts/2023-06-23-agent/.\\n\\nOr\\n@article{weng2023agent,\\n title = \"LLM-powered Autonomous Agents\",\\n author = \"Weng, Lilian\",\\n journal = \"lilianweng.github.io\",\\n year = \"2023\",\\n month = \"Jun\",\\n url = \"https://lilianweng.github.io/posts/2023-06-23-agent/\"\\n}\\nReferences#\\n[1] Wei et al. “Chain of thought prompting elicits reasoning in large language models.” NeurIPS 2022\\n[2] Yao et al. 
“Tree of Thoughts: Dliberate Problem Solving with Large Language Models.” arXiv preprint arXiv:2305.10601 (2023).\\n[3] Liu et al. “Chain of Hindsight Aligns Language Models with Feedback\\n“ arXiv preprint arXiv:2302.02676 (2023).\\n[4] Liu et al. “LLM+P: Empowering Large Language Models with Optimal Planning Proficiency” arXiv preprint arXiv:2304.11477 (2023).\\n[5] Yao et al. “ReAct: Synergizing reasoning and acting in language models.” ICLR 2023.\\n[6] Google Blog. “Announcing ScaNN: Efficient Vector Similarity Search” July 28, 2020.\\n[7] https://chat.openai.com/share/46ff149e-a4c7-4dd7-a800-fc4a642ea389\\n[8] Shinn & Labash. “Reflexion: an autonomous agent with dynamic memory and self-reflection” arXiv preprint arXiv:2303.11366 (2023).\\n[9] Laskin et al. “In-context Reinforcement Learning with Algorithm Distillation” ICLR 2023.\\n[10] Karpas et al. “MRKL Systems A modular, neuro-symbolic architecture that combines large language models, external knowledge sources and discrete reasoning.” arXiv preprint arXiv:2205.00445 (2022).\\n[11] Nakano et al. “Webgpt: Browser-assisted question-answering with human feedback.” arXiv preprint arXiv:2112.09332 (2021).\\n[12] Parisi et al. “TALM: Tool Augmented Language Models”\\n[13] Schick et al. “Toolformer: Language Models Can Teach Themselves to Use Tools.” arXiv preprint arXiv:2302.04761 (2023).\\n[14] Weaviate Blog. Why is Vector Search so fast? Sep 13, 2022.\\n[15] Li et al. “API-Bank: A Benchmark for Tool-Augmented LLMs” arXiv preprint arXiv:2304.08244 (2023).\\n[16] Shen et al. “HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in HuggingFace” arXiv preprint arXiv:2303.17580 (2023).\\n[17] Bran et al. “ChemCrow: Augmenting large-language models with chemistry tools.” arXiv preprint arXiv:2304.05376 (2023).\\n[18] Boiko et al. “Emergent autonomous scientific research capabilities of large language models.” arXiv preprint arXiv:2304.05332 (2023).\\n[19] Joon Sung Park, et al. “Generative Agents: Interactive Simulacra of Human Behavior.” arXiv preprint arXiv:2304.03442 (2023).\\n[20] AutoGPT. https://github.com/Significant-Gravitas/Auto-GPT\\n[21] GPT-Engineer. https://github.com/AntonOsika/gpt-engineer\\n', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'})]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "blog_docs" + ] + }, { "cell_type": "markdown", "id": "798e731e-c6ff-46e3-a8bc-386832362af2", @@ -287,12 +518,12 @@ "source": [ "[Splitter](https://python.langchain.com/docs/modules/data_connection/document_transformers/recursive_text_splitter)\n", "\n", - "> This text splitter is the recommended one for generic text. It is parameterized by a list of characters. It tries to split on them in order until the chunks are small enough. The default list is [\"\\n\\n\", \"\\n\", \" \", \"\"]. This has the effect of trying to keep all paragraphs (and then sentences, and then words) together as long as possible, as those would generically seem to be the strongest semantically related pieces of text." + "> This text splitter is the recommended one for generic text. It is parameterized by a list of characters. It tries to split on them in order until the chunks are small enough. The default list is [\"\\n\\n\", \"\\n\", \" \", \"\"]. 
This has the effect of trying to keep all paragraphs (and then sentences, and then words) together as long as possible, as those would generically seem to be the strongest semantically related pieces of text.\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "e668d339-3951-4662-8387-c3d296646906", "metadata": {}, "outputs": [], @@ -307,27 +538,112 @@ "splits = text_splitter.split_documents(blog_docs)" ] }, + { + "cell_type": "code", + "execution_count": 25, + "id": "a9fce761", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='LLM Powered Autonomous Agents\\n \\nDate: June 23, 2023 | Estimated Reading Time: 31 min | Author: Lilian Weng\\n\\n\\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\\nAgent System Overview#\\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\\n\\nPlanning\\n\\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\\n\\n\\nMemory', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Memory\\n\\nShort-term memory: I would consider all the in-context learning (See Prompt Engineering) as utilizing short-term memory of the model to learn.\\nLong-term memory: This provides the agent with the capability to retain and recall (infinite) information over extended periods, often by leveraging an external vector store and fast retrieval.\\n\\n\\nTool use\\n\\nThe agent learns to call external APIs for extra information that is missing from the model weights (often hard to change after pre-training), including current information, code execution capability, access to proprietary information sources and more.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. 
The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Another quite distinct approach, LLM+P (Liu et al. 2023), involves relying on an external classical planner to do long-horizon planning. This approach utilizes the Planning Domain Definition Language (PDDL) as an intermediate interface to describe the planning problem. In this process, LLM (1) translates the problem into “Problem PDDL”, then (2) requests a classical planner to generate a PDDL plan based on an existing “Domain PDDL”, and finally (3) translates the PDDL plan back into natural language. Essentially, the planning step is outsourced to an external tool, assuming the availability of domain-specific PDDL and a suitable planner which is common in certain robotic setups but not in many other domains.\\nSelf-Reflection#\\nSelf-reflection is a vital aspect that allows autonomous agents to improve iteratively by refining past action decisions and correcting previous mistakes. It plays a crucial role in real-world tasks where trial and error are inevitable.\\nReAct (Yao et al. 2023) integrates reasoning and acting within LLM by extending the action space to be a combination of task-specific discrete actions and the language space. The former enables LLM to interact with the environment (e.g. use Wikipedia search API), while the latter prompting LLM to generate reasoning traces in natural language.\\nThe ReAct prompt template incorporates explicit steps for LLM to think, roughly formatted as:', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='The ReAct prompt template incorporates explicit steps for LLM to think, roughly formatted as:\\nThought: ...\\nAction: ...\\nObservation: ...\\n... (Repeated many times)', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Fig. 2. Examples of reasoning trajectories for knowledge-intensive tasks (e.g. HotpotQA, FEVER) and decision-making tasks (e.g. AlfWorld Env, WebShop). (Image source: Yao et al. 2023).\\nIn both experiments on knowledge-intensive tasks and decision-making tasks, ReAct works better than the Act-only baseline where Thought: … step is removed.\\nReflexion (Shinn & Labash 2023) is a framework to equips agents with dynamic memory and self-reflection capabilities to improve reasoning skills. Reflexion has a standard RL setup, in which the reward model provides a simple binary reward and the action space follows the setup in ReAct where the task-specific action space is augmented with language to enable complex reasoning steps. After each action $a_t$, the agent computes a heuristic $h_t$ and optionally may decide to reset the environment to start a new trial depending on the self-reflection results.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Fig. 3. Illustration of the Reflexion framework. 
(Image source: Shinn & Labash, 2023)\\nThe heuristic function determines when the trajectory is inefficient or contains hallucination and should be stopped. Inefficient planning refers to trajectories that take too long without success. Hallucination is defined as encountering a sequence of consecutive identical actions that lead to the same observation in the environment.\\nSelf-reflection is created by showing two-shot examples to LLM and each example is a pair of (failed trajectory, ideal reflection for guiding future changes in the plan). Then reflections are added into the agent’s working memory, up to three, to be used as context for querying LLM.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Fig. 4. Experiments on AlfWorld Env and HotpotQA. Hallucination is a more common failure than inefficient planning in AlfWorld. (Image source: Shinn & Labash, 2023)', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Chain of Hindsight (CoH; Liu et al. 2023) encourages the model to improve on its own outputs by explicitly presenting it with a sequence of past outputs, each annotated with feedback. Human feedback data is a collection of $D_h = \\\\{(x, y_i , r_i , z_i)\\\\}_{i=1}^n$, where $x$ is the prompt, each $y_i$ is a model completion, $r_i$ is the human rating of $y_i$, and $z_i$ is the corresponding human-provided hindsight feedback. Assume the feedback tuples are ranked by reward, $r_n \\\\geq r_{n-1} \\\\geq \\\\dots \\\\geq r_1$ The process is supervised fine-tuning where the data is a sequence in the form of $\\\\tau_h = (x, z_i, y_i, z_j, y_j, \\\\dots, z_n, y_n)$, where $\\\\leq i \\\\leq j \\\\leq n$. The model is finetuned to only predict $y_n$ where conditioned on the sequence prefix, such that the model can self-reflect to produce better output based on the feedback sequence. The model can optionally receive multiple rounds of instructions with human annotators at test time.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='To avoid overfitting, CoH adds a regularization term to maximize the log-likelihood of the pre-training dataset. To avoid shortcutting and copying (because there are many common words in feedback sequences), they randomly mask 0% - 5% of past tokens during training.\\nThe training dataset in their experiments is a combination of WebGPT comparisons, summarization from human feedback and human preference dataset.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Fig. 5. After fine-tuning with CoH, the model can follow instructions to produce outputs with incremental improvement in a sequence. (Image source: Liu et al. 2023)\\nThe idea of CoH is to present a history of sequentially improved outputs in context and train the model to take on the trend to produce better outputs. Algorithm Distillation (AD; Laskin et al. 2023) applies the same idea to cross-episode trajectories in reinforcement learning tasks, where an algorithm is encapsulated in a long history-conditioned policy. Considering that an agent interacts with the environment many times and in each episode the agent gets a little better, AD concatenates this learning history and feeds that into the model. Hence we should expect the next predicted action to lead to better performance than previous trials. 
The goal is to learn the process of RL instead of training a task-specific policy itself.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Fig. 6. Illustration of how Algorithm Distillation (AD) works. (Image source: Laskin et al. 2023).\\nThe paper hypothesizes that any algorithm that generates a set of learning histories can be distilled into a neural network by performing behavioral cloning over actions. The history data is generated by a set of source policies, each trained for a specific task. At the training stage, during each RL run, a random task is sampled and a subsequence of multi-episode history is used for training, such that the learned policy is task-agnostic.\\nIn reality, the model has limited context window length, so episodes should be short enough to construct multi-episode history. Multi-episodic contexts of 2-4 episodes are necessary to learn a near-optimal in-context RL algorithm. The emergence of in-context RL requires long enough context.\\nIn comparison with three baselines, including ED (expert distillation, behavior cloning with expert trajectories instead of learning history), source policy (used for generating trajectories for distillation by UCB), RL^2 (Duan et al. 2017; used as upper bound since it needs online RL), AD demonstrates in-context RL with performance getting close to RL^2 despite only using offline RL and learns much faster than other baselines. When conditioned on partial training history of the source policy, AD also improves much faster than ED baseline.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Fig. 7. Comparison of AD, ED, source policy and RL^2 on environments that require memory and exploration. Only binary reward is assigned. The source policies are trained with A3C for \"dark\" environments and DQN for watermaze.(Image source: Laskin et al. 2023)\\nComponent Two: Memory#\\n(Big thank you to ChatGPT for helping me draft this section. I’ve learned a lot about the human brain and data structure for fast MIPS in my conversations with ChatGPT.)\\nTypes of Memory#\\nMemory can be defined as the processes used to acquire, store, retain, and later retrieve information. There are several types of memory in human brains.\\n\\n\\nSensory Memory: This is the earliest stage of memory, providing the ability to retain impressions of sensory information (visual, auditory, etc) after the original stimuli have ended. Sensory memory typically only lasts for up to a few seconds. Subcategories include iconic memory (visual), echoic memory (auditory), and haptic memory (touch).\\n\\n\\nShort-Term Memory (STM) or Working Memory: It stores information that we are currently aware of and needed to carry out complex cognitive tasks such as learning and reasoning. Short-term memory is believed to have the capacity of about 7 items (Miller 1956) and lasts for 20-30 seconds.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Long-Term Memory (LTM): Long-term memory can store information for a remarkably long time, ranging from a few days to decades, with an essentially unlimited storage capacity. 
There are two subtypes of LTM:\\n\\nExplicit / declarative memory: This is memory of facts and events, and refers to those memories that can be consciously recalled, including episodic memory (events and experiences) and semantic memory (facts and concepts).\\nImplicit / procedural memory: This type of memory is unconscious and involves skills and routines that are performed automatically, like riding a bike or typing on a keyboard.\\n\\n\\n\\n\\nFig. 8. Categorization of human memory.\\nWe can roughly consider the following mappings:\\n\\nSensory memory as learning embedding representations for raw inputs, including text, image or other modalities;\\nShort-term memory as in-context learning. It is short and finite, as it is restricted by the finite context window length of Transformer.\\nLong-term memory as the external vector store that the agent can attend to at query time, accessible via fast retrieval.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Maximum Inner Product Search (MIPS)#\\nThe external memory can alleviate the restriction of finite attention span. A standard practice is to save the embedding representation of information into a vector store database that can support fast maximum inner-product search (MIPS). To optimize the retrieval speed, the common choice is the approximate nearest neighbors (ANN)\\u200b algorithm to return approximately top k nearest neighbors to trade off a little accuracy lost for a huge speedup.\\nA couple common choices of ANN algorithms for fast MIPS:', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='LSH (Locality-Sensitive Hashing): It introduces a hashing function such that similar input items are mapped to the same buckets with high probability, where the number of buckets is much smaller than the number of inputs.\\nANNOY (Approximate Nearest Neighbors Oh Yeah): The core data structure are random projection trees, a set of binary trees where each non-leaf node represents a hyperplane splitting the input space into half and each leaf stores one data point. Trees are built independently and at random, so to some extent, it mimics a hashing function. ANNOY search happens in all the trees to iteratively search through the half that is closest to the query and then aggregates the results. The idea is quite related to KD tree but a lot more scalable.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='HNSW (Hierarchical Navigable Small World): It is inspired by the idea of small world networks where most nodes can be reached by any other nodes within a small number of steps; e.g. “six degrees of separation” feature of social networks. HNSW builds hierarchical layers of these small-world graphs, where the bottom layers contain the actual data points. The layers in the middle create shortcuts to speed up search. When performing a search, HNSW starts from a random node in the top layer and navigates towards the target. When it can’t get any closer, it moves down to the next layer, until it reaches the bottom layer. Each move in the upper layers can potentially cover a large distance in the data space, and each move in the lower layers refines the search quality.\\nFAISS (Facebook AI Similarity Search): It operates on the assumption that in high dimensional space, distances between nodes follow a Gaussian distribution and thus there should exist clustering of data points. 
FAISS applies vector quantization by partitioning the vector space into clusters and then refining the quantization within clusters. Search first looks for cluster candidates with coarse quantization and then further looks into each cluster with finer quantization.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='ScaNN (Scalable Nearest Neighbors): The main innovation in ScaNN is anisotropic vector quantization. It quantizes a data point $x_i$ to $\\\\tilde{x}_i$ such that the inner product $\\\\langle q, x_i \\\\rangle$ is as similar to the original distance of $\\\\angle q, \\\\tilde{x}_i$ as possible, instead of picking the closet quantization centroid points.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Fig. 9. Comparison of MIPS algorithms, measured in recall@10. (Image source: Google Blog, 2020)\\nCheck more MIPS algorithms and performance comparison in ann-benchmarks.com.\\nComponent Three: Tool Use#\\nTool use is a remarkable and distinguishing characteristic of human beings. We create, modify and utilize external objects to do things that go beyond our physical and cognitive limits. Equipping LLMs with external tools can significantly extend the model capabilities.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Fig. 10. A picture of a sea otter using rock to crack open a seashell, while floating in the water. While some other animals can use tools, the complexity is not comparable with humans. (Image source: Animals using tools)\\nMRKL (Karpas et al. 2022), short for “Modular Reasoning, Knowledge and Language”, is a neuro-symbolic architecture for autonomous agents. A MRKL system is proposed to contain a collection of “expert” modules and the general-purpose LLM works as a router to route inquiries to the best suitable expert module. These modules can be neural (e.g. deep learning models) or symbolic (e.g. math calculator, currency converter, weather API).\\nThey did an experiment on fine-tuning LLM to call a calculator, using arithmetic as a test case. Their experiments showed that it was harder to solve verbal math problems than explicitly stated math problems because LLMs (7B Jurassic1-large model) failed to extract the right arguments for the basic arithmetic reliably. The results highlight when the external symbolic tools can work reliably, knowing when to and how to use the tools are crucial, determined by the LLM capability.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Both TALM (Tool Augmented Language Models; Parisi et al. 2022) and Toolformer (Schick et al. 2023) fine-tune a LM to learn to use external tool APIs. The dataset is expanded based on whether a newly added API call annotation can improve the quality of model outputs. See more details in the “External APIs” section of Prompt Engineering.\\nChatGPT Plugins and OpenAI API function calling are good examples of LLMs augmented with tool use capability working in practice. The collection of tool APIs can be provided by other developers (as in Plugins) or self-defined (as in function calls).\\nHuggingGPT (Shen et al. 
2023) is a framework to use ChatGPT as the task planner to select models available in HuggingFace platform according to the model descriptions and summarize the response based on the execution results.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Fig. 11. Illustration of how HuggingGPT works. (Image source: Shen et al. 2023)\\nThe system comprises of 4 stages:\\n(1) Task planning: LLM works as the brain and parses the user requests into multiple tasks. There are four attributes associated with each task: task type, ID, dependencies, and arguments. They use few-shot examples to guide LLM to do task parsing and planning.\\nInstruction:', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='The AI assistant can parse user input to several tasks: [{\"task\": task, \"id\", task_id, \"dep\": dependency_task_ids, \"args\": {\"text\": text, \"image\": URL, \"audio\": URL, \"video\": URL}}]. The \"dep\" field denotes the id of the previous task which generates a new resource that the current task relies on. A special tag \"-task_id\" refers to the generated text image, audio and video in the dependency task with id as task_id. The task MUST be selected from the following options: {{ Available Task List }}. There is a logical relationship between tasks, please note their order. If the user input can\\'t be parsed, you need to reply empty JSON. Here are several cases for your reference: {{ Demonstrations }}. The chat history is recorded as {{ Chat History }}. From this chat history, you can find the path of the user-mentioned resources for your task planning.\\n\\n(2) Model selection: LLM distributes the tasks to expert models, where the request is framed as a multiple-choice question. LLM is presented with a list of models to choose from. Due to the limited context length, task type based filtration is needed.\\nInstruction:', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Given the user request and the call command, the AI assistant helps the user to select a suitable model from a list of models to process the user request. The AI assistant merely outputs the model id of the most appropriate model. The output must be in a strict JSON format: \"id\": \"id\", \"reason\": \"your detail reason for the choice\". We have a list of models for you to choose from {{ Candidate Models }}. Please select one model from the list.\\n\\n(3) Task execution: Expert models execute on the specific tasks and log results.\\nInstruction:\\n\\nWith the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execution: {{ Predictions }}. You must first answer the user\\'s request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. 
If inference results contain a file path, must tell the user the complete file path.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='(4) Response generation: LLM receives the execution results and provides summarized results to users.\\nTo put HuggingGPT into real world usage, a couple challenges need to solve: (1) Efficiency improvement is needed as both LLM inference rounds and interactions with other models slow down the process; (2) It relies on a long context window to communicate over complicated task content; (3) Stability improvement of LLM outputs and external model services.\\nAPI-Bank (Li et al. 2023) is a benchmark for evaluating the performance of tool-augmented LLMs. It contains 53 commonly used API tools, a complete tool-augmented LLM workflow, and 264 annotated dialogues that involve 568 API calls. The selection of APIs is quite diverse, including search engines, calculator, calendar queries, smart home control, schedule management, health data management, account authentication workflow and more. Because there are a large number of APIs, LLM first has access to API search engine to find the right API to call and then uses the corresponding documentation to make a call.\\n\\nFig. 12. Pseudo code of how LLM makes an API call in API-Bank. (Image source: Li et al. 2023)\\nIn the API-Bank workflow, LLMs need to make a couple of decisions and at each step we can evaluate how accurate that decision is. Decisions include:', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Whether an API call is needed.\\nIdentify the right API to call: if not good enough, LLMs need to iteratively modify the API inputs (e.g. deciding search keywords for Search Engine API).\\nResponse based on the API results: the model can choose to refine and call again if results are not satisfied.\\n\\nThis benchmark evaluates the agent’s tool use capabilities at three levels:\\n\\nLevel-1 evaluates the ability to call the API. Given an API’s description, the model needs to determine whether to call a given API, call it correctly, and respond properly to API returns.\\nLevel-2 examines the ability to retrieve the API. The model needs to search for possible APIs that may solve the user’s requirement and learn how to use them by reading documentation.\\nLevel-3 assesses the ability to plan API beyond retrieve and call. Given unclear user requests (e.g. schedule group meetings, book flight/hotel/restaurant for a trip), the model may have to conduct multiple API calls to solve it.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Case Studies#\\nScientific Discovery Agent#\\nChemCrow (Bran et al. 2023) is a domain-specific example in which LLM is augmented with 13 expert-designed tools to accomplish tasks across organic synthesis, drug discovery, and materials design. The workflow, implemented in LangChain, reflects what was previously described in the ReAct and MRKLs and combines CoT reasoning with tools relevant to the tasks:\\n\\nThe LLM is provided with a list of tool names, descriptions of their utility, and details about the expected input/output.\\nIt is then instructed to answer a user-given prompt using the tools provided when necessary. 
The instruction suggests the model to follow the ReAct format - Thought, Action, Action Input, Observation.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='One interesting observation is that while the LLM-based evaluation concluded that GPT-4 and ChemCrow perform nearly equivalently, human evaluations with experts oriented towards the completion and chemical correctness of the solutions showed that ChemCrow outperforms GPT-4 by a large margin. This indicates a potential problem with using LLM to evaluate its own performance on domains that requires deep expertise. The lack of expertise may cause LLMs not knowing its flaws and thus cannot well judge the correctness of task results.\\nBoiko et al. (2023) also looked into LLM-empowered agents for scientific discovery, to handle autonomous design, planning, and performance of complex scientific experiments. This agent can use tools to browse the Internet, read documentation, execute code, call robotics experimentation APIs and leverage other LLMs.\\nFor example, when requested to \"develop a novel anticancer drug\", the model came up with the following reasoning steps:\\n\\ninquired about current trends in anticancer drug discovery;\\nselected a target;\\nrequested a scaffold targeting these compounds;\\nOnce the compound was identified, the model attempted its synthesis.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='inquired about current trends in anticancer drug discovery;\\nselected a target;\\nrequested a scaffold targeting these compounds;\\nOnce the compound was identified, the model attempted its synthesis.\\n\\nThey also discussed the risks, especially with illicit drugs and bioweapons. They developed a test set containing a list of known chemical weapon agents and asked the agent to synthesize them. 4 out of 11 requests (36%) were accepted to obtain a synthesis solution and the agent attempted to consult documentation to execute the procedure. 7 out of 11 were rejected and among these 7 rejected cases, 5 happened after a Web search while 2 were rejected based on prompt only.\\nGenerative Agents Simulation#\\nGenerative Agents (Park, et al. 2023) is super fun experiment where 25 virtual characters, each controlled by a LLM-powered agent, are living and interacting in a sandbox environment, inspired by The Sims. Generative agents create believable simulacra of human behavior for interactive applications.\\nThe design of generative agents combines LLM with memory, planning and reflection mechanisms to enable agents to behave conditioned on past experience, as well as to interact with other agents.\\n\\nMemory stream: is a long-term memory module (external database) that records a comprehensive list of agents’ experience in natural language.\\n\\nEach element is an observation, an event directly provided by the agent.\\n- Inter-agent communication can trigger new natural language statements.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content=\"Each element is an observation, an event directly provided by the agent.\\n- Inter-agent communication can trigger new natural language statements.\\n\\n\\nRetrieval model: surfaces the context to inform the agent’s behavior, according to relevance, recency and importance.\\n\\nRecency: recent events have higher scores\\nImportance: distinguish mundane from core memories. 
Ask LM directly.\\nRelevance: based on how related it is to the current situation / query.\\n\\n\\nReflection mechanism: synthesizes memories into higher level inferences over time and guides the agent’s future behavior. They are higher-level summaries of past events (<- note that this is a bit different from self-reflection above)\\n\\nPrompt LM with 100 most recent observations and to generate 3 most salient high-level questions given a set of observations/statements. Then ask LM to answer those questions.\\n\\n\\nPlanning & Reacting: translate the reflections and the environment information into actions\\n\\nPlanning is essentially in order to optimize believability at the moment vs in time.\\nPrompt template: {Intro of an agent X}. Here is X's plan today in broad strokes: 1)\\nRelationships between agents and observations of one agent by another are all taken into consideration for planning and reacting.\\nEnvironment information is present in a tree structure.\", metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Fig. 13. The generative agent architecture. (Image source: Park et al. 2023)\\nThis fun simulation results in emergent social behavior, such as information diffusion, relationship memory (e.g. two agents continuing the conversation topic) and coordination of social events (e.g. host a party and invite many others).\\nProof-of-Concept Examples#\\nAutoGPT has drawn a lot of attention into the possibility of setting up autonomous agents with LLM as the main controller. It has quite a lot of reliability issues given the natural language interface, but nevertheless a cool proof-of-concept demo. A lot of code in AutoGPT is about format parsing.\\nHere is the system message used by AutoGPT, where {{...}} are user inputs:\\nYou are {{ai-name}}, {{user-provided AI bot description}}.\\nYour decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.\\n\\nGOALS:\\n\\n1. {{user-provided goal 1}}\\n2. {{user-provided goal 2}}\\n3. ...\\n4. ...\\n5. ...', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='GOALS:\\n\\n1. {{user-provided goal 1}}\\n2. {{user-provided goal 2}}\\n3. ...\\n4. ...\\n5. ...\\n\\nConstraints:\\n1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.\\n2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.\\n3. No user assistance\\n4. Exclusively use the commands listed in double quotes e.g. \"command name\"\\n5. Use subprocesses for commands that will not terminate within a few minutes', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Commands:\\n1. Google Search: \"google\", args: \"input\": \"\"\\n2. Browse Website: \"browse_website\", args: \"url\": \"\", \"question\": \"\"\\n3. Start GPT Agent: \"start_agent\", args: \"name\": \"\", \"task\": \"\", \"prompt\": \"\"\\n4. Message GPT Agent: \"message_agent\", args: \"key\": \"\", \"message\": \"\"\\n5. List GPT Agents: \"list_agents\", args:\\n6. Delete GPT Agent: \"delete_agent\", args: \"key\": \"\"\\n7. Clone Repository: \"clone_repository\", args: \"repository_url\": \"\", \"clone_path\": \"\"\\n8. Write to file: \"write_to_file\", args: \"file\": \"\", \"text\": \"\"\\n9. 
Read file: \"read_file\", args: \"file\": \"\"\\n10. Append to file: \"append_to_file\", args: \"file\": \"\", \"text\": \"\"\\n11. Delete file: \"delete_file\", args: \"file\": \"\"', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='10. Append to file: \"append_to_file\", args: \"file\": \"\", \"text\": \"\"\\n11. Delete file: \"delete_file\", args: \"file\": \"\"\\n12. Search Files: \"search_files\", args: \"directory\": \"\"\\n13. Analyze Code: \"analyze_code\", args: \"code\": \"\"\\n14. Get Improved Code: \"improve_code\", args: \"suggestions\": \"\", \"code\": \"\"\\n15. Write Tests: \"write_tests\", args: \"code\": \"\", \"focus\": \"\"\\n16. Execute Python File: \"execute_python_file\", args: \"file\": \"\"\\n17. Generate Image: \"generate_image\", args: \"prompt\": \"\"\\n18. Send Tweet: \"send_tweet\", args: \"text\": \"\"\\n19. Do Nothing: \"do_nothing\", args:\\n20. Task Complete (Shutdown): \"task_complete\", args: \"reason\": \"\"', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Resources:\\n1. Internet access for searches and information gathering.\\n2. Long Term memory management.\\n3. GPT-3.5 powered Agents for delegation of simple tasks.\\n4. File output.\\n\\nPerformance Evaluation:\\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\\n2. Constructively self-criticize your big-picture behavior constantly.\\n3. Reflect on past decisions and strategies to refine your approach.\\n4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='You should only respond in JSON format as described below\\nResponse Format:\\n{\\n \"thoughts\": {\\n \"text\": \"thought\",\\n \"reasoning\": \"reasoning\",\\n \"plan\": \"- short bulleted\\\\n- list that conveys\\\\n- long-term plan\",\\n \"criticism\": \"constructive self-criticism\",\\n \"speak\": \"thoughts summary to say to user\"\\n },\\n \"command\": {\\n \"name\": \"command name\",\\n \"args\": {\\n \"arg name\": \"value\"\\n }\\n }\\n}\\nEnsure the response can be parsed by Python json.loads\\nGPT-Engineer is another project to create a whole repository of code given a task specified in natural language. The GPT-Engineer is instructed to think over a list of smaller components to build and ask for user input to clarify questions as needed.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Here are a sample conversation for task clarification sent to OpenAI ChatCompletion endpoint used by GPT-Engineer. The user inputs are wrapped in {{user input text}}.\\n[\\n {\\n \"role\": \"system\",\\n \"content\": \"You will read instructions and not carry them out, only seek to clarify them.\\\\nSpecifically you will first summarise a list of super short bullets of areas that need clarification.\\\\nThen you will pick one clarifying question, and wait for an answer from the user.\\\\n\"\\n },\\n {\\n \"role\": \"user\",\\n \"content\": \"We are writing {{a Super Mario game in python. MVC components split in separate files. Keyboard control.}}\\\\n\"\\n },\\n {\\n \"role\": \"assistant\",\\n \"content\": \"Summary of areas that need clarification:\\\\n1. Specifics of the Super Mario game (e.g. level design, characters, gameplay mechanics)\\\\n2. 
Details about the MVC components (e.g. which components are in each file)\\\\n3. Keyboard control implementation (e.g. which keys to use, how to handle input)\\\\n\\\\nClarifying question:\\\\nCan you provide more details about the Super Mario game, such as level design, characters, and gameplay mechanics?\"\\n },', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='},\\n {\\n \"role\": \"user\",\\n \"content\": \"{{There are 10 levels in total. The main character is a plumber named Mario, who can walk and jump. It is a classical platform game just like Super Mario. The main character moves from left to right, trying to get to the destination, where there are many obstacles and attacks from enemies in the process.}}\\\\n\\\\nIs anything else unclear? If yes, only answer in the form:\\\\n{remaining unclear areas} remaining questions.\\\\n{Next question}\\\\nIf everything is sufficiently clear, only answer \\\\\"Nothing more to clarify.\\\\\".\"\\n },\\n {\\n \"role\": \"assistant\",\\n \"content\": \"Remaining unclear areas: 2 remaining questions.\\\\nCan you provide more information about how the MVC components are split into separate files?\"\\n },\\n {\\n \"role\": \"user\",\\n \"content\": \"{{Make your own assumptions and state them explicitly before starting}}\"\\n }\\n]\\nThen after these clarification, the agent moved into the code writing mode with a different system message.\\nSystem message:', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='You will get instructions for code to write.\\nYou will write a very long answer. Make sure that every detail of the architecture is, in the end, implemented as code.\\nMake sure that every detail of the architecture is, in the end, implemented as code.\\nThink step by step and reason yourself to the right decisions to make sure we get it right.\\nYou will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.\\nThen you will output the content of each file including ALL code.\\nEach file must strictly follow a markdown code block format, where the following tokens must be replaced such that\\nFILENAME is the lowercase file name including the file extension,\\nLANG is the markup code block language for the code’s language, and CODE is the code:\\nFILENAME\\nCODE\\nYou will start with the “entrypoint” file, then go to the ones that are imported by that file, and so on.\\nPlease note that the code should be fully functional. No placeholders.\\nFollow a language and framework appropriate best practice file naming convention.\\nMake sure that files contain all imports, types etc. Make sure that code in different files are compatible with each other.\\nEnsure to implement all code, if you are unsure, write a plausible implementation.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Make sure that files contain all imports, types etc. 
Make sure that code in different files are compatible with each other.\\nEnsure to implement all code, if you are unsure, write a plausible implementation.\\nInclude module dependency or package manager dependency definition file.\\nBefore you finish, double check that all parts of the architecture is present in the files.\\nUseful to know:\\nYou almost always put different classes in different files.\\nFor Python, you always create an appropriate requirements.txt file.\\nFor NodeJS, you always create an appropriate package.json file.\\nYou always add a comment briefly describing the purpose of the function definition.\\nYou try to add comments explaining very complex bits of logic.\\nYou always follow the best practices for the requested languages in terms of describing the code written as a defined\\npackage/project.\\nPython toolbelt preferences:', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='pytest\\ndataclasses', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Conversatin samples:\\n[\\n {\\n \"role\": \"system\",', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='\"content\": \"You will get instructions for code to write.\\\\nYou will write a very long answer. Make sure that every detail of the architecture is, in the end, implemented as code.\\\\nMake sure that every detail of the architecture is, in the end, implemented as code.\\\\n\\\\nThink step by step and reason yourself to the right decisions to make sure we get it right.\\\\nYou will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.\\\\n\\\\nThen you will output the content of each file including ALL code.\\\\nEach file must strictly follow a markdown code block format, where the following tokens must be replaced such that\\\\nFILENAME is the lowercase file name including the file extension,\\\\nLANG is the markup code block language for the code\\'s language, and CODE is the code:\\\\n\\\\nFILENAME\\\\n```LANG\\\\nCODE\\\\n```\\\\n\\\\nYou will start with the \\\\\"entrypoint\\\\\" file, then go to the ones that are imported by that file, and so on.\\\\nPlease note that the code should be fully functional. No placeholders.\\\\n\\\\nFollow a language and framework appropriate best practice file naming convention.\\\\nMake sure that files contain all imports, types etc. Make sure that code in different', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='note that the code should be fully functional. No placeholders.\\\\n\\\\nFollow a language and framework appropriate best practice file naming convention.\\\\nMake sure that files contain all imports, types etc. 
Make sure that code in different files are compatible with each other.\\\\nEnsure to implement all code, if you are unsure, write a plausible implementation.\\\\nInclude module dependency or package manager dependency definition file.\\\\nBefore you finish, double check that all parts of the architecture is present in the files.\\\\n\\\\nUseful to know:\\\\nYou almost always put different classes in different files.\\\\nFor Python, you always create an appropriate requirements.txt file.\\\\nFor NodeJS, you always create an appropriate package.json file.\\\\nYou always add a comment briefly describing the purpose of the function definition.\\\\nYou try to add comments explaining very complex bits of logic.\\\\nYou always follow the best practices for the requested languages in terms of describing the code written as a defined\\\\npackage/project.\\\\n\\\\n\\\\nPython toolbelt preferences:\\\\n- pytest\\\\n- dataclasses\\\\n\"', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='},\\n # … same conversation as earlier, ended with \"Make your own assumptions and state them explicitly before starting\".\\n {\\n \"role\": \"assistant\",\\n \"content\": \"Assumptions:\\\\n1. Model: The model will contain the game\\'s data, such as level information, character states, and enemy positions.\\\\n2. View: The view will handle the game\\'s visuals, including rendering the game objects, backgrounds, and updating the display.\\\\n3. Controller: The controller will manage user input, such as keyboard controls, and update the model accordingly.\\\\n\\\\nWith these assumptions in mind, is there any specific aspect of the keyboard control implementation you would like me to clarify?\"\\n },\\n {\\n \"role\": \"user\",', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='},\\n {\\n \"role\": \"user\",\\n \"content\": \"Please now remember the steps:\\\\n\\\\nThink step by step and reason yourself to the right decisions to make sure we get it right.\\\\nFirst lay out the names of the core classes, functions, methods that will be necessary, As well as a quick comment on their purpose.\\\\n\\\\nThen you will output the content of each file including ALL code.\\\\nEach file must strictly follow a markdown code block format, where the following tokens must be replaced such that\\\\nFILENAME is the lowercase file name including the file extension,\\\\nLANG is the markup code block language for the code\\'s language, and CODE is the code:\\\\n\\\\nFILENAME\\\\n```LANG\\\\nCODE\\\\n```\\\\n\\\\nPlease note that the code should be fully functional. No placeholders.\\\\n\\\\nYou will start with the \\\\\"entrypoint\\\\\" file, then go to the ones that are imported by that file, and so on.\\\\nFollow a language and framework appropriate best practice file naming convention.\\\\nMake sure that files contain all imports, types etc. The code should be fully functional. 
Make sure that code in different files are compatible with each other.\\\\nBefore you finish, double check that all parts of the architecture is present in the files.\\\\n\"', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='}\\n]\\nChallenges#\\nAfter going through key ideas and demos of building LLM-centered agents, I start to see a couple common limitations:', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Finite context length: The restricted context capacity limits the inclusion of historical information, detailed instructions, API call context, and responses. The design of the system has to work with this limited communication bandwidth, while mechanisms like self-reflection to learn from past mistakes would benefit a lot from long or infinite context windows. Although vector stores and retrieval can provide access to a larger knowledge pool, their representation power is not as powerful as full attention.\\n\\n\\nChallenges in long-term planning and task decomposition: Planning over a lengthy history and effectively exploring the solution space remain challenging. LLMs struggle to adjust plans when faced with unexpected errors, making them less robust compared to humans who learn from trial and error.\\n\\n\\nReliability of natural language interface: Current agent system relies on natural language as an interface between LLMs and external components such as memory and tools. However, the reliability of model outputs is questionable, as LLMs may make formatting errors and occasionally exhibit rebellious behavior (e.g. refuse to follow an instruction). Consequently, much of the agent demo code focuses on parsing model output.\\n\\n\\nCitation#\\nCited as:\\n\\nWeng, Lilian. (Jun 2023). “LLM-powered Autonomous Agents”. Lil’Log. https://lilianweng.github.io/posts/2023-06-23-agent/.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Or\\n@article{weng2023agent,\\n title = \"LLM-powered Autonomous Agents\",\\n author = \"Weng, Lilian\",\\n journal = \"lilianweng.github.io\",\\n year = \"2023\",\\n month = \"Jun\",\\n url = \"https://lilianweng.github.io/posts/2023-06-23-agent/\"\\n}\\nReferences#\\n[1] Wei et al. “Chain of thought prompting elicits reasoning in large language models.” NeurIPS 2022\\n[2] Yao et al. “Tree of Thoughts: Dliberate Problem Solving with Large Language Models.” arXiv preprint arXiv:2305.10601 (2023).\\n[3] Liu et al. “Chain of Hindsight Aligns Language Models with Feedback\\n“ arXiv preprint arXiv:2302.02676 (2023).\\n[4] Liu et al. “LLM+P: Empowering Large Language Models with Optimal Planning Proficiency” arXiv preprint arXiv:2304.11477 (2023).', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='[4] Liu et al. “LLM+P: Empowering Large Language Models with Optimal Planning Proficiency” arXiv preprint arXiv:2304.11477 (2023).\\n[5] Yao et al. “ReAct: Synergizing reasoning and acting in language models.” ICLR 2023.\\n[6] Google Blog. “Announcing ScaNN: Efficient Vector Similarity Search” July 28, 2020.\\n[7] https://chat.openai.com/share/46ff149e-a4c7-4dd7-a800-fc4a642ea389\\n[8] Shinn & Labash. “Reflexion: an autonomous agent with dynamic memory and self-reflection” arXiv preprint arXiv:2303.11366 (2023).\\n[9] Laskin et al. “In-context Reinforcement Learning with Algorithm Distillation” ICLR 2023.\\n[10] Karpas et al. 
“MRKL Systems A modular, neuro-symbolic architecture that combines large language models, external knowledge sources and discrete reasoning.” arXiv preprint arXiv:2205.00445 (2022).', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='[11] Nakano et al. “Webgpt: Browser-assisted question-answering with human feedback.” arXiv preprint arXiv:2112.09332 (2021).\\n[12] Parisi et al. “TALM: Tool Augmented Language Models”\\n[13] Schick et al. “Toolformer: Language Models Can Teach Themselves to Use Tools.” arXiv preprint arXiv:2302.04761 (2023).\\n[14] Weaviate Blog. Why is Vector Search so fast? Sep 13, 2022.\\n[15] Li et al. “API-Bank: A Benchmark for Tool-Augmented LLMs” arXiv preprint arXiv:2304.08244 (2023).\\n[16] Shen et al. “HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in HuggingFace” arXiv preprint arXiv:2303.17580 (2023).\\n[17] Bran et al. “ChemCrow: Augmenting large-language models with chemistry tools.” arXiv preprint arXiv:2304.05376 (2023).', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='[17] Bran et al. “ChemCrow: Augmenting large-language models with chemistry tools.” arXiv preprint arXiv:2304.05376 (2023).\\n[18] Boiko et al. “Emergent autonomous scientific research capabilities of large language models.” arXiv preprint arXiv:2304.05332 (2023).\\n[19] Joon Sung Park, et al. “Generative Agents: Interactive Simulacra of Human Behavior.” arXiv preprint arXiv:2304.03442 (2023).\\n[20] AutoGPT. https://github.com/Significant-Gravitas/Auto-GPT\\n[21] GPT-Engineer. https://github.com/AntonOsika/gpt-engineer', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'})]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "splits" + ] + }, { "cell_type": "markdown", "id": "427303a1-3ed4-430c-bfc7-cb3e48022f1d", "metadata": {}, "source": [ - "[Vectorstores](https://python.langchain.com/docs/integrations/vectorstores/)" + "[Vectorstores](https://python.langchain.com/docs/integrations/vectorstores/)\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "baa90aaf-cc1b-46a1-9fba-cf20804dcb41", "metadata": {}, "outputs": [], "source": [ "# Index\n", - "from langchain_openai import OpenAIEmbeddings\n", + "# from langchain_openai import OpenAIEmbeddings\n", + "from langchain_google_genai import GoogleGenerativeAIEmbeddings\n", + "\n", + "# If there is no environment variable set for the API key, you can pass the API\n", + "# key to the parameter `google_api_key` of the `GoogleGenerativeAIEmbeddings`\n", + "# function: `google_api_key = \"key\"`.\n", + "\n", + "gemini_embeddings = GoogleGenerativeAIEmbeddings(model=\"models/embedding-001\")\n", + "\n", "from langchain_community.vectorstores import Chroma\n", - "vectorstore = Chroma.from_documents(documents=splits, \n", - " embedding=OpenAIEmbeddings())\n", "\n", + "vectorstore = Chroma.from_documents(\n", + " documents=splits, # Data\n", + " embedding=gemini_embeddings, # Embedding model\n", + " # persist_directory=\"./chroma_db\" # Directory to save data\n", + " )\n", + "# vectorstore = Chroma.from_documents(documents=splits, \n", + "# embedding=OpenAIEmbeddings())\n", "retriever = vectorstore.as_retriever()" ] }, @@ -336,46 +652,96 @@ "id": "ba890329-1411-4922-bd27-fe0490dd1208", "metadata": {}, "source": [ - "## Part 3: Retrieval" + "## Part 3: Retrieval\n" ] }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 27, "id": "fafdada1-4c4e-41f8-ad1a-33861aae3930", "metadata": {}, "outputs": [], "source": [ "# Index\n", - "from langchain_openai import OpenAIEmbeddings\n", + "# from langchain_openai import OpenAIEmbeddings\n", + "# from langchain_community.vectorstores import Chroma\n", + "# vectorstore = Chroma.from_documents(documents=splits, \n", + "# embedding=OpenAIEmbeddings())\n", + "gemini_embeddings = GoogleGenerativeAIEmbeddings(model=\"models/embedding-001\")\n", + "\n", "from langchain_community.vectorstores import Chroma\n", - "vectorstore = Chroma.from_documents(documents=splits, \n", - " embedding=OpenAIEmbeddings())\n", "\n", + "vectorstore = Chroma.from_documents(\n", + " documents=splits, # Data\n", + " embedding=gemini_embeddings, # Embedding model\n", + " # persist_directory=\"./chroma_db\" # Directory to save data\n", + " )\n", "\n", "retriever = vectorstore.as_retriever(search_kwargs={\"k\": 1})" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "57c2de7a-93e6-4072-bc5b-db6516f96dda", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\_api\\deprecation.py:119: LangChainDeprecationWarning: The method `BaseRetriever.get_relevant_documents` was deprecated in langchain-core 0.1.46 and will be removed in 0.3.0. Use invoke instead.\n", + " warn_deprecated(\n" + ] + } + ], "source": [ "docs = retriever.get_relevant_documents(\"What is Task Decomposition?\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "db96f877-60d3-4741-9846-e2903831583d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "len(docs)" ] }, + { + "cell_type": "code", + "execution_count": 30, + "id": "5c967d1d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. 
\"Write a story outline.\" for writing a novel, or (3) with human inputs.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'})]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docs = retriever.get_relevant_documents(\"Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed ligh\")\n", + "docs" + ] + }, { "attachments": { "f9b0e284-58e4-4d33-9594-2dad351c569a.png": { @@ -388,17 +754,29 @@ "source": [ "## Part 4: Generation\n", "\n", - "![Screenshot 2024-02-12 at 1.37.38 PM.png](attachment:f9b0e284-58e4-4d33-9594-2dad351c569a.png)" + "![Screenshot 2024-02-12 at 1.37.38 PM.png](attachment:f9b0e284-58e4-4d33-9594-2dad351c569a.png)\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "id": "8beb6c14-5e18-43e7-9d04-59e3b8a81cc9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template='Answer the question based only on the following context:\\n{context}\\n\\nQuestion: {question}\\n'))])" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "from langchain_openai import ChatOpenAI\n", + "# from langchain_google_genai import ChatGoogleGenerativeAI\n", + "\n", "from langchain.prompts import ChatPromptTemplate\n", "\n", "# Prompt\n", @@ -414,18 +792,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "id": "e4461264-5cac-479a-917c-9bf589826da4", "metadata": {}, "outputs": [], "source": [ "# LLM\n", - "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)" + "# llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", + "llm = ChatGoogleGenerativeAI(model=\"gemini-pro\",\n", + " temperature=0.7, top_p=0.85)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "id": "55d6629f-18ec-4372-a557-b254fbb1dd2d", "metadata": {}, "outputs": [], @@ -436,10 +816,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "id": "94470770-8df4-4359-9504-ef6c8b3137ff", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content='Task decomposition is the process of breaking down a complex task into smaller, more manageable steps. 
This can be done by an LLM with simple prompting, by using task-specific instructions, or with human inputs.', response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': [{'category': 'HARM_CATEGORY_SEXUALLY_EXPLICIT', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_HATE_SPEECH', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_HARASSMENT', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'probability': 'NEGLIGIBLE', 'blocked': False}]}, id='run-267f88f2-3081-43ca-95c3-8b4c172360cf-0')" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Run\n", "chain.invoke({\"context\":docs,\"question\":\"What is Task Decomposition?\"})" @@ -447,7 +838,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "id": "65770e2d-3d5e-4371-abc9-0aeca9646885", "metadata": {}, "outputs": [], @@ -458,10 +849,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "id": "f53e5840-0a0f-4428-a4a4-6922800aff89", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template=\"You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\\nQuestion: {question} \\nContext: {context} \\nAnswer:\"))])" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "prompt_hub_rag" ] @@ -471,15 +873,32 @@ "id": "8ffe29a1-5527-419e-9f12-8a3061d12885", "metadata": {}, "source": [ - "[RAG chains](https://python.langchain.com/docs/expression_language/get_started#rag-search-example)" + "[RAG chains](https://python.langchain.com/docs/expression_language/get_started#rag-search-example)\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "id": "8208a8bc-c75f-4e8e-8601-680746cd6276", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[37], line 11\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrunnables\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m RunnablePassthrough\n\u001b[0;32m 4\u001b[0m rag_chain \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 5\u001b[0m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontext\u001b[39m\u001b[38;5;124m\"\u001b[39m: retriever, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquestion\u001b[39m\u001b[38;5;124m\"\u001b[39m: RunnablePassthrough()}\n\u001b[0;32m 6\u001b[0m \u001b[38;5;241m|\u001b[39m prompt\n\u001b[0;32m 7\u001b[0m \u001b[38;5;241m|\u001b[39m llm\n\u001b[0;32m 8\u001b[0m \u001b[38;5;241m|\u001b[39m 
StrOutputParser()\n\u001b[0;32m 9\u001b[0m )\n\u001b[1;32m---> 11\u001b[0m \u001b[43mrag_chain\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minvoke\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mWhat is Task Decomposition?\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\base.py:2499\u001b[0m, in \u001b[0;36mRunnableSequence.invoke\u001b[1;34m(self, input, config)\u001b[0m\n\u001b[0;32m 2497\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 2498\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, step \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msteps):\n\u001b[1;32m-> 2499\u001b[0m \u001b[38;5;28minput\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[43mstep\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minvoke\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 2500\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2501\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# mark each step as a child run\u001b[39;49;00m\n\u001b[0;32m 2502\u001b[0m \u001b[43m \u001b[49m\u001b[43mpatch_config\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 2503\u001b[0m \u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrun_manager\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_child\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mseq:step:\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mi\u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2504\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2505\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2506\u001b[0m \u001b[38;5;66;03m# finish the root run\u001b[39;00m\n\u001b[0;32m 2507\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "File \u001b[1;32mc:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\base.py:3142\u001b[0m, in \u001b[0;36mRunnableParallel.invoke\u001b[1;34m(self, input, config)\u001b[0m\n\u001b[0;32m 3129\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m get_executor_for_config(config) \u001b[38;5;28;01mas\u001b[39;00m executor:\n\u001b[0;32m 3130\u001b[0m futures \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m 3131\u001b[0m executor\u001b[38;5;241m.\u001b[39msubmit(\n\u001b[0;32m 3132\u001b[0m step\u001b[38;5;241m.\u001b[39minvoke,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 3140\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, step \u001b[38;5;129;01min\u001b[39;00m steps\u001b[38;5;241m.\u001b[39mitems()\n\u001b[0;32m 3141\u001b[0m ]\n\u001b[1;32m-> 3142\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43m{\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mfuture\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m 
\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfuture\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mzip\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msteps\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfutures\u001b[49m\u001b[43m)\u001b[49m\u001b[43m}\u001b[49m\n\u001b[0;32m 3143\u001b[0m \u001b[38;5;66;03m# finish the root run\u001b[39;00m\n\u001b[0;32m 3144\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "File \u001b[1;32mc:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\runnables\\base.py:3142\u001b[0m, in \u001b[0;36m\u001b[1;34m(.0)\u001b[0m\n\u001b[0;32m 3129\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m get_executor_for_config(config) \u001b[38;5;28;01mas\u001b[39;00m executor:\n\u001b[0;32m 3130\u001b[0m futures \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m 3131\u001b[0m executor\u001b[38;5;241m.\u001b[39msubmit(\n\u001b[0;32m 3132\u001b[0m step\u001b[38;5;241m.\u001b[39minvoke,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 3140\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, step \u001b[38;5;129;01min\u001b[39;00m steps\u001b[38;5;241m.\u001b[39mitems()\n\u001b[0;32m 3141\u001b[0m ]\n\u001b[1;32m-> 3142\u001b[0m output \u001b[38;5;241m=\u001b[39m {key: \u001b[43mfuture\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m key, future \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(steps, futures)}\n\u001b[0;32m 3143\u001b[0m \u001b[38;5;66;03m# finish the root run\u001b[39;00m\n\u001b[0;32m 3144\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "File \u001b[1;32mc:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\concurrent\\futures\\_base.py:451\u001b[0m, in \u001b[0;36mFuture.result\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 448\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;241m==\u001b[39m FINISHED:\n\u001b[0;32m 449\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__get_result()\n\u001b[1;32m--> 451\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_condition\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 453\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;129;01min\u001b[39;00m [CANCELLED, CANCELLED_AND_NOTIFIED]:\n\u001b[0;32m 454\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CancelledError()\n", + "File \u001b[1;32mc:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\threading.py:320\u001b[0m, in \u001b[0;36mCondition.wait\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 318\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m: \u001b[38;5;66;03m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[39;00m\n\u001b[0;32m 319\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 320\u001b[0m 
\u001b[43mwaiter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43macquire\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 321\u001b[0m gotit \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m 322\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "\u001b[1;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], "source": [ "from langchain_core.output_parsers import StrOutputParser\n", "from langchain_core.runnables import RunnablePassthrough\n", @@ -519,7 +938,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/rag_from_scratch_5_to_9.ipynb b/rag_from_scratch_5_to_9.ipynb index 9220584..1a57369 100644 --- a/rag_from_scratch_5_to_9.ipynb +++ b/rag_from_scratch_5_to_9.ipynb @@ -18,17 +18,17 @@ "\n", "## Enviornment\n", "\n", - "`(1) Packages`" + "`(1) Packages`\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "df89be8f-2c49-4f4f-9503-2bff0b08a67a", "metadata": {}, "outputs": [], "source": [ - "! pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain" + "# ! pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain" ] }, { @@ -38,12 +38,12 @@ "source": [ "`(2) LangSmith`\n", "\n", - "https://docs.smith.langchain.com/" + "https://docs.smith.langchain.com/\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "5258de38-0cc0-4d9d-a5ca-6e750ebe6976", "metadata": {}, "outputs": [], @@ -51,7 +51,7 @@ "import os\n", "os.environ['LANGCHAIN_TRACING_V2'] = 'true'\n", "os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'\n", - "os.environ['LANGCHAIN_API_KEY'] = " + "os.environ['LANGCHAIN_API_KEY'] = \"\"" ] }, { @@ -59,17 +59,17 @@ "id": "feaccdca-1ab0-43b1-82c2-22e9cd27675b", "metadata": {}, "source": [ - "`(3) API Keys`" + "`(3) API Keys`\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "1cd6453b-2721-491c-b979-1860d58d8cf5", "metadata": {}, "outputs": [], "source": [ - "os.environ['OPENAI_API_KEY'] = " + "os.environ['GOOGLE_API_KEY'] = " ] }, { @@ -90,20 +90,20 @@ "\n", "Docs:\n", "\n", - "* https://python.langchain.com/docs/modules/data_connection/retrievers/MultiQueryRetriever\n", + "- https://python.langchain.com/docs/modules/data_connection/retrievers/MultiQueryRetriever\n", "\n", - "### Index" + "### Index\n" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "id": "9d1b6e2b-dd76-410d-b870-23e02564a665", "metadata": {}, "outputs": [], "source": [ "#### INDEXING ####\n", - "\n", + "from langchain.vectorstores import Chroma\n", "# Load blog\n", "import bs4\n", "from langchain_community.document_loaders import WebBaseLoader\n", @@ -127,10 +127,25 @@ "splits = text_splitter.split_documents(blog_docs)\n", "\n", "# Index\n", - "from langchain_openai import OpenAIEmbeddings\n", - "from langchain_community.vectorstores import Chroma\n", - "vectorstore = Chroma.from_documents(documents=splits, \n", - " embedding=OpenAIEmbeddings())\n", + "# from langchain_openai import OpenAIEmbeddings\n", + "# from langchain_community.vectorstores import Chroma\n", + "# vectorstore = Chroma.from_documents(documents=splits, \n", + "# embedding=OpenAIEmbeddings())\n", + "\n", + "from langchain_google_genai import GoogleGenerativeAIEmbeddings\n", + "\n", + "# If there is no environment variable set for the API key, you can pass the API\n", + "# key to the 
parameter `google_api_key` of the `GoogleGenerativeAIEmbeddings`\n", + "# function: `google_api_key = \"key\"`.\n", + "\n", + "gemini_embeddings = GoogleGenerativeAIEmbeddings(model=\"models/embedding-001\")\n", + "\n", + "# Save to disk\n", + "vectorstore = Chroma.from_documents(\n", + " documents=splits, # Data\n", + " embedding=gemini_embeddings, # Embedding model\n", + " persist_directory=\"./chroma_db\" # Directory to save data\n", + " )\n", "\n", "retriever = vectorstore.as_retriever()" ] @@ -140,12 +155,12 @@ "id": "76f1b6c5-faa9-404b-90c6-22d3b40169fa", "metadata": {}, "source": [ - "### Prompt" + "### Prompt\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "965de464-0c98-4318-9f9e-f8a597c8d5d6", "metadata": {}, "outputs": [], @@ -161,11 +176,12 @@ "prompt_perspectives = ChatPromptTemplate.from_template(template)\n", "\n", "from langchain_core.output_parsers import StrOutputParser\n", - "from langchain_openai import ChatOpenAI\n", + "# from langchain_openai import ChatOpenAI\n", + "from langchain_google_genai import ChatGoogleGenerativeAI\n", "\n", "generate_queries = (\n", " prompt_perspectives \n", - " | ChatOpenAI(temperature=0) \n", + " | ChatGoogleGenerativeAI(model=\"gemini-pro\",temperature=0.7, top_p=0.85)\n", " | StrOutputParser() \n", " | (lambda x: x.split(\"\\n\"))\n", ")" @@ -173,10 +189,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "4f253520-386f-434b-8daa-d6dadb89eddb", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\_api\\beta_decorator.py:87: LangChainBetaWarning: The function `loads` is in beta. 
It is actively being worked on, so the API may change.\n", + " warn_beta(\n" + ] + }, + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from langchain.load import dumps, loads\n", "\n", @@ -198,13 +233,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, + "id": "6051dc1e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='}\\n]\\nChallenges#\\nAfter going through key ideas and demos of building LLM-centered agents, I start to see a couple common limitations:', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'})]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docs" + ] + }, + { + "cell_type": "code", + "execution_count": 8, "id": "af6e74e8-ddae-4165-9e4b-0022ac125194", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'The provided context does not mention anything about task decomposition for LLM agents, so I cannot answer this question from the provided context.'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ + "\n", "from operator import itemgetter\n", - "from langchain_openai import ChatOpenAI\n", + "# from langchain_openai import ChatOpenAI\n", + "from langchain_google_genai import GoogleGenerativeAIEmbeddings\n", + "\n", "from langchain_core.runnables import RunnablePassthrough\n", "\n", "# RAG\n", @@ -217,7 +287,9 @@ "\n", "prompt = ChatPromptTemplate.from_template(template)\n", "\n", - "llm = ChatOpenAI(temperature=0)\n", + "# llm = ChatOpenAI(temperature=0)\n", + "llm = ChatGoogleGenerativeAI(model=\"gemini-pro\",\n", + " temperature=0.7, top_p=0.85)\n", "\n", "final_rag_chain = (\n", " {\"context\": retrieval_chain, \n", @@ -248,18 +320,18 @@ "\n", "Docs:\n", "\n", - "* https://github.com/langchain-ai/langchain/blob/master/cookbook/rag_fusion.ipynb?ref=blog.langchain.dev\n", + "- https://github.com/langchain-ai/langchain/blob/master/cookbook/rag_fusion.ipynb?ref=blog.langchain.dev\n", "\n", - "Blog / repo: \n", + "Blog / repo:\n", "\n", - "* https://towardsdatascience.com/forget-rag-the-future-is-rag-fusion-1147298d8ad1\n", + "- https://towardsdatascience.com/forget-rag-the-future-is-rag-fusion-1147298d8ad1\n", "\n", - "### Prompt" + "### Prompt\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "34e7075b-b80d-461d-9e2e-e05e29436f3e", "metadata": {}, "outputs": [], @@ -275,17 +347,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "9781b40c-c408-42f4-ae14-cd11be513b63", "metadata": {}, "outputs": [], "source": [ "from langchain_core.output_parsers import StrOutputParser\n", - "from langchain_openai import ChatOpenAI\n", + "# from langchain_openai import ChatOpenAI\n", + "from langchain_google_genai import ChatGoogleGenerativeAI\n", + "\n", "\n", "generate_queries = (\n", " prompt_rag_fusion \n", - " | ChatOpenAI(temperature=0)\n", + " # | ChatOpenAI(temperature=0)\n", + " | ChatGoogleGenerativeAI(model=\"gemini-pro\",temperature=0.7, top_p=0.85)\n", " | StrOutputParser() \n", " | (lambda x: x.split(\"\\n\"))\n", ")" @@ -293,10 +368,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "2b1adff1-e993-4747-b95d-656eaaeccfdd", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + 
"execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from langchain.load import dumps, loads\n", "\n", @@ -337,10 +423,45 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, + "id": "d2655801", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(Document(page_content='pytest\\ndataclasses', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " 0.19518647226209362),\n", + " (Document(page_content='Conversatin samples:\\n[\\n {\\n \"role\": \"system\",', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " 0.06506215742069787)]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docs" + ] + }, + { + "cell_type": "code", + "execution_count": 13, "id": "ce2adf2d-3d9f-4d43-afb0-8304edcfb1f1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'The provided context does not contain information about task decomposition for LLM agents, so I cannot answer this question from the provided context.'" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from langchain_core.runnables import RunnablePassthrough\n", "\n", @@ -372,7 +493,7 @@ "source": [ "Trace:\n", "\n", - "https://smith.langchain.com/public/071202c9-9f4d-41b1-bf9d-86b7c5a7525b/r" + "https://smith.langchain.com/public/071202c9-9f4d-41b1-bf9d-86b7c5a7525b/r\n" ] }, { @@ -381,12 +502,12 @@ "id": "94c812d3-4d91-4634-8301-0b68be88a887", "metadata": {}, "source": [ - "## Part 7: Decomposition" + "## Part 7: Decomposition\n" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 14, "id": "f82fac99-58dc-4bb9-84e6-51180db855ad", "metadata": {}, "outputs": [], @@ -396,23 +517,25 @@ "# Decomposition\n", "template = \"\"\"You are a helpful assistant that generates multiple sub-questions related to an input question. \\n\n", "The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \\n\n", - "Generate multiple search queries related to: {question} \\n\n", + "Generate multiple search queries related to:{question} \\n\n", "Output (3 queries):\"\"\"\n", "prompt_decomposition = ChatPromptTemplate.from_template(template)" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 15, "id": "c31eefd9-5598-44a1-b0d6-dd04553a3eb4", "metadata": {}, "outputs": [], "source": [ - "from langchain_openai import ChatOpenAI\n", + "# from langchain_openai import ChatOpenAI\n", "from langchain_core.output_parsers import StrOutputParser\n", "\n", "# LLM\n", - "llm = ChatOpenAI(temperature=0)\n", + "# llm = ChatOpenAI(temperature=0)\n", + "llm = ChatGoogleGenerativeAI(model=\"gemini-pro\",\n", + " temperature=0.7, top_p=0.85)\n", "\n", "# Chain\n", "generate_queries_decomposition = ( prompt_decomposition | llm | StrOutputParser() | (lambda x: x.split(\"\\n\")))\n", @@ -424,19 +547,19 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 16, "id": "07191b5c-cf72-4b8f-a225-f57dfdc2fc78", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['1. What is LLM technology and how does it work in autonomous agent systems?',\n", - " '2. What are the specific components that make up an LLM-powered autonomous agent system?',\n", - " '3. 
How do the main components of an LLM-powered autonomous agent system interact with each other to enable autonomous functionality?']" + "['1. What are the different types of LLMs used in autonomous agent systems?',\n", + " '2. How do LLMs contribute to the autonomy of an agent system?',\n", + " '3. What are the key components of an autonomous agent system that leverage LLM capabilities?']" ] }, - "execution_count": 4, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -455,19 +578,19 @@ "id": "19543d04-ff31-4774-b89c-9d31f5a28fc9", "metadata": {}, "source": [ - "### Answer recursively \n", + "### Answer recursively\n", "\n", "![Screenshot 2024-02-18 at 1.55.32 PM.png](attachment:9a9685de-051f-48fa-b68f-2b1f85344cdf.png)\n", "\n", "Papers:\n", "\n", - "* https://arxiv.org/pdf/2205.10625.pdf\n", - "* https://arxiv.org/abs/2212.10509.pdf" + "- https://arxiv.org/pdf/2205.10625.pdf\n", + "- https://arxiv.org/abs/2212.10509.pdf\n" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 17, "id": "c72bbd12-f85c-4ed0-9dfa-8503afebfafa", "metadata": {}, "outputs": [], @@ -493,7 +616,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 18, "id": "a20bf0d4-f567-4451-834d-a07190a3185e", "metadata": {}, "outputs": [], @@ -509,8 +632,9 @@ " return formatted_string.strip()\n", "\n", "# llm\n", - "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", - "\n", + "# llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", + "llm = ChatGoogleGenerativeAI(model=\"gemini-pro\",\n", + " temperature=0, top_p=0.85)\n", "q_a_pairs = \"\"\n", "for q in questions:\n", " \n", @@ -529,17 +653,17 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 19, "id": "e6070fea-ffcf-49ca-ac99-7d7ed2744d40", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'The main components of an LLM-powered autonomous agent system interact with each other in a coordinated manner to enable autonomous functionality. \\n\\n1. Planning: The agent breaks down complex tasks into smaller subgoals through task decomposition. This allows for efficient handling of tasks by breaking them down into manageable steps. The agent can also engage in self-reflection and refinement, learning from past actions and improving future results.\\n\\n2. Memory: The agent stores information and past experiences to inform future actions. This memory component helps the agent make informed decisions based on past experiences.\\n\\n3. External Classical Planner (LLM+P approach): In some cases, the agent may outsource long-horizon planning to an external classical planner using the Planning Domain Definition Language (PDDL) as an intermediate interface. This allows for more complex planning tasks to be handled efficiently.\\n\\n4. Self-Reflection (ReAct approach): The agent integrates reasoning and acting within LLM by extending the action space to include task-specific discrete actions and language prompts. This allows the agent to interact with the environment and generate reasoning traces in natural language. 
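The body of the `for q in questions:` loop above is elided by the diff context. A minimal sketch of the recursive build-up, assuming `retriever`, `llm`, `format_qa_pair`, and the decomposition answer prompt (`decomposition_prompt`, with `{question}`, `{q_a_pairs}`, and `{context}` placeholders) from the surrounding cells:

```python
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser

q_a_pairs = ""
for q in questions:
    rag_chain = (
        {
            "context": itemgetter("question") | retriever,   # retrieve for the current sub-question
            "question": itemgetter("question"),
            "q_a_pairs": itemgetter("q_a_pairs"),            # Q+A pairs answered so far
        }
        | decomposition_prompt
        | llm
        | StrOutputParser()
    )
    answer = rag_chain.invoke({"question": q, "q_a_pairs": q_a_pairs})
    q_a_pairs = q_a_pairs + "\n---\n" + format_qa_pair(q, answer)
```

Each sub-question is answered with both retrieved context and the answers accumulated so far, which is what makes this variant recursive rather than independent.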
Self-reflection plays a crucial role in enabling the agent to improve iteratively by refining past actions and correcting mistakes.\\n\\nOverall, the interaction between these components enables the LLM-powered autonomous agent system to effectively plan, execute tasks, learn from experiences, and improve its performance over time, ultimately enabling autonomous functionality.'" + "'The provided context does not mention the key components of an autonomous agent system that leverage LLM capabilities, so I cannot answer this question from the provided context.'" ] }, - "execution_count": 9, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -553,13 +677,13 @@ "id": "c3e13879-bb8f-43be-acb8-3dc3c41dabb2", "metadata": {}, "source": [ - "Trace: \n", + "Trace:\n", "\n", "Question 1: https://smith.langchain.com/public/faefde73-0ecb-4328-8fee-a237904115c0/r\n", "\n", "Question 2: https://smith.langchain.com/public/6142cad3-b314-454e-b2c9-15146cfcce78/r\n", "\n", - "Question 3: https://smith.langchain.com/public/84bdca0f-0fa4-46d4-9f89-a7f25bd857fe/r" + "Question 3: https://smith.langchain.com/public/84bdca0f-0fa4-46d4-9f89-a7f25bd857fe/r\n" ] }, { @@ -572,17 +696,26 @@ "id": "eb0fa2e4-d4f1-42fc-a1ad-8eaeb05a0d3e", "metadata": {}, "source": [ - "### Answer individually \n", + "### Answer individually\n", "\n", - "![Screenshot 2024-02-18 at 2.00.59 PM.png](attachment:e24502d7-f641-4262-a326-da1636822fa2.png)" + "![Screenshot 2024-02-18 at 2.00.59 PM.png](attachment:e24502d7-f641-4262-a326-da1636822fa2.png)\n" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 20, "id": "297425fa-975b-4599-9b9e-a11139b99140", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_core\\_api\\deprecation.py:119: LangChainDeprecationWarning: The method `BaseRetriever.get_relevant_documents` was deprecated in langchain-core 0.1.46 and will be removed in 0.3.0. Use invoke instead.\n", + " warn_deprecated(\n" + ] + } + ], "source": [ "# Answer each sub-question individually \n", "\n", @@ -622,17 +755,17 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 21, "id": "b8631dda-bbcd-437c-81b3-5db7abb831f9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'The main components of an LLM-powered autonomous agent system include planning, subgoal decomposition, reflection and refinement, memory, task decomposition, and self-reflection. Planning involves breaking down tasks into manageable subgoals, while reflection allows for self-criticism and learning from past actions. Task decomposition can be done through simple prompting by LLM, task-specific instructions, or human inputs. 
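The source of the "Answer each sub-question individually" cell is largely elided by the diff. A minimal sketch of the usual shape of that cell, assuming `retriever`, `llm`, and `generate_queries_decomposition` from earlier cells, plus the `rlm/rag-prompt` prompt from the LangChain hub (`langchainhub` is in the package list at the top of the notebook). It uses `retriever.invoke(...)` rather than `get_relevant_documents`, which is what the deprecation warning above refers to:

```python
from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# RAG prompt pulled from the LangChain hub
prompt_rag = hub.pull("rlm/rag-prompt")

def retrieve_and_rag(question, prompt_rag, sub_question_generator_chain):
    """Generate sub-questions, then answer each one independently against the retriever."""
    sub_questions = sub_question_generator_chain.invoke({"question": question})
    rag_results = []
    for sub_question in sub_questions:
        retrieved_docs = retriever.invoke(sub_question)  # replaces the deprecated get_relevant_documents
        answer = (prompt_rag | llm | StrOutputParser()).invoke(
            {"context": retrieved_docs, "question": sub_question}
        )
        rag_results.append(answer)
    return rag_results, sub_questions

answers, questions = retrieve_and_rag(question, prompt_rag, generate_queries_decomposition)
```

The per-sub-question answers are then formatted into a single Q+A context (the "Here is a set of Q+A pairs" prompt shown further down) and passed to the final synthesis chain.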
These components work together to enable autonomous behavior by allowing the agent to plan, learn from mistakes, and improve its actions over time.'" + "'The provided context does not contain the information necessary to answer this question.'" ] }, - "execution_count": 11, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -667,6 +800,27 @@ "final_rag_chain.invoke({\"context\":context,\"question\":question})" ] }, + { + "cell_type": "code", + "execution_count": 22, + "id": "59ade089", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template='Here is a set of Q+A pairs:\\n\\n{context}\\n\\nUse these to synthesize an answer to the question: {question}\\n'))])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "prompt" + ] + }, { "cell_type": "markdown", "id": "17698863-e683-48f4-b50e-adaa2bdee55d", @@ -674,7 +828,7 @@ "source": [ "Trace:\n", "\n", - "https://smith.langchain.com/public/d8f26f75-3fb8-498a-a3a2-6532aa77f56b/r" + "https://smith.langchain.com/public/d8f26f75-3fb8-498a-a3a2-6532aa77f56b/r\n" ] }, { @@ -691,14 +845,14 @@ "\n", "![Screenshot 2024-02-12 at 1.14.43 PM.png](attachment:715e11dc-7730-4f51-8469-b7f0b299ac9e.png)\n", "\n", - "Paper: \n", + "Paper:\n", "\n", - "* https://arxiv.org/pdf/2310.06117.pdf" + "- https://arxiv.org/pdf/2310.06117.pdf\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "1d74f9f2-543d-4e41-b90b-7bb527eca1d9", "metadata": {}, "outputs": [], @@ -742,22 +896,84 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "5cba100d-167f-4392-8f58-88729d3e4ce9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'what are the capabilities and limitations of LLM agents?'" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "generate_queries_step_back = prompt | ChatOpenAI(temperature=0) | StrOutputParser()\n", + "generate_queries_step_back = prompt | llm | StrOutputParser()\n", "question = \"What is task decomposition for LLM agents?\"\n", "generate_queries_step_back.invoke({\"question\": question})" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "999445b0-d8a0-4208-9bb6-38610667a00b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ChatGoogleGenerativeAIError", + "evalue": "Invalid argument provided to Gemini: 400 Developer instruction is not enabled for models/gemini-pro", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31m_InactiveRpcError\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32mc:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\google\\api_core\\grpc_helpers.py:72\u001b[0m, in \u001b[0;36m_wrap_unary_errors..error_remapped_callable\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 71\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m---> 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcallable_\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
[... intermediate traceback frames elided for readability: the call descends through langchain_core runnables, concurrent.futures, langchain_google_genai chat_models, google.generativeai, google.api_core retry/timeout helpers, grpc, and tenacity before surfacing the error shown below ...]
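The root cause, as the final frames and the `ChatGoogleGenerativeAIError` below show, is that at the time of this run `models/gemini-pro` rejected a standalone system ("developer") instruction: `400 Developer instruction is not enabled for models/gemini-pro`. The usual trigger is a `ChatPromptTemplate` that starts with a `("system", ...)` message, as the step-back prompt defined a few cells earlier (not visible in this hunk) presumably does. A commonly used workaround in `langchain_google_genai` is to have the wrapper fold the system message into the first human message; the exact flag below is an assumption to verify against the installed release:

```python
from langchain_google_genai import ChatGoogleGenerativeAI

# Sketch of a workaround: merge the ("system", ...) message into the first
# human turn so gemini-pro accepts the prompt. Confirm this parameter exists
# in the installed langchain_google_genai version before relying on it.
llm = ChatGoogleGenerativeAI(
    model="gemini-pro",
    temperature=0,
    convert_system_message_to_human=True,
)
```

Rebuilding the `chain` shown just below with this `llm` (or dropping the system message from the prompt template) should let `chain.invoke({"question": question})` complete instead of raising.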
384\u001b[0m retry_state\u001b[38;5;241m.\u001b[39mset_exception(sys\u001b[38;5;241m.\u001b[39mexc_info()) \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n", + "File \u001b[1;32mc:\\Users\\Acer\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\langchain_google_genai\\chat_models.py:165\u001b[0m, in \u001b[0;36m_chat_with_retry.._chat_with_retry\u001b[1;34m(**kwargs)\u001b[0m\n\u001b[0;32m 162\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(error_msg)\n\u001b[0;32m 164\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m google\u001b[38;5;241m.\u001b[39mapi_core\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mInvalidArgument \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m--> 165\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ChatGoogleGenerativeAIError(\n\u001b[0;32m 166\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid argument provided to Gemini: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 167\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[0;32m 168\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 169\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n", + "\u001b[1;31mChatGoogleGenerativeAIError\u001b[0m: Invalid argument provided to Gemini: 400 Developer instruction is not enabled for models/gemini-pro" + ] + } + ], "source": [ "# Response prompt \n", "response_prompt_template = \"\"\"You are an expert of world knowledge. I am going to ask you a question. Your response should be comprehensive and not contradicted with the following context if they are relevant. Otherwise, ignore them if they are not relevant.\n", @@ -779,13 +995,34 @@ " \"question\": lambda x: x[\"question\"],\n", " }\n", " | response_prompt\n", - " | ChatOpenAI(temperature=0)\n", + " | llm\n", " | StrOutputParser()\n", ")\n", "\n", "chain.invoke({\"question\": question})" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e57ee60", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ChatPromptTemplate(input_variables=['normal_context', 'question', 'step_back_context'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['normal_context', 'question', 'step_back_context'], template='You are an expert of world knowledge. I am going to ask you a question. Your response should be comprehensive and not contradicted with the following context if they are relevant. 
Otherwise, ignore them if they are not relevant.\\n\\n# {normal_context}\\n# {step_back_context}\\n\\n# Original Question: {question}\\n# Answer:'))])" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "response_prompt" + ] + }, { "attachments": { "1982149e-720b-426e-a1ab-8d96f6616b5a.png": { @@ -800,13 +1037,13 @@ "\n", "![Screenshot 2024-02-12 at 1.12.45 PM.png](attachment:1982149e-720b-426e-a1ab-8d96f6616b5a.png)\n", "\n", - "Docs: \n", + "Docs:\n", "\n", - "* https://github.com/langchain-ai/langchain/blob/master/cookbook/hypothetical_document_embeddings.ipynb\n", + "- https://github.com/langchain-ai/langchain/blob/master/cookbook/hypothetical_document_embeddings.ipynb\n", "\n", "Paper:\n", "\n", - "* https://arxiv.org/abs/2212.10496" + "- https://arxiv.org/abs/2212.10496\n" ] }, { @@ -814,7 +1051,18 @@ "execution_count": null, "id": "c2902575-bbbb-41a9-835b-9a24dc08261b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'**Task Decomposition for Large Language Models (LLMs)**\\n\\n**Introduction**\\nLarge language models (LLMs) are advanced artificial intelligence models that can process and generate text. However, LLMs often struggle with complex or multi-step tasks. Task decomposition is a technique used to break down complex tasks into smaller, more manageable subtasks that LLMs can handle more effectively.\\n\\n**Benefits of Task Decomposition**\\nTask decomposition offers several benefits for LLMs, including:\\n\\n* **Improved performance:** Breaking down tasks into smaller steps allows LLMs to focus on specific aspects of the task, leading to more accurate and efficient results.\\n* **Reduced cognitive load:** By decomposing tasks, LLMs can avoid overloading their memory and processing capabilities, enabling them to handle more complex problems.\\n* **Increased explainability:** Decomposed tasks are easier to understand and analyze, which helps in debugging and improving the overall performance of LLMs.\\n\\n**Methods of Task Decomposition**\\nVarious methods can be used for task decomposition in LLMs:\\n\\n* **Hierarchical decomposition:** Dividing tasks into a hierarchy of subtasks, where each subtask is a step towards completing the overall task.\\n* **Functional decomposition:** Breaking tasks down based on their functionality, identifying distinct components that perform specific tasks.\\n* **Data-driven decomposition:** Using data analysis techniques to identify natural subtasks within a task.\\n\\n**Examples of Task Decomposition**\\nTask decomposition can be applied to various tasks in LLMs, such as:\\n\\n* **Question answering:** Decomposing a question into subtasks of identifying key concepts, searching for relevant information, and generating a response.\\n* **Natural language generation:** Breaking down a text generation task into subtasks of idea generation, sentence planning, and language production.\\n* **Translation:** Decomposing a translation task into subtasks of understanding the source language, generating a target language representation, and producing the final translation.\\n\\n**Conclusion**\\nTask decomposition is a crucial technique for enhancing the performance of LLMs in complex tasks. By breaking down tasks into smaller, more manageable subtasks, LLMs can achieve better accuracy, efficiency, and explainability. 
Ongoing research in this area aims to develop more sophisticated decomposition methods and explore their applications in a wide range of LLM-based tasks.'" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from langchain.prompts import ChatPromptTemplate\n", "\n", @@ -828,7 +1076,7 @@ "from langchain_openai import ChatOpenAI\n", "\n", "generate_docs_for_retrieval = (\n", - " prompt_hyde | ChatOpenAI(temperature=0) | StrOutputParser() \n", + " prompt_hyde | ChatGoogleGenerativeAI(model=\"gemini-pro\") | StrOutputParser() \n", ")\n", "\n", "# Run\n", @@ -841,7 +1089,21 @@ "execution_count": null, "id": "d47587bb-23db-42a0-b087-beef9e95308b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. 
\"Write a story outline.\" for writing a novel, or (3) with human inputs.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='LLM Powered Autonomous Agents\\n \\nDate: June 23, 2023 | Estimated Reading Time: 31 min | Author: Lilian Weng\\n\\n\\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\\nAgent System Overview#\\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\\n\\nPlanning\\n\\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\\n\\n\\nMemory', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", + " Document(page_content='LLM Powered Autonomous Agents\\n \\nDate: June 23, 2023 | Estimated Reading Time: 31 min | Author: Lilian Weng\\n\\n\\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\\nAgent System Overview#\\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\\n\\nPlanning\\n\\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\\n\\n\\nMemory', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'})]" + ] + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Retrieve\n", "retrieval_chain = generate_docs_for_retrieval | retriever \n", @@ -854,7 +1116,18 @@ "execution_count": null, "id": "604fcc36-a1d7-4096-99b5-50db30950fc5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'Task decomposition for LLM agents involves breaking down large tasks into smaller, manageable subgoals. This enables the agent to handle complex tasks more efficiently. Task decomposition can be done using simple prompting with LLM, using task-specific instructions, or with human inputs.'" + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# RAG\n", "template = \"\"\"Answer the following question based on this context:\n", @@ -868,7 +1141,7 @@ "\n", "final_rag_chain = (\n", " prompt\n", - " | llm\n", + " | ChatGoogleGenerativeAI(model=\"gemini-pro\")\n", " | StrOutputParser()\n", ")\n", "\n", @@ -900,7 +1173,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.4" } }, "nbformat": 4,