From 8c52e9dc906f89e9edc59bcd8edb2ef9af5e0da0 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Thu, 29 Jan 2026 07:48:59 -0500 Subject: [PATCH 1/8] chore: modernize Pinecone SDK in gpt-4-langchain-docs.ipynb Update pinecone-client to pinecone package for SDK v8 compatibility. The notebook already uses modern SDK patterns. Closes SDK-173 --- .../openai/gpt-4-langchain-docs.ipynb | 4283 ++++++++--------- 1 file changed, 2126 insertions(+), 2157 deletions(-) diff --git a/learn/generation/openai/gpt-4-langchain-docs.ipynb b/learn/generation/openai/gpt-4-langchain-docs.ipynb index fcc7aef9..74393bf8 100644 --- a/learn/generation/openai/gpt-4-langchain-docs.ipynb +++ b/learn/generation/openai/gpt-4-langchain-docs.ipynb @@ -1,2238 +1,2207 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "GFLLl1Agum8O" - }, - "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/generation/openai/gpt-4-langchain-docs.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/generation/openai/gpt-4-langchain-docs.ipynb)\n", - "\n", - "# GPT4 with Retrieval Augmentation over LangChain Docs\n", - "\n", - "[![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/fast-link.svg)](https://github.com/pinecone-io/examples/blob/master/docs/gpt-4-langchain-docs.ipynb)\n", - "\n", - "In this notebook we'll work through an example of using GPT-4 with retrieval augmentation to answer questions about the LangChain Python library." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "_HDKlQO5svqI" - }, - "outputs": [], - "source": [ - "!pip install -qU \\\n", - " tiktoken==0.4.0 \\\n", - " openai==0.27.7 \\\n", - " langchain==0.0.179 \\\n", - " pinecone-client==3.1.0 \\\n", - " datasets==2.13.1" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7c1EpQ-jq7SU" - }, - "source": [ - "---\n", - "\n", - "\ud83d\udea8 _Note: the above `pip install` is formatted for Jupyter notebooks. If running elsewhere you may need to drop the `!`._\n", - "\n", - "---" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NgUEJ6vDum8q" - }, - "source": [ - "In this example, we will download the LangChain docs, we can find a static version of the docs on Hugging Face datasets in `jamescalam/langchain-docs-23-06-27`. 
To download them we do:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 237, - "referenced_widgets": [ - "63de2154fea24b49a87bf4b8428fa630", - "4b4cfb1a834342198c75a02d28448b57", - "a9d471008dc34f67a5307bbb26d6123c", - "580e5dd4c9d9497caa40802d5918e75c", - "bd09981e486d461eaa2cf166b32921e1", - "bed2dd81769b4910831cb34a7b475c72", - "ccad7c2aec604ee29b41497ec0f37fa7", - "390f06d63dd547d395dcf18f1ebe265d", - "6545006e51824be9b6cb5cdb2cb2ba5a", - "241b0de59e53465f8acad4ac74b17b57", - "05199362d95449699254c45c1d5cee94", - "6881722e02fe4395a5fcaf668cb7ebcb", - "2b960a7f46444ad3bd3392517b415f2d", - "a3e8499ed740449586ca31500038c7a8", - "08c52a0369b74e7da99574ec29612189", - "ffb822b2f739434dbe99e8a992716c30", - "7e2b88be1cae49da824e6c6c0782cb50", - "9f4e9da63bb64d279ded5ee1730b5cba", - "3b319c7a4f6f41ea9ea6e6268cd29343", - "908935a03fea42efbded99cd81de54c5", - "dd3ece4c242d4eae946f8bc4f95d1dbf", - "ae71cc7e26ee4b51b7eb67520f66c9bd", - "d83b0b3089c34bb58ddb1272a240c2f9", - "34d21f61f6dc499a9d1504634e470bdd", - "64aae9675d394df48d233b31e5f0eb3c", - "d1d3dde6ec3b483f8b14139a7d6a9ae0", - "690ca50e9785402bb17fa266f8e40ea9", - "482f891d61ab4c2080d95a9b84ea5c6d", - "622987b045e74a13b79553d3d062e72a", - "6c7236b0655e4397b3a9d5f4d83c03fe", - "6f7e876e10fd4c58aa2d1f1ed4ff2762", - "9a8b01998f8a4c6bb0bfe71e02b3352c", - "ec224feb9828415eb018831e985d22c0", - "a532b2307c734cf188092d40299c40ad", - "fab781bfae4647968aa69f19ae6a5754", - "5961b9e44ce14a2a8eb65a9e5b6be90d", - "5f15e4b12305489180e54c61769dcebe", - "324465ed674740c2a18a88a2633f2093", - "f82b21e87eba4e06a0531c791dc09b3f", - "5c0bb7407c844ae19479416752f66190", - "5ef6d125261b49679dcb4d886b3e382c", - "294d5fc4fa1e40429e08137934481ba2", - "f5d992e8c1224879be5e5464a424a3a4", - "7e828bf7b91e4029bc2093876128a78b" - ] - }, - "id": "xo9gYhGPr_DQ", - "outputId": "016b896d-87a6-4d17-bad1-027475510a8b" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading and preparing dataset json/jamescalam--langchain-docs-23-06-27 to /root/.cache/huggingface/datasets/jamescalam___json/jamescalam--langchain-docs-23-06-27-4631410d07444b03/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96...\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "63de2154fea24b49a87bf4b8428fa630", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Downloading data files: 0%| | 0/1 [00:00 JSON: {\n", - " \"data\": [\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"whisper-1\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-internal\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"babbage\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"davinci\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-davinci-edit-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"babbage-code-search-code\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - 
" \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-similarity-babbage-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-embedding-ada-002\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-internal\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"code-davinci-edit-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-davinci-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"ada\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"babbage-code-search-text\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"babbage-similarity\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"code-search-babbage-text-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-curie-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-4-0314\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-4-0613\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"code-search-babbage-code-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-ada-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-similarity-ada-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"curie-instruct-beta\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-4\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"ada-code-search-code\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", 
- " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"ada-similarity\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"code-search-ada-text-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-ada-query-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"davinci-search-document\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"ada-code-search-text\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-ada-doc-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"davinci-instruct-beta\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-similarity-curie-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"code-search-ada-code-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"ada-search-query\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-davinci-query-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"curie-search-query\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"davinci-search-query\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"babbage-search-document\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"ada-search-document\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-curie-query-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-babbage-doc-001\",\n", - " \"object\": \"engine\",\n", - " 
\"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"curie-search-document\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-curie-doc-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"babbage-search-query\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-babbage-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-davinci-doc-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-babbage-query-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"curie-similarity\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-3.5-turbo-0613\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"curie\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-3.5-turbo-16k-0613\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-similarity-davinci-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-davinci-002\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-3.5-turbo-0301\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-davinci-003\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-internal\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"davinci-similarity\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-3.5-turbo\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-3.5-turbo-16k\",\n", - " \"object\": 
\"engine\",\n", - " \"owner\": \"openai-internal\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " }\n", - " ],\n", - " \"object\": \"list\"\n", - "}" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import os\n", - "import openai\n", - "\n", - "# get API key from top-right dropdown on OpenAI website\n", - "openai.api_key = os.getenv(\"OPENAI_API_KEY\") or \"OPENAI_API_KEY\"\n", - "\n", - "openai.Engine.list() # check we have authenticated" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "id": "kteZ69Z5M55S" - }, - "outputs": [], - "source": [ - "embed_model = \"text-embedding-ada-002\"\n", - "\n", - "res = openai.Embedding.create(\n", - " input=[\n", - " \"Sample document text goes here\",\n", - " \"there will be several phrases in each batch\"\n", - " ], engine=embed_model\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aNZ7IWekNLbu" - }, - "source": [ - "In the response `res` we will find a JSON-like object containing our new embeddings within the `'data'` field." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "esagZj6iNLPZ", - "outputId": "8e26f18a-4890-43ca-95e7-9e256e29e3be" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['object', 'data', 'model', 'usage'])" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "res.keys()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zStnHFpkNVIU" - }, - "source": [ - "Inside `'data'` we will find two records, one for each of the two sentences we just embedded. Each vector embedding contains `1536` dimensions (the output dimensionality of the `text-embedding-ada-002` model." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "uVoP9VcINWAC", - "outputId": "d9f797af-0df8-4ee9-f779-8d8a62589134" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "2" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(res['data'])" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "GFLLl1Agum8O" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/generation/openai/gpt-4-langchain-docs.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/generation/openai/gpt-4-langchain-docs.ipynb)\n", + "\n", + "# GPT4 with Retrieval Augmentation over LangChain Docs\n", + "\n", + "[![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/fast-link.svg)](https://github.com/pinecone-io/examples/blob/master/docs/gpt-4-langchain-docs.ipynb)\n", + "\n", + "In this notebook we'll work through an example of using GPT-4 with retrieval augmentation to answer questions about the LangChain Python library." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "_HDKlQO5svqI" + }, + "outputs": [], + "source": [ + "!pip install -qU \\\n", + " tiktoken==0.4.0 \\\n", + " openai==0.27.7 \\\n", + " langchain==0.0.179 \\\n", + " pinecone \\\n", + " datasets==2.13.1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7c1EpQ-jq7SU" + }, + "source": [ + "---\n", + "\n", + "🚨 _Note: the above `pip install` is formatted for Jupyter notebooks. If running elsewhere you may need to drop the `!`._\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NgUEJ6vDum8q" + }, + "source": [ + "In this example, we will download the LangChain docs, we can find a static version of the docs on Hugging Face datasets in `jamescalam/langchain-docs-23-06-27`. To download them we do:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 237, + "referenced_widgets": [ + "63de2154fea24b49a87bf4b8428fa630", + "4b4cfb1a834342198c75a02d28448b57", + "a9d471008dc34f67a5307bbb26d6123c", + "580e5dd4c9d9497caa40802d5918e75c", + "bd09981e486d461eaa2cf166b32921e1", + "bed2dd81769b4910831cb34a7b475c72", + "ccad7c2aec604ee29b41497ec0f37fa7", + "390f06d63dd547d395dcf18f1ebe265d", + "6545006e51824be9b6cb5cdb2cb2ba5a", + "241b0de59e53465f8acad4ac74b17b57", + "05199362d95449699254c45c1d5cee94", + "6881722e02fe4395a5fcaf668cb7ebcb", + "2b960a7f46444ad3bd3392517b415f2d", + "a3e8499ed740449586ca31500038c7a8", + "08c52a0369b74e7da99574ec29612189", + "ffb822b2f739434dbe99e8a992716c30", + "7e2b88be1cae49da824e6c6c0782cb50", + "9f4e9da63bb64d279ded5ee1730b5cba", + "3b319c7a4f6f41ea9ea6e6268cd29343", + "908935a03fea42efbded99cd81de54c5", + "dd3ece4c242d4eae946f8bc4f95d1dbf", + "ae71cc7e26ee4b51b7eb67520f66c9bd", + "d83b0b3089c34bb58ddb1272a240c2f9", + "34d21f61f6dc499a9d1504634e470bdd", + "64aae9675d394df48d233b31e5f0eb3c", + "d1d3dde6ec3b483f8b14139a7d6a9ae0", + "690ca50e9785402bb17fa266f8e40ea9", + "482f891d61ab4c2080d95a9b84ea5c6d", + "622987b045e74a13b79553d3d062e72a", + "6c7236b0655e4397b3a9d5f4d83c03fe", + "6f7e876e10fd4c58aa2d1f1ed4ff2762", + "9a8b01998f8a4c6bb0bfe71e02b3352c", + "ec224feb9828415eb018831e985d22c0", + "a532b2307c734cf188092d40299c40ad", + "fab781bfae4647968aa69f19ae6a5754", + "5961b9e44ce14a2a8eb65a9e5b6be90d", + "5f15e4b12305489180e54c61769dcebe", + "324465ed674740c2a18a88a2633f2093", + "f82b21e87eba4e06a0531c791dc09b3f", + "5c0bb7407c844ae19479416752f66190", + "5ef6d125261b49679dcb4d886b3e382c", + "294d5fc4fa1e40429e08137934481ba2", + "f5d992e8c1224879be5e5464a424a3a4", + "7e828bf7b91e4029bc2093876128a78b" + ] }, + "id": "xo9gYhGPr_DQ", + "outputId": "016b896d-87a6-4d17-bad1-027475510a8b" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "s-zraDCjNeC6", - "outputId": "5f09e471-28de-4c39-d040-a80def97708e" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(1536, 1536)" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(res['data'][0]['embedding']), len(res['data'][1]['embedding'])" - ] + "output_type": "stream", + "text": [ + "Downloading and preparing dataset json/jamescalam--langchain-docs-23-06-27 to /root/.cache/huggingface/datasets/jamescalam___json/jamescalam--langchain-docs-23-06-27-4631410d07444b03/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96...\n" + ] }, { - 
"cell_type": "markdown", - "metadata": { - "id": "XPd41MjANhmp" + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "63de2154fea24b49a87bf4b8428fa630", + "version_major": 2, + "version_minor": 0 }, - "source": [ - "We will apply this same embedding logic to the langchain docs dataset we've just scraped. But before doing so we must create a place to store the embeddings." + "text/plain": [ + "Downloading data files: 0%| | 0/1 [00:00 bool:\\n'\n", - " ' return True\\n'\n", - " ' prompt: BasePromptTemplate\\n'\n", - " ' \"\"\"Prompt object to use.\"\"\"\\n'\n", - " ' llm: BaseLanguageModel\\n'\n", - " ' \"\"\"Language model to call.\"\"\"\\n'\n", - " ' output_key: str = \"text\" #: :meta '\n", - " 'private:\\n'\n", - " ' output_parser: BaseLLMOutputParser = '\n", - " 'Field(default_factory=NoOpOutputParser)\\n'\n", - " ' \"\"\"Output parser to use.\\n'\n", - " ' Defaults to one that takes the most '\n", - " 'likely string but does not change it \\n'\n", - " ' otherwise.\"\"\"\\n'\n", - " ' return_final_only: bool = True\\n'\n", - " ' \"\"\"Whether to return only the final '\n", - " 'parsed result. Defaults to True.\\n'\n", - " ' If false, will return a bunch of extra '\n", - " 'information about the generation.\"\"\"\\n'\n", - " ' llm_kwargs: dict = '\n", - " 'Field(default_factory=dict)\\n'\n", - " ' class Config:\\n'\n", - " ' \"\"\"Configuration for this pydantic '\n", - " 'object.\"\"\"\\n'\n", - " ' extra = Extra.forbid\\n'\n", - " ' arbitrary_types_allowed = True',\n", - " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/llm.html'},\n", - " 'score': 0.800940871,\n", - " 'values': []},\n", - " {'id': '35cde68a-b909-43b6-b918-81c4eb2db5cd-82',\n", - " 'metadata': {'chunk': 82.0,\n", - " 'text': 'Bases: langchain.chains.base.Chain\\n'\n", - " 'Chain for question-answering with '\n", - " 'self-verification.\\n'\n", - " 'Example\\n'\n", - " 'from langchain import OpenAI, '\n", - " 'LLMSummarizationCheckerChain\\n'\n", - " 'llm = OpenAI(temperature=0.0)\\n'\n", - " 'checker_chain = '\n", - " 'LLMSummarizationCheckerChain.from_llm(llm)\\n'\n", - " 'Parameters\\n'\n", - " 'memory '\n", - " '(Optional[langchain.schema.BaseMemory]) '\n", - " '\u2013 \\n'\n", - " 'callbacks '\n", - " '(Optional[Union[List[langchain.callbacks.base.BaseCallbackHandler], '\n", - " 'langchain.callbacks.base.BaseCallbackManager]]) '\n", - " '\u2013 \\n'\n", - " 'callback_manager '\n", - " '(Optional[langchain.callbacks.base.BaseCallbackManager]) '\n", - " '\u2013 \\n'\n", - " 'verbose (bool) \u2013 \\n'\n", - " 'tags (Optional[List[str]]) \u2013 \\n'\n", - " 'sequential_chain '\n", - " '(langchain.chains.sequential.SequentialChain) '\n", - " '\u2013 \\n'\n", - " 'llm '\n", - " '(Optional[langchain.base_language.BaseLanguageModel]) '\n", - " '\u2013 \\n'\n", - " 'create_assertions_prompt '\n", - " '(langchain.prompts.prompt.PromptTemplate) '\n", - " '\u2013 \\n'\n", - " 'check_assertions_prompt '\n", - " '(langchain.prompts.prompt.PromptTemplate) '\n", - " '\u2013 \\n'\n", - " 'revised_summary_prompt '\n", - " '(langchain.prompts.prompt.PromptTemplate) '\n", - " '\u2013 \\n'\n", - " 'are_all_true_prompt '\n", - " '(langchain.prompts.prompt.PromptTemplate) '\n", - " '\u2013 \\n'\n", - " 'input_key (str) \u2013 \\n'\n", - " 'output_key (str) \u2013 \\n'\n", - " 'max_checks (int) \u2013 \\n'\n", - " 'Return type\\n'\n", - " 'None',\n", - " 'url': 'https://api.python.langchain.com/en/latest/modules/chains.html'},\n", - " 'score': 0.79580605,\n", - " 'values': []},\n", - " 
{'id': '993db45b-4e3b-431d-a2a6-48ed5944912a-1',\n", - " 'metadata': {'chunk': 1.0,\n", - " 'text': '[docs] @classmethod\\n'\n", - " ' def from_llm(\\n'\n", - " ' cls,\\n'\n", - " ' llm: BaseLanguageModel,\\n'\n", - " ' chain: LLMChain,\\n'\n", - " ' critique_prompt: '\n", - " 'BasePromptTemplate = CRITIQUE_PROMPT,\\n'\n", - " ' revision_prompt: '\n", - " 'BasePromptTemplate = REVISION_PROMPT,\\n'\n", - " ' **kwargs: Any,\\n'\n", - " ' ) -> \"ConstitutionalChain\":\\n'\n", - " ' \"\"\"Create a chain from an LLM.\"\"\"\\n'\n", - " ' critique_chain = LLMChain(llm=llm, '\n", - " 'prompt=critique_prompt)\\n'\n", - " ' revision_chain = LLMChain(llm=llm, '\n", - " 'prompt=revision_prompt)\\n'\n", - " ' return cls(\\n'\n", - " ' chain=chain,\\n'\n", - " ' '\n", - " 'critique_chain=critique_chain,\\n'\n", - " ' '\n", - " 'revision_chain=revision_chain,\\n'\n", - " ' **kwargs,\\n'\n", - " ' )\\n'\n", - " ' @property\\n'\n", - " ' def input_keys(self) -> List[str]:\\n'\n", - " ' \"\"\"Defines the input keys.\"\"\"\\n'\n", - " ' return self.chain.input_keys\\n'\n", - " ' @property\\n'\n", - " ' def output_keys(self) -> List[str]:\\n'\n", - " ' \"\"\"Defines the output keys.\"\"\"\\n'\n", - " ' if '\n", - " 'self.return_intermediate_steps:\\n'\n", - " ' return [\"output\", '\n", - " '\"critiques_and_revisions\", '\n", - " '\"initial_output\"]\\n'\n", - " ' return [\"output\"]\\n'\n", - " ' def _call(\\n'\n", - " ' self,\\n'\n", - " ' inputs: Dict[str, Any],\\n'\n", - " ' run_manager: '\n", - " 'Optional[CallbackManagerForChainRun] = '\n", - " 'None,\\n'\n", - " ' ) -> Dict[str, Any]:\\n'\n", - " ' _run_manager = run_manager or '\n", - " 'CallbackManagerForChainRun.get_noop_manager()\\n'\n", - " ' response = self.chain.run(\\n'\n", - " ' **inputs,\\n'\n", - " ' '\n", - " 'callbacks=_run_manager.get_child(\"original\"),\\n'\n", - " ' )\\n'\n", - " ' initial_response = response\\n'\n", - " ' input_prompt = '\n", - " 'self.chain.prompt.format(**inputs)\\n'\n", - " ' _run_manager.on_text(\\n'\n", - " ' text=\"Initial response: \" + '\n", - " 'response + \"\\\\n\\\\n\",\\n'\n", - " ' verbose=self.verbose,\\n'\n", - " ' color=\"yellow\",\\n'\n", - " ' )\\n'\n", - " ' critiques_and_revisions = []\\n'\n", - " ' for constitutional_principle in '\n", - " 'self.constitutional_principles:\\n'\n", - " ' # Do critique\\n'\n", - " ' raw_critique = '\n", - " 'self.critique_chain.run(\\n'\n", - " ' '\n", - " 'input_prompt=input_prompt,\\n'\n", - " ' '\n", - " 'output_from_model=response,\\n'\n", - " ' '\n", - " 'critique_request=constitutional_principle.critique_request,\\n'\n", - " ' '\n", - " 'callbacks=_run_manager.get_child(\"critique\"),\\n'\n", - " ' )\\n'\n", - " ' critique = '\n", - " 'self._parse_critique(\\n'\n", - " ' '\n", - " 'output_string=raw_critique,',\n", - " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/constitutional_ai/base.html'},\n", - " 'score': 0.79369247,\n", - " 'values': []},\n", - " {'id': 'adea5d40-2691-4bc9-9403-3360345bc25e-0',\n", - " 'metadata': {'chunk': 0.0,\n", - " 'text': 'Source code for '\n", - " 'langchain.chains.conversation.base\\n'\n", - " '\"\"\"Chain that carries on a conversation '\n", - " 'and calls an LLM.\"\"\"\\n'\n", - " 'from typing import Dict, List\\n'\n", - " 'from pydantic import Extra, Field, '\n", - " 'root_validator\\n'\n", - " 'from langchain.chains.conversation.prompt '\n", - " 'import PROMPT\\n'\n", - " 'from langchain.chains.llm import LLMChain\\n'\n", - " 'from langchain.memory.buffer import '\n", - " 
'ConversationBufferMemory\\n'\n", - " 'from langchain.prompts.base import '\n", - " 'BasePromptTemplate\\n'\n", - " 'from langchain.schema import BaseMemory\\n'\n", - " '[docs]class ConversationChain(LLMChain):\\n'\n", - " ' \"\"\"Chain to have a conversation and '\n", - " 'load context from memory.\\n'\n", - " ' Example:\\n'\n", - " ' .. code-block:: python\\n'\n", - " ' from langchain import '\n", - " 'ConversationChain, OpenAI\\n'\n", - " ' conversation = '\n", - " 'ConversationChain(llm=OpenAI())\\n'\n", - " ' \"\"\"\\n'\n", - " ' memory: BaseMemory = '\n", - " 'Field(default_factory=ConversationBufferMemory)\\n'\n", - " ' \"\"\"Default memory store.\"\"\"\\n'\n", - " ' prompt: BasePromptTemplate = PROMPT\\n'\n", - " ' \"\"\"Default conversation prompt to '\n", - " 'use.\"\"\"\\n'\n", - " ' input_key: str = \"input\" #: :meta '\n", - " 'private:\\n'\n", - " ' output_key: str = \"response\" #: :meta '\n", - " 'private:\\n'\n", - " ' class Config:\\n'\n", - " ' \"\"\"Configuration for this pydantic '\n", - " 'object.\"\"\"\\n'\n", - " ' extra = Extra.forbid\\n'\n", - " ' arbitrary_types_allowed = True\\n'\n", - " ' @property\\n'\n", - " ' def input_keys(self) -> List[str]:\\n'\n", - " ' \"\"\"Use this since so some prompt '\n", - " 'vars come from history.\"\"\"\\n'\n", - " ' return [self.input_key]\\n'\n", - " ' @root_validator()\\n'\n", - " ' def '\n", - " 'validate_prompt_input_variables(cls, '\n", - " 'values: Dict) -> Dict:\\n'\n", - " ' \"\"\"Validate that prompt input '\n", - " 'variables are consistent.\"\"\"\\n'\n", - " ' memory_keys = '\n", - " 'values[\"memory\"].memory_variables\\n'\n", - " ' input_key = values[\"input_key\"]\\n'\n", - " ' if input_key in memory_keys:\\n'\n", - " ' raise ValueError(\\n'\n", - " ' f\"The input key '\n", - " '{input_key} was also found in the memory '\n", - " 'keys \"\\n'\n", - " ' f\"({memory_keys}) - please '\n", - " 'provide keys that don\\'t overlap.\"\\n'\n", - " ' )\\n'\n", - " ' prompt_variables = '\n", - " 'values[\"prompt\"].input_variables\\n'\n", - " ' expected_keys = memory_keys + '\n", - " '[input_key]\\n'\n", - " ' if set(expected_keys) != '\n", - " 'set(prompt_variables):\\n'\n", - " ' raise ValueError(\\n'\n", - " ' \"Got unexpected prompt '\n", - " 'input variables. 
The prompt expects \"\\n'\n", - " ' f\"{prompt_variables}, but '\n", - " 'got {memory_keys} as inputs from \"\\n'\n", - " ' f\"memory, and {input_key} '\n", - " 'as the normal input key.\"\\n'\n", - " ' )\\n'\n", - " ' return values',\n", - " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/conversation/base.html'},\n", - " 'score': 0.792259932,\n", - " 'values': []},\n", - " {'id': '3b6f9660-0346-4992-a6f5-b9cc2977f446-5',\n", - " 'metadata': {'chunk': 5.0,\n", - " 'text': 'callbacks: Callbacks = None,\\n'\n", - " ' **kwargs: Any,\\n'\n", - " ' ) -> '\n", - " 'BaseConversationalRetrievalChain:\\n'\n", - " ' \"\"\"Load chain from LLM.\"\"\"\\n'\n", - " ' combine_docs_chain_kwargs = '\n", - " 'combine_docs_chain_kwargs or {}\\n'\n", - " ' doc_chain = load_qa_chain(\\n'\n", - " ' llm,\\n'\n", - " ' chain_type=chain_type,\\n'\n", - " ' callbacks=callbacks,\\n'\n", - " ' **combine_docs_chain_kwargs,\\n'\n", - " ' )\\n'\n", - " ' condense_question_chain = '\n", - " 'LLMChain(\\n'\n", - " ' llm=llm, '\n", - " 'prompt=condense_question_prompt, '\n", - " 'callbacks=callbacks\\n'\n", - " ' )\\n'\n", - " ' return cls(\\n'\n", - " ' vectorstore=vectorstore,\\n'\n", - " ' combine_docs_chain=doc_chain,\\n'\n", - " ' '\n", - " 'question_generator=condense_question_chain,\\n'\n", - " ' callbacks=callbacks,\\n'\n", - " ' **kwargs,\\n'\n", - " ' )',\n", - " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/conversational_retrieval/base.html'},\n", - " 'score': 0.791279614,\n", - " 'values': []}],\n", - " 'namespace': ''}" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "res" + "text/plain": [ + " 0%| | 0/505 [00:00 JSON: {\n", + " \"data\": [\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"whisper-1\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-internal\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"babbage\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"davinci\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-davinci-edit-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"babbage-code-search-code\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-similarity-babbage-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-embedding-ada-002\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-internal\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"code-davinci-edit-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-davinci-001\",\n", + " \"object\": \"engine\",\n", + " 
\"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"ada\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"babbage-code-search-text\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"babbage-similarity\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"code-search-babbage-text-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-curie-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-4-0314\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-4-0613\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"code-search-babbage-code-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-ada-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-similarity-ada-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"curie-instruct-beta\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-4\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"ada-code-search-code\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"ada-similarity\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"code-search-ada-text-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-ada-query-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"davinci-search-document\",\n", + " \"object\": \"engine\",\n", + " \"owner\": 
\"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"ada-code-search-text\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-ada-doc-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"davinci-instruct-beta\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-similarity-curie-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"code-search-ada-code-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"ada-search-query\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-davinci-query-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"curie-search-query\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"davinci-search-query\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"babbage-search-document\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"ada-search-document\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-curie-query-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-babbage-doc-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"curie-search-document\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-curie-doc-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"babbage-search-query\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " 
\"id\": \"text-babbage-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-davinci-doc-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-babbage-query-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"curie-similarity\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-3.5-turbo-0613\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"curie\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-3.5-turbo-16k-0613\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-similarity-davinci-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-davinci-002\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-3.5-turbo-0301\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-davinci-003\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-internal\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"davinci-similarity\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-3.5-turbo\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-3.5-turbo-16k\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-internal\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " }\n", + " ],\n", + " \"object\": \"list\"\n", + "}" ] + }, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "import openai\n", + "\n", + "# get API key from top-right dropdown on OpenAI website\n", + "openai.api_key = os.getenv(\"OPENAI_API_KEY\") or \"OPENAI_API_KEY\"\n", + "\n", + "openai.Engine.list() # check we have authenticated" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "kteZ69Z5M55S" + }, + "outputs": [], + "source": [ + "embed_model = \"text-embedding-ada-002\"\n", + "\n", + "res = openai.Embedding.create(\n", + " input=[\n", + " \"Sample document text goes here\",\n", + " \"there will be 
several phrases in each batch\",\n", + " ],\n", + " engine=embed_model,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aNZ7IWekNLbu" + }, + "source": [ + "In the response `res` we will find a JSON-like object containing our new embeddings within the `'data'` field." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "esagZj6iNLPZ", + "outputId": "8e26f18a-4890-43ca-95e7-9e256e29e3be" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "XPC1jQaKUcy0" - }, - "source": [ - "GPT-4 is currently accessed via the `ChatCompletions` endpoint of OpenAI. To add the information we retrieved into the model, we need to pass it into our user prompts *alongside* our original query. We can do that like so:" + "data": { + "text/plain": [ + "dict_keys(['object', 'data', 'model', 'usage'])" ] + }, + "output_type": "execute_result" + } + ], + "source": [ + "res.keys()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zStnHFpkNVIU" + }, + "source": [ + "Inside `'data'` we will find two records, one for each of the two sentences we just embedded. Each vector embedding contains `1536` dimensions (the output dimensionality of the `text-embedding-ada-002` model." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "uVoP9VcINWAC", + "outputId": "d9f797af-0df8-4ee9-f779-8d8a62589134" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "id": "unZstoHNUHeG" - }, - "outputs": [], - "source": [ - "# get list of retrieved text\n", - "contexts = [item['metadata']['text'] for item in res['matches']]\n", - "\n", - "augmented_query = \"\\n\\n---\\n\\n\".join(contexts)+\"\\n\\n-----\\n\\n\"+query" + "data": { + "text/plain": [ + "2" ] + }, + "output_type": "execute_result" + } + ], + "source": [ + "len(res[\"data\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "s-zraDCjNeC6", + "outputId": "5f09e471-28de-4c39-d040-a80def97708e" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 27, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "LRcEHm0Z9fXE", - "outputId": "636c6825-ecd1-4953-ee25-ebabcb3a2fed" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Source code for langchain.chains.llm\n", - "\"\"\"Chain that just formats a prompt and calls an LLM.\"\"\"\n", - "from __future__ import annotations\n", - "import warnings\n", - "from typing import Any, Dict, List, Optional, Sequence, Tuple, Union\n", - "from pydantic import Extra, Field\n", - "from langchain.base_language import BaseLanguageModel\n", - "from langchain.callbacks.manager import (\n", - " AsyncCallbackManager,\n", - " AsyncCallbackManagerForChainRun,\n", - " CallbackManager,\n", - " CallbackManagerForChainRun,\n", - " Callbacks,\n", - ")\n", - "from langchain.chains.base import Chain\n", - "from langchain.input import get_colored_text\n", - "from langchain.load.dump import dumpd\n", - "from langchain.prompts.base import BasePromptTemplate\n", - "from langchain.prompts.prompt import PromptTemplate\n", - "from langchain.schema import (\n", - " BaseLLMOutputParser,\n", - " LLMResult,\n", - " NoOpOutputParser,\n", - " PromptValue,\n", - ")\n", - "[docs]class LLMChain(Chain):\n", - " \"\"\"Chain to run queries against 
LLMs.\n", - " Example:\n", - " .. code-block:: python\n", - " from langchain import LLMChain, OpenAI, PromptTemplate\n", - " prompt_template = \"Tell me a {adjective} joke\"\n", - " prompt = PromptTemplate(\n", - " input_variables=[\"adjective\"], template=prompt_template\n", - " )\n", - " llm = LLMChain(llm=OpenAI(), prompt=prompt)\n", - " \"\"\"\n", - " @property\n", - " def lc_serializable(self) -> bool:\n", - " return True\n", - " prompt: BasePromptTemplate\n", - " \"\"\"Prompt object to use.\"\"\"\n", - " llm: BaseLanguageModel\n", - " \"\"\"Language model to call.\"\"\"\n", - " output_key: str = \"text\" #: :meta private:\n", - " output_parser: BaseLLMOutputParser = Field(default_factory=NoOpOutputParser)\n", - " \"\"\"Output parser to use.\n", - " Defaults to one that takes the most likely string but does not change it \n", - " otherwise.\"\"\"\n", - " return_final_only: bool = True\n", - " \"\"\"Whether to return only the final parsed result. Defaults to True.\n", - " If false, will return a bunch of extra information about the generation.\"\"\"\n", - " llm_kwargs: dict = Field(default_factory=dict)\n", - " class Config:\n", - " \"\"\"Configuration for this pydantic object.\"\"\"\n", - " extra = Extra.forbid\n", - " arbitrary_types_allowed = True\n", - "\n", - "---\n", - "\n", - "Bases: langchain.chains.base.Chain\n", - "Chain for question-answering with self-verification.\n", - "Example\n", - "from langchain import OpenAI, LLMSummarizationCheckerChain\n", - "llm = OpenAI(temperature=0.0)\n", - "checker_chain = LLMSummarizationCheckerChain.from_llm(llm)\n", - "Parameters\n", - "memory (Optional[langchain.schema.BaseMemory]) \u2013 \n", - "callbacks (Optional[Union[List[langchain.callbacks.base.BaseCallbackHandler], langchain.callbacks.base.BaseCallbackManager]]) \u2013 \n", - "callback_manager (Optional[langchain.callbacks.base.BaseCallbackManager]) \u2013 \n", - "verbose (bool) \u2013 \n", - "tags (Optional[List[str]]) \u2013 \n", - "sequential_chain (langchain.chains.sequential.SequentialChain) \u2013 \n", - "llm (Optional[langchain.base_language.BaseLanguageModel]) \u2013 \n", - "create_assertions_prompt (langchain.prompts.prompt.PromptTemplate) \u2013 \n", - "check_assertions_prompt (langchain.prompts.prompt.PromptTemplate) \u2013 \n", - "revised_summary_prompt (langchain.prompts.prompt.PromptTemplate) \u2013 \n", - "are_all_true_prompt (langchain.prompts.prompt.PromptTemplate) \u2013 \n", - "input_key (str) \u2013 \n", - "output_key (str) \u2013 \n", - "max_checks (int) \u2013 \n", - "Return type\n", - "None\n", - "\n", - "---\n", - "\n", - "[docs] @classmethod\n", - " def from_llm(\n", - " cls,\n", - " llm: BaseLanguageModel,\n", - " chain: LLMChain,\n", - " critique_prompt: BasePromptTemplate = CRITIQUE_PROMPT,\n", - " revision_prompt: BasePromptTemplate = REVISION_PROMPT,\n", - " **kwargs: Any,\n", - " ) -> \"ConstitutionalChain\":\n", - " \"\"\"Create a chain from an LLM.\"\"\"\n", - " critique_chain = LLMChain(llm=llm, prompt=critique_prompt)\n", - " revision_chain = LLMChain(llm=llm, prompt=revision_prompt)\n", - " return cls(\n", - " chain=chain,\n", - " critique_chain=critique_chain,\n", - " revision_chain=revision_chain,\n", - " **kwargs,\n", - " )\n", - " @property\n", - " def input_keys(self) -> List[str]:\n", - " \"\"\"Defines the input keys.\"\"\"\n", - " return self.chain.input_keys\n", - " @property\n", - " def output_keys(self) -> List[str]:\n", - " \"\"\"Defines the output keys.\"\"\"\n", - " if self.return_intermediate_steps:\n", - " return [\"output\", 
\"critiques_and_revisions\", \"initial_output\"]\n", - " return [\"output\"]\n", - " def _call(\n", - " self,\n", - " inputs: Dict[str, Any],\n", - " run_manager: Optional[CallbackManagerForChainRun] = None,\n", - " ) -> Dict[str, Any]:\n", - " _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()\n", - " response = self.chain.run(\n", - " **inputs,\n", - " callbacks=_run_manager.get_child(\"original\"),\n", - " )\n", - " initial_response = response\n", - " input_prompt = self.chain.prompt.format(**inputs)\n", - " _run_manager.on_text(\n", - " text=\"Initial response: \" + response + \"\\n\\n\",\n", - " verbose=self.verbose,\n", - " color=\"yellow\",\n", - " )\n", - " critiques_and_revisions = []\n", - " for constitutional_principle in self.constitutional_principles:\n", - " # Do critique\n", - " raw_critique = self.critique_chain.run(\n", - " input_prompt=input_prompt,\n", - " output_from_model=response,\n", - " critique_request=constitutional_principle.critique_request,\n", - " callbacks=_run_manager.get_child(\"critique\"),\n", - " )\n", - " critique = self._parse_critique(\n", - " output_string=raw_critique,\n", - "\n", - "---\n", - "\n", - "Source code for langchain.chains.conversation.base\n", - "\"\"\"Chain that carries on a conversation and calls an LLM.\"\"\"\n", - "from typing import Dict, List\n", - "from pydantic import Extra, Field, root_validator\n", - "from langchain.chains.conversation.prompt import PROMPT\n", - "from langchain.chains.llm import LLMChain\n", - "from langchain.memory.buffer import ConversationBufferMemory\n", - "from langchain.prompts.base import BasePromptTemplate\n", - "from langchain.schema import BaseMemory\n", - "[docs]class ConversationChain(LLMChain):\n", - " \"\"\"Chain to have a conversation and load context from memory.\n", - " Example:\n", - " .. code-block:: python\n", - " from langchain import ConversationChain, OpenAI\n", - " conversation = ConversationChain(llm=OpenAI())\n", - " \"\"\"\n", - " memory: BaseMemory = Field(default_factory=ConversationBufferMemory)\n", - " \"\"\"Default memory store.\"\"\"\n", - " prompt: BasePromptTemplate = PROMPT\n", - " \"\"\"Default conversation prompt to use.\"\"\"\n", - " input_key: str = \"input\" #: :meta private:\n", - " output_key: str = \"response\" #: :meta private:\n", - " class Config:\n", - " \"\"\"Configuration for this pydantic object.\"\"\"\n", - " extra = Extra.forbid\n", - " arbitrary_types_allowed = True\n", - " @property\n", - " def input_keys(self) -> List[str]:\n", - " \"\"\"Use this since so some prompt vars come from history.\"\"\"\n", - " return [self.input_key]\n", - " @root_validator()\n", - " def validate_prompt_input_variables(cls, values: Dict) -> Dict:\n", - " \"\"\"Validate that prompt input variables are consistent.\"\"\"\n", - " memory_keys = values[\"memory\"].memory_variables\n", - " input_key = values[\"input_key\"]\n", - " if input_key in memory_keys:\n", - " raise ValueError(\n", - " f\"The input key {input_key} was also found in the memory keys \"\n", - " f\"({memory_keys}) - please provide keys that don't overlap.\"\n", - " )\n", - " prompt_variables = values[\"prompt\"].input_variables\n", - " expected_keys = memory_keys + [input_key]\n", - " if set(expected_keys) != set(prompt_variables):\n", - " raise ValueError(\n", - " \"Got unexpected prompt input variables. 
The prompt expects \"\n",
-        "                f\"{prompt_variables}, but got {memory_keys} as inputs from \"\n",
-        "                f\"memory, and {input_key} as the normal input key.\"\n",
-        "            )\n",
-        "        return values\n",
-        "\n",
-        "---\n",
-        "\n",
-        "callbacks: Callbacks = None,\n",
-        "        **kwargs: Any,\n",
-        "    ) -> BaseConversationalRetrievalChain:\n",
-        "        \"\"\"Load chain from LLM.\"\"\"\n",
-        "        combine_docs_chain_kwargs = combine_docs_chain_kwargs or {}\n",
-        "        doc_chain = load_qa_chain(\n",
-        "            llm,\n",
-        "            chain_type=chain_type,\n",
-        "            callbacks=callbacks,\n",
-        "            **combine_docs_chain_kwargs,\n",
-        "        )\n",
-        "        condense_question_chain = LLMChain(\n",
-        "            llm=llm, prompt=condense_question_prompt, callbacks=callbacks\n",
-        "        )\n",
-        "        return cls(\n",
-        "            vectorstore=vectorstore,\n",
-        "            combine_docs_chain=doc_chain,\n",
-        "            question_generator=condense_question_chain,\n",
-        "            callbacks=callbacks,\n",
-        "            **kwargs,\n",
-        "        )\n",
-        "\n",
-        "-----\n",
-        "\n",
-        "how do I use the LLMChain in LangChain?\n"
-       ]
-      }
-     ],
-     "source": [
-      "print(augmented_query)"
+     "data": {
+      "text/plain": [
+       "(1536, 1536)"
       ]
+     },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(res[\"data\"][0][\"embedding\"]), len(res[\"data\"][1][\"embedding\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "XPd41MjANhmp"
+   },
+   "source": [
+    "We will apply this same embedding logic to the LangChain docs dataset we've just downloaded. But before doing so, we must create a place to store the embeddings."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "WPi4MZvMNvUH"
+   },
+   "source": [
+    "## Initializing the Index"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "H5RRQArrN2lN"
+   },
+   "source": [
+    "Now we need a place to store these embeddings and enable an efficient vector search through them all. To do that we use Pinecone; we can get a [free API key](https://app.pinecone.io/) and enter it below, where we will initialize our connection to Pinecone and create a new index."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from pinecone import Pinecone\n",
+    "\n",
+    "# initialize connection to pinecone (get API key at app.pinecone.io)\n",
+    "api_key = os.environ.get(\"PINECONE_API_KEY\") or \"PINECONE_API_KEY\"\n",
+    "\n",
+    "# configure client\n",
+    "pc = Pinecone(api_key=api_key)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we set up our index specification, which allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects)."
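Before creating anything, it can be useful to confirm that the client connection works, for example by listing the indexes already present in the project. A minimal sketch, assuming the `pc` client created above; `list_indexes()` is the same call the creation cell below relies on:

```python
# sanity check: confirm the client is authenticated by listing existing indexes
existing_indexes = pc.list_indexes().names()
print(existing_indexes)  # e.g. [] on a fresh project
```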
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pinecone import ServerlessSpec\n", + "\n", + "cloud = os.environ.get(\"PINECONE_CLOUD\") or \"aws\"\n", + "region = os.environ.get(\"PINECONE_REGION\") or \"us-east-1\"\n", + "\n", + "spec = ServerlessSpec(cloud=cloud, region=region)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "2GQAnohhum8v", + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "index_name = \"gpt-4-langchain-docs\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "\n", + "# check if index already exists (it shouldn't if this is first time)\n", + "if index_name not in pc.list_indexes().names():\n", + " # if does not exist, create index\n", + " pc.create_index(\n", + " index_name,\n", + " dimension=len(res[\"data\"][0][\"embedding\"]),\n", + " metric=\"cosine\",\n", + " spec=spec,\n", + " )\n", + " # wait for index to be initialized\n", + " while not pc.describe_index(index_name).status[\"ready\"]:\n", + " time.sleep(1)\n", + "\n", + "# connect to index\n", + "index = pc.Index(index_name)\n", + "# view index stats\n", + "index.describe_index_stats()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ezSTzN2rPa2o" + }, + "source": [ + "We can see the index is currently empty with a `total_vector_count` of `0`. We can begin populating it with OpenAI `text-embedding-ada-002` built embeddings like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "760c608de89946298cb6845d5ff1b020", + "f6f7d673d7a145bda593848f7e87ca2c", + "effb0c1b07574547aca5956963b371c8", + "e6e0b0054fb5449c84ad745308510ddb", + "b1e6d4d46b334bcf96efcab6f57c7536", + "e5a120d5b9494d14a142fbf519bcbbdf", + "78fe5eb48ae748bda91ddc70f422212c", + "34e43d6a7a92453490c45e39498afd64", + "45c7fb32593141abb8168b8077e31f59", + "0ed96243151440a18994669e2f85e819", + "05a0a1ebc92f463d9f3e953e51742a85" + ] }, + "id": "iZbFbulAPeop", + "outputId": "97cbb020-f6f9-4914-ff14-dd472354f64a" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "sihH_GMiV5_p" + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "760c608de89946298cb6845d5ff1b020", + "version_major": 2, + "version_minor": 0 }, - "source": [ - "Now we ask the question:" + "text/plain": [ + " 0%| | 0/25 [00:00 bool:\\n'\n", + " ' return True\\n'\n", + " ' prompt: BasePromptTemplate\\n'\n", + " ' \"\"\"Prompt object to use.\"\"\"\\n'\n", + " ' llm: BaseLanguageModel\\n'\n", + " ' \"\"\"Language model to call.\"\"\"\\n'\n", + " ' output_key: str = \"text\" #: :meta '\n", + " 'private:\\n'\n", + " ' output_parser: BaseLLMOutputParser = '\n", + " 'Field(default_factory=NoOpOutputParser)\\n'\n", + " ' \"\"\"Output parser to use.\\n'\n", + " ' Defaults to one that takes the most '\n", + " 'likely string but does not change it \\n'\n", + " ' otherwise.\"\"\"\\n'\n", + " ' return_final_only: bool = True\\n'\n", + " ' \"\"\"Whether to return only the final '\n", + " 'parsed result. 
Defaults to True.\\n'\n", + " ' If false, will return a bunch of extra '\n", + " 'information about the generation.\"\"\"\\n'\n", + " ' llm_kwargs: dict = '\n", + " 'Field(default_factory=dict)\\n'\n", + " ' class Config:\\n'\n", + " ' \"\"\"Configuration for this pydantic '\n", + " 'object.\"\"\"\\n'\n", + " ' extra = Extra.forbid\\n'\n", + " ' arbitrary_types_allowed = True',\n", + " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/llm.html'},\n", + " 'score': 0.800940871,\n", + " 'values': []},\n", + " {'id': '35cde68a-b909-43b6-b918-81c4eb2db5cd-82',\n", + " 'metadata': {'chunk': 82.0,\n", + " 'text': 'Bases: langchain.chains.base.Chain\\n'\n", + " 'Chain for question-answering with '\n", + " 'self-verification.\\n'\n", + " 'Example\\n'\n", + " 'from langchain import OpenAI, '\n", + " 'LLMSummarizationCheckerChain\\n'\n", + " 'llm = OpenAI(temperature=0.0)\\n'\n", + " 'checker_chain = '\n", + " 'LLMSummarizationCheckerChain.from_llm(llm)\\n'\n", + " 'Parameters\\n'\n", + " 'memory '\n", + " '(Optional[langchain.schema.BaseMemory]) '\n", + " '– \\n'\n", + " 'callbacks '\n", + " '(Optional[Union[List[langchain.callbacks.base.BaseCallbackHandler], '\n", + " 'langchain.callbacks.base.BaseCallbackManager]]) '\n", + " '– \\n'\n", + " 'callback_manager '\n", + " '(Optional[langchain.callbacks.base.BaseCallbackManager]) '\n", + " '– \\n'\n", + " 'verbose (bool) – \\n'\n", + " 'tags (Optional[List[str]]) – \\n'\n", + " 'sequential_chain '\n", + " '(langchain.chains.sequential.SequentialChain) '\n", + " '– \\n'\n", + " 'llm '\n", + " '(Optional[langchain.base_language.BaseLanguageModel]) '\n", + " '– \\n'\n", + " 'create_assertions_prompt '\n", + " '(langchain.prompts.prompt.PromptTemplate) '\n", + " '– \\n'\n", + " 'check_assertions_prompt '\n", + " '(langchain.prompts.prompt.PromptTemplate) '\n", + " '– \\n'\n", + " 'revised_summary_prompt '\n", + " '(langchain.prompts.prompt.PromptTemplate) '\n", + " '– \\n'\n", + " 'are_all_true_prompt '\n", + " '(langchain.prompts.prompt.PromptTemplate) '\n", + " '– \\n'\n", + " 'input_key (str) – \\n'\n", + " 'output_key (str) – \\n'\n", + " 'max_checks (int) – \\n'\n", + " 'Return type\\n'\n", + " 'None',\n", + " 'url': 'https://api.python.langchain.com/en/latest/modules/chains.html'},\n", + " 'score': 0.79580605,\n", + " 'values': []},\n", + " {'id': '993db45b-4e3b-431d-a2a6-48ed5944912a-1',\n", + " 'metadata': {'chunk': 1.0,\n", + " 'text': '[docs] @classmethod\\n'\n", + " ' def from_llm(\\n'\n", + " ' cls,\\n'\n", + " ' llm: BaseLanguageModel,\\n'\n", + " ' chain: LLMChain,\\n'\n", + " ' critique_prompt: '\n", + " 'BasePromptTemplate = CRITIQUE_PROMPT,\\n'\n", + " ' revision_prompt: '\n", + " 'BasePromptTemplate = REVISION_PROMPT,\\n'\n", + " ' **kwargs: Any,\\n'\n", + " ' ) -> \"ConstitutionalChain\":\\n'\n", + " ' \"\"\"Create a chain from an LLM.\"\"\"\\n'\n", + " ' critique_chain = LLMChain(llm=llm, '\n", + " 'prompt=critique_prompt)\\n'\n", + " ' revision_chain = LLMChain(llm=llm, '\n", + " 'prompt=revision_prompt)\\n'\n", + " ' return cls(\\n'\n", + " ' chain=chain,\\n'\n", + " ' '\n", + " 'critique_chain=critique_chain,\\n'\n", + " ' '\n", + " 'revision_chain=revision_chain,\\n'\n", + " ' **kwargs,\\n'\n", + " ' )\\n'\n", + " ' @property\\n'\n", + " ' def input_keys(self) -> List[str]:\\n'\n", + " ' \"\"\"Defines the input keys.\"\"\"\\n'\n", + " ' return self.chain.input_keys\\n'\n", + " ' @property\\n'\n", + " ' def output_keys(self) -> List[str]:\\n'\n", + " ' \"\"\"Defines the output keys.\"\"\"\\n'\n", + " ' if 
'\n", + " 'self.return_intermediate_steps:\\n'\n", + " ' return [\"output\", '\n", + " '\"critiques_and_revisions\", '\n", + " '\"initial_output\"]\\n'\n", + " ' return [\"output\"]\\n'\n", + " ' def _call(\\n'\n", + " ' self,\\n'\n", + " ' inputs: Dict[str, Any],\\n'\n", + " ' run_manager: '\n", + " 'Optional[CallbackManagerForChainRun] = '\n", + " 'None,\\n'\n", + " ' ) -> Dict[str, Any]:\\n'\n", + " ' _run_manager = run_manager or '\n", + " 'CallbackManagerForChainRun.get_noop_manager()\\n'\n", + " ' response = self.chain.run(\\n'\n", + " ' **inputs,\\n'\n", + " ' '\n", + " 'callbacks=_run_manager.get_child(\"original\"),\\n'\n", + " ' )\\n'\n", + " ' initial_response = response\\n'\n", + " ' input_prompt = '\n", + " 'self.chain.prompt.format(**inputs)\\n'\n", + " ' _run_manager.on_text(\\n'\n", + " ' text=\"Initial response: \" + '\n", + " 'response + \"\\\\n\\\\n\",\\n'\n", + " ' verbose=self.verbose,\\n'\n", + " ' color=\"yellow\",\\n'\n", + " ' )\\n'\n", + " ' critiques_and_revisions = []\\n'\n", + " ' for constitutional_principle in '\n", + " 'self.constitutional_principles:\\n'\n", + " ' # Do critique\\n'\n", + " ' raw_critique = '\n", + " 'self.critique_chain.run(\\n'\n", + " ' '\n", + " 'input_prompt=input_prompt,\\n'\n", + " ' '\n", + " 'output_from_model=response,\\n'\n", + " ' '\n", + " 'critique_request=constitutional_principle.critique_request,\\n'\n", + " ' '\n", + " 'callbacks=_run_manager.get_child(\"critique\"),\\n'\n", + " ' )\\n'\n", + " ' critique = '\n", + " 'self._parse_critique(\\n'\n", + " ' '\n", + " 'output_string=raw_critique,',\n", + " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/constitutional_ai/base.html'},\n", + " 'score': 0.79369247,\n", + " 'values': []},\n", + " {'id': 'adea5d40-2691-4bc9-9403-3360345bc25e-0',\n", + " 'metadata': {'chunk': 0.0,\n", + " 'text': 'Source code for '\n", + " 'langchain.chains.conversation.base\\n'\n", + " '\"\"\"Chain that carries on a conversation '\n", + " 'and calls an LLM.\"\"\"\\n'\n", + " 'from typing import Dict, List\\n'\n", + " 'from pydantic import Extra, Field, '\n", + " 'root_validator\\n'\n", + " 'from langchain.chains.conversation.prompt '\n", + " 'import PROMPT\\n'\n", + " 'from langchain.chains.llm import LLMChain\\n'\n", + " 'from langchain.memory.buffer import '\n", + " 'ConversationBufferMemory\\n'\n", + " 'from langchain.prompts.base import '\n", + " 'BasePromptTemplate\\n'\n", + " 'from langchain.schema import BaseMemory\\n'\n", + " '[docs]class ConversationChain(LLMChain):\\n'\n", + " ' \"\"\"Chain to have a conversation and '\n", + " 'load context from memory.\\n'\n", + " ' Example:\\n'\n", + " ' .. 
code-block:: python\\n'\n", + " ' from langchain import '\n", + " 'ConversationChain, OpenAI\\n'\n", + " ' conversation = '\n", + " 'ConversationChain(llm=OpenAI())\\n'\n", + " ' \"\"\"\\n'\n", + " ' memory: BaseMemory = '\n", + " 'Field(default_factory=ConversationBufferMemory)\\n'\n", + " ' \"\"\"Default memory store.\"\"\"\\n'\n", + " ' prompt: BasePromptTemplate = PROMPT\\n'\n", + " ' \"\"\"Default conversation prompt to '\n", + " 'use.\"\"\"\\n'\n", + " ' input_key: str = \"input\" #: :meta '\n", + " 'private:\\n'\n", + " ' output_key: str = \"response\" #: :meta '\n", + " 'private:\\n'\n", + " ' class Config:\\n'\n", + " ' \"\"\"Configuration for this pydantic '\n", + " 'object.\"\"\"\\n'\n", + " ' extra = Extra.forbid\\n'\n", + " ' arbitrary_types_allowed = True\\n'\n", + " ' @property\\n'\n", + " ' def input_keys(self) -> List[str]:\\n'\n", + " ' \"\"\"Use this since so some prompt '\n", + " 'vars come from history.\"\"\"\\n'\n", + " ' return [self.input_key]\\n'\n", + " ' @root_validator()\\n'\n", + " ' def '\n", + " 'validate_prompt_input_variables(cls, '\n", + " 'values: Dict) -> Dict:\\n'\n", + " ' \"\"\"Validate that prompt input '\n", + " 'variables are consistent.\"\"\"\\n'\n", + " ' memory_keys = '\n", + " 'values[\"memory\"].memory_variables\\n'\n", + " ' input_key = values[\"input_key\"]\\n'\n", + " ' if input_key in memory_keys:\\n'\n", + " ' raise ValueError(\\n'\n", + " ' f\"The input key '\n", + " '{input_key} was also found in the memory '\n", + " 'keys \"\\n'\n", + " ' f\"({memory_keys}) - please '\n", + " 'provide keys that don\\'t overlap.\"\\n'\n", + " ' )\\n'\n", + " ' prompt_variables = '\n", + " 'values[\"prompt\"].input_variables\\n'\n", + " ' expected_keys = memory_keys + '\n", + " '[input_key]\\n'\n", + " ' if set(expected_keys) != '\n", + " 'set(prompt_variables):\\n'\n", + " ' raise ValueError(\\n'\n", + " ' \"Got unexpected prompt '\n", + " 'input variables. 
The prompt expects \"\\n'\n", + " ' f\"{prompt_variables}, but '\n", + " 'got {memory_keys} as inputs from \"\\n'\n", + " ' f\"memory, and {input_key} '\n", + " 'as the normal input key.\"\\n'\n", + " ' )\\n'\n", + " ' return values',\n", + " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/conversation/base.html'},\n", + " 'score': 0.792259932,\n", + " 'values': []},\n", + " {'id': '3b6f9660-0346-4992-a6f5-b9cc2977f446-5',\n", + " 'metadata': {'chunk': 5.0,\n", + " 'text': 'callbacks: Callbacks = None,\\n'\n", + " ' **kwargs: Any,\\n'\n", + " ' ) -> '\n", + " 'BaseConversationalRetrievalChain:\\n'\n", + " ' \"\"\"Load chain from LLM.\"\"\"\\n'\n", + " ' combine_docs_chain_kwargs = '\n", + " 'combine_docs_chain_kwargs or {}\\n'\n", + " ' doc_chain = load_qa_chain(\\n'\n", + " ' llm,\\n'\n", + " ' chain_type=chain_type,\\n'\n", + " ' callbacks=callbacks,\\n'\n", + " ' **combine_docs_chain_kwargs,\\n'\n", + " ' )\\n'\n", + " ' condense_question_chain = '\n", + " 'LLMChain(\\n'\n", + " ' llm=llm, '\n", + " 'prompt=condense_question_prompt, '\n", + " 'callbacks=callbacks\\n'\n", + " ' )\\n'\n", + " ' return cls(\\n'\n", + " ' vectorstore=vectorstore,\\n'\n", + " ' combine_docs_chain=doc_chain,\\n'\n", + " ' '\n", + " 'question_generator=condense_question_chain,\\n'\n", + " ' callbacks=callbacks,\\n'\n", + " ' **kwargs,\\n'\n", + " ' )',\n", + " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/conversational_retrieval/base.html'},\n", + " 'score': 0.791279614,\n", + " 'values': []}],\n", + " 'namespace': ''}" ] + }, + "output_type": "execute_result" + } + ], + "source": [ + "res" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MoBSiDLIUADZ" + }, + "source": [ + "With retrieval complete, we move on to feeding these into GPT-4 to produce answers." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qfzS4-6-UXgX" + }, + "source": [ + "## Retrieval Augmented Generation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XPC1jQaKUcy0" + }, + "source": [ + "GPT-4 is currently accessed via the `ChatCompletions` endpoint of OpenAI. To add the information we retrieved into the model, we need to pass it into our user prompts *alongside* our original query. We can do that like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "id": "unZstoHNUHeG" + }, + "outputs": [], + "source": [ + "# get list of retrieved text\n", + "contexts = [item[\"metadata\"][\"text\"] for item in res[\"matches\"]]\n", + "\n", + "augmented_query = \"\\n\\n---\\n\\n\".join(contexts) + \"\\n\\n-----\\n\\n\" + query" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "LRcEHm0Z9fXE", + "outputId": "636c6825-ecd1-4953-ee25-ebabcb3a2fed" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "QvS1yJhOWpiJ" - }, - "source": [ - "To display this response nicely, we will display it in markdown." 
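Note that `augmented_query` concatenates every retrieved chunk verbatim, so with a larger `top_k` it could outgrow the model's context window. One way to guard against that is to count tokens with `tiktoken` (installed earlier) and keep only the chunks that fit. A minimal sketch; the 7,000-token budget is our assumption, not a value from the notebook:

```python
import tiktoken

# GPT-4 uses the cl100k_base encoding; encoding_for_model resolves it for us
enc = tiktoken.encoding_for_model("gpt-4")

def build_augmented_query(question, contexts, token_budget=7000):
    """Join retrieved chunks with the query, stopping once the budget is spent."""
    selected, used = [], 0
    for ctx in contexts:
        n_tokens = len(enc.encode(ctx))
        if used + n_tokens > token_budget:
            break  # matches arrive highest-score first, so we drop the weakest
        selected.append(ctx)
        used += n_tokens
    return "\n\n---\n\n".join(selected) + "\n\n-----\n\n" + question
```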
- ] + "output_type": "stream", + "text": [ + "Source code for langchain.chains.llm\n", + "\"\"\"Chain that just formats a prompt and calls an LLM.\"\"\"\n", + "from __future__ import annotations\n", + "import warnings\n", + "from typing import Any, Dict, List, Optional, Sequence, Tuple, Union\n", + "from pydantic import Extra, Field\n", + "from langchain.base_language import BaseLanguageModel\n", + "from langchain.callbacks.manager import (\n", + " AsyncCallbackManager,\n", + " AsyncCallbackManagerForChainRun,\n", + " CallbackManager,\n", + " CallbackManagerForChainRun,\n", + " Callbacks,\n", + ")\n", + "from langchain.chains.base import Chain\n", + "from langchain.input import get_colored_text\n", + "from langchain.load.dump import dumpd\n", + "from langchain.prompts.base import BasePromptTemplate\n", + "from langchain.prompts.prompt import PromptTemplate\n", + "from langchain.schema import (\n", + " BaseLLMOutputParser,\n", + " LLMResult,\n", + " NoOpOutputParser,\n", + " PromptValue,\n", + ")\n", + "[docs]class LLMChain(Chain):\n", + " \"\"\"Chain to run queries against LLMs.\n", + " Example:\n", + " .. code-block:: python\n", + " from langchain import LLMChain, OpenAI, PromptTemplate\n", + " prompt_template = \"Tell me a {adjective} joke\"\n", + " prompt = PromptTemplate(\n", + " input_variables=[\"adjective\"], template=prompt_template\n", + " )\n", + " llm = LLMChain(llm=OpenAI(), prompt=prompt)\n", + " \"\"\"\n", + " @property\n", + " def lc_serializable(self) -> bool:\n", + " return True\n", + " prompt: BasePromptTemplate\n", + " \"\"\"Prompt object to use.\"\"\"\n", + " llm: BaseLanguageModel\n", + " \"\"\"Language model to call.\"\"\"\n", + " output_key: str = \"text\" #: :meta private:\n", + " output_parser: BaseLLMOutputParser = Field(default_factory=NoOpOutputParser)\n", + " \"\"\"Output parser to use.\n", + " Defaults to one that takes the most likely string but does not change it \n", + " otherwise.\"\"\"\n", + " return_final_only: bool = True\n", + " \"\"\"Whether to return only the final parsed result. 
Defaults to True.\n", + " If false, will return a bunch of extra information about the generation.\"\"\"\n", + " llm_kwargs: dict = Field(default_factory=dict)\n", + " class Config:\n", + " \"\"\"Configuration for this pydantic object.\"\"\"\n", + " extra = Extra.forbid\n", + " arbitrary_types_allowed = True\n", + "\n", + "---\n", + "\n", + "Bases: langchain.chains.base.Chain\n", + "Chain for question-answering with self-verification.\n", + "Example\n", + "from langchain import OpenAI, LLMSummarizationCheckerChain\n", + "llm = OpenAI(temperature=0.0)\n", + "checker_chain = LLMSummarizationCheckerChain.from_llm(llm)\n", + "Parameters\n", + "memory (Optional[langchain.schema.BaseMemory]) – \n", + "callbacks (Optional[Union[List[langchain.callbacks.base.BaseCallbackHandler], langchain.callbacks.base.BaseCallbackManager]]) – \n", + "callback_manager (Optional[langchain.callbacks.base.BaseCallbackManager]) – \n", + "verbose (bool) – \n", + "tags (Optional[List[str]]) – \n", + "sequential_chain (langchain.chains.sequential.SequentialChain) – \n", + "llm (Optional[langchain.base_language.BaseLanguageModel]) – \n", + "create_assertions_prompt (langchain.prompts.prompt.PromptTemplate) – \n", + "check_assertions_prompt (langchain.prompts.prompt.PromptTemplate) – \n", + "revised_summary_prompt (langchain.prompts.prompt.PromptTemplate) – \n", + "are_all_true_prompt (langchain.prompts.prompt.PromptTemplate) – \n", + "input_key (str) – \n", + "output_key (str) – \n", + "max_checks (int) – \n", + "Return type\n", + "None\n", + "\n", + "---\n", + "\n", + "[docs] @classmethod\n", + " def from_llm(\n", + " cls,\n", + " llm: BaseLanguageModel,\n", + " chain: LLMChain,\n", + " critique_prompt: BasePromptTemplate = CRITIQUE_PROMPT,\n", + " revision_prompt: BasePromptTemplate = REVISION_PROMPT,\n", + " **kwargs: Any,\n", + " ) -> \"ConstitutionalChain\":\n", + " \"\"\"Create a chain from an LLM.\"\"\"\n", + " critique_chain = LLMChain(llm=llm, prompt=critique_prompt)\n", + " revision_chain = LLMChain(llm=llm, prompt=revision_prompt)\n", + " return cls(\n", + " chain=chain,\n", + " critique_chain=critique_chain,\n", + " revision_chain=revision_chain,\n", + " **kwargs,\n", + " )\n", + " @property\n", + " def input_keys(self) -> List[str]:\n", + " \"\"\"Defines the input keys.\"\"\"\n", + " return self.chain.input_keys\n", + " @property\n", + " def output_keys(self) -> List[str]:\n", + " \"\"\"Defines the output keys.\"\"\"\n", + " if self.return_intermediate_steps:\n", + " return [\"output\", \"critiques_and_revisions\", \"initial_output\"]\n", + " return [\"output\"]\n", + " def _call(\n", + " self,\n", + " inputs: Dict[str, Any],\n", + " run_manager: Optional[CallbackManagerForChainRun] = None,\n", + " ) -> Dict[str, Any]:\n", + " _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()\n", + " response = self.chain.run(\n", + " **inputs,\n", + " callbacks=_run_manager.get_child(\"original\"),\n", + " )\n", + " initial_response = response\n", + " input_prompt = self.chain.prompt.format(**inputs)\n", + " _run_manager.on_text(\n", + " text=\"Initial response: \" + response + \"\\n\\n\",\n", + " verbose=self.verbose,\n", + " color=\"yellow\",\n", + " )\n", + " critiques_and_revisions = []\n", + " for constitutional_principle in self.constitutional_principles:\n", + " # Do critique\n", + " raw_critique = self.critique_chain.run(\n", + " input_prompt=input_prompt,\n", + " output_from_model=response,\n", + " critique_request=constitutional_principle.critique_request,\n", + " 
callbacks=_run_manager.get_child(\"critique\"),\n", + " )\n", + " critique = self._parse_critique(\n", + " output_string=raw_critique,\n", + "\n", + "---\n", + "\n", + "Source code for langchain.chains.conversation.base\n", + "\"\"\"Chain that carries on a conversation and calls an LLM.\"\"\"\n", + "from typing import Dict, List\n", + "from pydantic import Extra, Field, root_validator\n", + "from langchain.chains.conversation.prompt import PROMPT\n", + "from langchain.chains.llm import LLMChain\n", + "from langchain.memory.buffer import ConversationBufferMemory\n", + "from langchain.prompts.base import BasePromptTemplate\n", + "from langchain.schema import BaseMemory\n", + "[docs]class ConversationChain(LLMChain):\n", + " \"\"\"Chain to have a conversation and load context from memory.\n", + " Example:\n", + " .. code-block:: python\n", + " from langchain import ConversationChain, OpenAI\n", + " conversation = ConversationChain(llm=OpenAI())\n", + " \"\"\"\n", + " memory: BaseMemory = Field(default_factory=ConversationBufferMemory)\n", + " \"\"\"Default memory store.\"\"\"\n", + " prompt: BasePromptTemplate = PROMPT\n", + " \"\"\"Default conversation prompt to use.\"\"\"\n", + " input_key: str = \"input\" #: :meta private:\n", + " output_key: str = \"response\" #: :meta private:\n", + " class Config:\n", + " \"\"\"Configuration for this pydantic object.\"\"\"\n", + " extra = Extra.forbid\n", + " arbitrary_types_allowed = True\n", + " @property\n", + " def input_keys(self) -> List[str]:\n", + " \"\"\"Use this since so some prompt vars come from history.\"\"\"\n", + " return [self.input_key]\n", + " @root_validator()\n", + " def validate_prompt_input_variables(cls, values: Dict) -> Dict:\n", + " \"\"\"Validate that prompt input variables are consistent.\"\"\"\n", + " memory_keys = values[\"memory\"].memory_variables\n", + " input_key = values[\"input_key\"]\n", + " if input_key in memory_keys:\n", + " raise ValueError(\n", + " f\"The input key {input_key} was also found in the memory keys \"\n", + " f\"({memory_keys}) - please provide keys that don't overlap.\"\n", + " )\n", + " prompt_variables = values[\"prompt\"].input_variables\n", + " expected_keys = memory_keys + [input_key]\n", + " if set(expected_keys) != set(prompt_variables):\n", + " raise ValueError(\n", + " \"Got unexpected prompt input variables. 
The prompt expects \"\n", + " f\"{prompt_variables}, but got {memory_keys} as inputs from \"\n", + " f\"memory, and {input_key} as the normal input key.\"\n", + " )\n", + " return values\n", + "\n", + "---\n", + "\n", + "callbacks: Callbacks = None,\n", + " **kwargs: Any,\n", + " ) -> BaseConversationalRetrievalChain:\n", + " \"\"\"Load chain from LLM.\"\"\"\n", + " combine_docs_chain_kwargs = combine_docs_chain_kwargs or {}\n", + " doc_chain = load_qa_chain(\n", + " llm,\n", + " chain_type=chain_type,\n", + " callbacks=callbacks,\n", + " **combine_docs_chain_kwargs,\n", + " )\n", + " condense_question_chain = LLMChain(\n", + " llm=llm, prompt=condense_question_prompt, callbacks=callbacks\n", + " )\n", + " return cls(\n", + " vectorstore=vectorstore,\n", + " combine_docs_chain=doc_chain,\n", + " question_generator=condense_question_chain,\n", + " callbacks=callbacks,\n", + " **kwargs,\n", + " )\n", + "\n", + "-----\n", + "\n", + "how do I use the LLMChain in LangChain?\n" + ] + } + ], + "source": [ + "print(augmented_query)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sihH_GMiV5_p" + }, + "source": [ + "Now we ask the question:" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "id": "IThBqBi8V70d" + }, + "outputs": [], + "source": [ + "# system message to 'prime' the model\n", + "primer = f\"\"\"You are Q&A bot. A highly intelligent system that answers\n", + "user questions based on the information provided by the user above\n", + "each question. If the information can not be found in the information\n", + "provided by the user you truthfully say \"I don't know\".\n", + "\"\"\"\n", + "\n", + "res = openai.ChatCompletion.create(\n", + " model=\"gpt-4\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": primer},\n", + " {\"role\": \"user\", \"content\": augmented_query},\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QvS1yJhOWpiJ" + }, + "source": [ + "To display this response nicely, we will display it in markdown." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 465 }, + "id": "RDo2qeMHWto1", + "outputId": "9a9b677f-9b4f-4f77-822d-80baf75ed04a" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 29, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 465 - }, - "id": "RDo2qeMHWto1", - "outputId": "9a9b677f-9b4f-4f77-822d-80baf75ed04a" - }, - "outputs": [ - { - "data": { - "text/markdown": [ - "To use the LLMChain in LangChain, you need to first import the necessary modules and classes. In this example, we will use the OpenAI language model. Follow the steps below:\n", - "\n", - "1. Import all required modules and classes:\n", - "\n", - "```python\n", - "from langchain import LLMChain, OpenAI, PromptTemplate\n", - "```\n", - "\n", - "2. Define the prompt template you want to use with the language model. For example, if you want to create jokes based on provided adjectives:\n", - "\n", - "```python\n", - "prompt_template = \"Tell me a {adjective} joke\"\n", - "```\n", - "\n", - "3. Create a PromptTemplate object passing the input_variables and template:\n", - "\n", - "```python\n", - "prompt = PromptTemplate(input_variables=[\"adjective\"], template=prompt_template)\n", - "```\n", - "\n", - "4. Instantiate the OpenAI language model:\n", - "\n", - "```python\n", - "llm = OpenAI()\n", - "```\n", - "\n", - "5. 
Create the LLMChain object using the OpenAI language model and the created prompt:\n", - "\n", - "```python\n", - "llm_chain = LLMChain(llm=llm, prompt=prompt)\n", - "```\n", - "\n", - "Now you can use the `llm_chain` object to generate jokes based on provided adjectives. For example:\n", - "\n", - "```python\n", - "response = llm_chain.run(adjective=\"funny\")\n", - "print(response)\n", - "```\n", - "\n", - "This will generate and print a funny joke based on the predefined prompt template. Replace `\"funny\"` with any other adjective to get a different result." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } + "data": { + "text/markdown": [ + "To use the LLMChain in LangChain, you need to first import the necessary modules and classes. In this example, we will use the OpenAI language model. Follow the steps below:\n", + "\n", + "1. Import all required modules and classes:\n", + "\n", + "```python\n", + "from langchain import LLMChain, OpenAI, PromptTemplate\n", + "```\n", + "\n", + "2. Define the prompt template you want to use with the language model. For example, if you want to create jokes based on provided adjectives:\n", + "\n", + "```python\n", + "prompt_template = \"Tell me a {adjective} joke\"\n", + "```\n", + "\n", + "3. Create a PromptTemplate object passing the input_variables and template:\n", + "\n", + "```python\n", + "prompt = PromptTemplate(input_variables=[\"adjective\"], template=prompt_template)\n", + "```\n", + "\n", + "4. Instantiate the OpenAI language model:\n", + "\n", + "```python\n", + "llm = OpenAI()\n", + "```\n", + "\n", + "5. Create the LLMChain object using the OpenAI language model and the created prompt:\n", + "\n", + "```python\n", + "llm_chain = LLMChain(llm=llm, prompt=prompt)\n", + "```\n", + "\n", + "Now you can use the `llm_chain` object to generate jokes based on provided adjectives. For example:\n", + "\n", + "```python\n", + "response = llm_chain.run(adjective=\"funny\")\n", + "print(response)\n", + "```\n", + "\n", + "This will generate and print a funny joke based on the predefined prompt template. Replace `\"funny\"` with any other adjective to get a different result." ], - "source": [ - "from IPython.display import Markdown\n", - "\n", - "display(Markdown(res['choices'][0]['message']['content']))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eJ-a8MHg0eYQ" - }, - "source": [ - "Let's compare this to a non-augmented query..." + "text/plain": [ + "" ] + }, + "output_type": "display_data" + } + ], + "source": [ + "from IPython.display import Markdown\n", + "\n", + "display(Markdown(res[\"choices\"][0][\"message\"][\"content\"]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eJ-a8MHg0eYQ" + }, + "source": [ + "Let's compare this to a non-augmented query..." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 46 }, + "id": "vwhaSgdF0ZDX", + "outputId": "ce085b0f-e0da-4c00-f3f5-43b49e64568c" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 30, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 46 - }, - "id": "vwhaSgdF0ZDX", - "outputId": "ce085b0f-e0da-4c00-f3f5-43b49e64568c" - }, - "outputs": [ - { - "data": { - "text/markdown": [ - "I don't know." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } + "data": { + "text/markdown": [ + "I don't know." 
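The augmented and plain calls differ only in the user message, so a small wrapper makes side-by-side comparisons less error-prone. A minimal sketch; the `ask` helper and its defaults are our own convenience, not part of the notebook:

```python
def ask(question, context=None, system=primer):
    """Query GPT-4, optionally prepending retrieved context to the question."""
    content = f"{context}\n\n-----\n\n{question}" if context else question
    res = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": content},
        ],
    )
    return res["choices"][0]["message"]["content"]

# compare: ask(query, context="\n\n---\n\n".join(contexts)) vs. ask(query)
```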
], - "source": [ - "res = openai.ChatCompletion.create(\n", - " model=\"gpt-4\",\n", - " messages=[\n", - " {\"role\": \"system\", \"content\": primer},\n", - " {\"role\": \"user\", \"content\": query}\n", - " ]\n", - ")\n", - "display(Markdown(res['choices'][0]['message']['content']))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5CSsA-dW0m_P" - }, - "source": [ - "If we drop the `\"I don't know\"` part of the `primer`?" + "text/plain": [ + "" ] + }, + "output_type": "display_data" + } + ], + "source": [ + "res = openai.ChatCompletion.create(\n", + " model=\"gpt-4\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": primer},\n", + " {\"role\": \"user\", \"content\": query},\n", + " ],\n", + ")\n", + "display(Markdown(res[\"choices\"][0][\"message\"][\"content\"]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5CSsA-dW0m_P" + }, + "source": [ + "If we drop the `\"I don't know\"` part of the `primer`?" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 371 }, + "id": "Z3svdTCZ0iJ2", + "outputId": "19673965-a2f8-45be-b82a-6e491aa88416" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 371 - }, - "id": "Z3svdTCZ0iJ2", - "outputId": "19673965-a2f8-45be-b82a-6e491aa88416" - }, - "outputs": [ - { - "data": { - "text/markdown": [ - "LLMChain, which stands for LangChain's Language Model Chain, is a feature within the LangChain ecosystem that allows connecting multiple language models to achieve more accurate translations and processing of natural language data.\n", - "\n", - "To use the LLMChain in LangChain, follow these steps:\n", - "\n", - "1. Sign up or log in: If you don't have an account with LangChain, sign up or log in to your existing account.\n", - "\n", - "2. Configure the LLMChain: Navigate to the LLMChain settings or configuration page (it may be under \"Settings\" or \"LLMChain Configuration\"). Here, you'll add, remove, or re-order language models in your chain.\n", - "\n", - "3. Add language models: Choose from the available language models and add them to your chain. Typically, language models are selected based on their performance or scope for specific language pairs or types of text.\n", - "\n", - "4. Set the order of language models: Arrange the order of the language models in your chain based on your preferences or needs. The LLMChain will process the input text in the order you've set, starting from the first model, and pass the output to the subsequent models in the chain.\n", - "\n", - "5. Test the LLMChain: Once you have configured your LLMChain, test it by inputting text and reviewing the generated translations or processed output. This step will allow you to fine-tune the chain to ensure optimal performance.\n", - "\n", - "6. Use the LLMChain in your translation projects or language processing tasks: With your LLMChain set up and tested, you can now use it for your translation or language processing needs.\n", - "\n", - "Remember that the LLMChain is part of the LangChain ecosystem, so any changes or modifications to it may require some knowledge of the platform and its interface. If needed, consult the official documentation or seek support from the community to ensure a seamless experience." 
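A complementary mitigation is to filter out weak matches before they ever reach the prompt, so marginally relevant text is never presented as evidence. A minimal sketch; the `0.75` cutoff is an assumption to tune (the matches above scored roughly 0.79 to 0.80), and `query_res` stands for the Pinecone query response, which the notebook keeps in the reused name `res`:

```python
MIN_SCORE = 0.75  # assumed similarity cutoff; tune against your own data

# keep only confidently similar chunks from the Pinecone query response
strong_contexts = [
    m["metadata"]["text"] for m in query_res["matches"] if m["score"] >= MIN_SCORE
]
```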
- ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } + "data": { + "text/markdown": [ + "LLMChain, which stands for LangChain's Language Model Chain, is a feature within the LangChain ecosystem that allows connecting multiple language models to achieve more accurate translations and processing of natural language data.\n", + "\n", + "To use the LLMChain in LangChain, follow these steps:\n", + "\n", + "1. Sign up or log in: If you don't have an account with LangChain, sign up or log in to your existing account.\n", + "\n", + "2. Configure the LLMChain: Navigate to the LLMChain settings or configuration page (it may be under \"Settings\" or \"LLMChain Configuration\"). Here, you'll add, remove, or re-order language models in your chain.\n", + "\n", + "3. Add language models: Choose from the available language models and add them to your chain. Typically, language models are selected based on their performance or scope for specific language pairs or types of text.\n", + "\n", + "4. Set the order of language models: Arrange the order of the language models in your chain based on your preferences or needs. The LLMChain will process the input text in the order you've set, starting from the first model, and pass the output to the subsequent models in the chain.\n", + "\n", + "5. Test the LLMChain: Once you have configured your LLMChain, test it by inputting text and reviewing the generated translations or processed output. This step will allow you to fine-tune the chain to ensure optimal performance.\n", + "\n", + "6. Use the LLMChain in your translation projects or language processing tasks: With your LLMChain set up and tested, you can now use it for your translation or language processing needs.\n", + "\n", + "Remember that the LLMChain is part of the LangChain ecosystem, so any changes or modifications to it may require some knowledge of the platform and its interface. If needed, consult the official documentation or seek support from the community to ensure a seamless experience." ], - "source": [ - "res = openai.ChatCompletion.create(\n", - " model=\"gpt-4\",\n", - " messages=[\n", - " {\"role\": \"system\", \"content\": \"You are Q&A bot. A highly intelligent system that answers user questions\"},\n", - " {\"role\": \"user\", \"content\": query}\n", - " ]\n", - ")\n", - "display(Markdown(res['choices'][0]['message']['content']))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GcGon5672lBb" - }, - "source": [ - "Then we see something even worse than `\"I don't know\"` \u2014 hallucinations. Clearly augmenting our queries with additional context can make a huge difference to the performance of our system.\n", - "\n", - "Great, we've seen how to augment GPT-4 with semantic search to allow us to answer LangChain specific queries.\n", - "\n", - "Once you're finished, we delete the index to save resources." 
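Since `delete_index` raises an error if the index has already been removed, a guarded variant keeps the cleanup cell safe to re-run. A minimal sketch using the same client calls as above:

```python
# delete only if the index still exists, making the cleanup idempotent
if index_name in pc.list_indexes().names():
    pc.delete_index(index_name)
```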
- ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": { - "id": "Ah_vfEHV2khx" - }, - "outputs": [], - "source": [ - "pc.delete_index(index_name)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "iEUMlO8M2h4Y" - }, - "source": [ - "---" + "text/plain": [ + "" ] + }, + "output_type": "display_data" } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "gpuClass": "standard", - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } + ], + "source": [ + "res = openai.ChatCompletion.create(\n", + " model=\"gpt-4\",\n", + " messages=[\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": \"You are Q&A bot. A highly intelligent system that answers user questions\",\n", + " },\n", + " {\"role\": \"user\", \"content\": query},\n", + " ],\n", + ")\n", + "display(Markdown(res[\"choices\"][0][\"message\"][\"content\"]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GcGon5672lBb" + }, + "source": [ + "Then we see something even worse than `\"I don't know\"` — hallucinations. Clearly augmenting our queries with additional context can make a huge difference to the performance of our system.\n", + "\n", + "Great, we've seen how to augment GPT-4 with semantic search to allow us to answer LangChain specific queries.\n", + "\n", + "Once you're finished, we delete the index to save resources." + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "id": "Ah_vfEHV2khx" + }, + "outputs": [], + "source": [ + "pc.delete_index(index_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iEUMlO8M2h4Y" + }, + "source": [ + "---" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } \ No newline at end of file From a5e5c72184347ed55650dd311959b22d7964bb68 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Thu, 29 Jan 2026 07:51:47 -0500 Subject: [PATCH 2/8] fix: resolve linting errors - Replace bare except clauses with except Exception - Remove f-string prefix from string without placeholders - Remove unused imports (Concatenate, uuid4) - Fix import sorting --- learn/generation/openai/gpt-4-langchain-docs.ipynb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/learn/generation/openai/gpt-4-langchain-docs.ipynb b/learn/generation/openai/gpt-4-langchain-docs.ipynb index 74393bf8..d3a4f2f5 100644 --- a/learn/generation/openai/gpt-4-langchain-docs.ipynb +++ b/learn/generation/openai/gpt-4-langchain-docs.ipynb @@ -433,8 +433,6 @@ } ], "source": [ - "from typing_extensions import Concatenate\n", - "from uuid import uuid4\n", "from tqdm.auto import tqdm\n", "\n", "chunks = []\n", @@ -952,6 +950,7 @@ ], "source": [ "import os\n", + "\n", "import openai\n", "\n", "# get API key from top-right dropdown on OpenAI website\n", @@ -1103,6 +1102,7 @@ "outputs": [], "source": [ "import os\n", + "\n", "from pinecone import Pinecone\n", "\n", "# initialize connection to pinecone (get API key at app.pinecone.io)\n", @@ -1238,14 +1238,14 @@ " # create embeddings (try-except added to avoid RateLimitError)\n", " try:\n", " res = openai.Embedding.create(input=texts, engine=embed_model)\n", - " except:\n", + " except Exception:\n", " done = False\n", " while not done:\n", " time.sleep(5)\n", " try:\n", " res = 
openai.Embedding.create(input=texts, engine=embed_model)\n", " done = True\n", - " except:\n", + " except Exception:\n", " pass\n", " embeds = [record[\"embedding\"] for record in res[\"data\"]]\n", " # cleanup metadata\n", @@ -1955,7 +1955,7 @@ "outputs": [], "source": [ "# system message to 'prime' the model\n", - "primer = f\"\"\"You are Q&A bot. A highly intelligent system that answers\n", + "primer = \"\"\"You are Q&A bot. A highly intelligent system that answers\n", "user questions based on the information provided by the user above\n", "each question. If the information can not be found in the information\n", "provided by the user you truthfully say \"I don't know\".\n", From 84c62c5fda4dde554d1690b627fbc258c5b8df8b Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Thu, 29 Jan 2026 07:53:43 -0500 Subject: [PATCH 3/8] refactor: consolidate imports into first code cell Move all imports to a dedicated imports cell at the top of the notebook to follow notebook guidelines. --- .../openai/gpt-4-langchain-docs.ipynb | 4246 ++++++++--------- 1 file changed, 2120 insertions(+), 2126 deletions(-) diff --git a/learn/generation/openai/gpt-4-langchain-docs.ipynb b/learn/generation/openai/gpt-4-langchain-docs.ipynb index d3a4f2f5..e9f5d056 100644 --- a/learn/generation/openai/gpt-4-langchain-docs.ipynb +++ b/learn/generation/openai/gpt-4-langchain-docs.ipynb @@ -1,2207 +1,2201 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "GFLLl1Agum8O" - }, - "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/generation/openai/gpt-4-langchain-docs.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/generation/openai/gpt-4-langchain-docs.ipynb)\n", - "\n", - "# GPT4 with Retrieval Augmentation over LangChain Docs\n", - "\n", - "[![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/fast-link.svg)](https://github.com/pinecone-io/examples/blob/master/docs/gpt-4-langchain-docs.ipynb)\n", - "\n", - "In this notebook we'll work through an example of using GPT-4 with retrieval augmentation to answer questions about the LangChain Python library." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "_HDKlQO5svqI" - }, - "outputs": [], - "source": [ - "!pip install -qU \\\n", - " tiktoken==0.4.0 \\\n", - " openai==0.27.7 \\\n", - " langchain==0.0.179 \\\n", - " pinecone \\\n", - " datasets==2.13.1" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7c1EpQ-jq7SU" - }, - "source": [ - "---\n", - "\n", - "🚨 _Note: the above `pip install` is formatted for Jupyter notebooks. If running elsewhere you may need to drop the `!`._\n", - "\n", - "---" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NgUEJ6vDum8q" - }, - "source": [ - "In this example, we will download the LangChain docs, we can find a static version of the docs on Hugging Face datasets in `jamescalam/langchain-docs-23-06-27`. 
To download them we do:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 237, - "referenced_widgets": [ - "63de2154fea24b49a87bf4b8428fa630", - "4b4cfb1a834342198c75a02d28448b57", - "a9d471008dc34f67a5307bbb26d6123c", - "580e5dd4c9d9497caa40802d5918e75c", - "bd09981e486d461eaa2cf166b32921e1", - "bed2dd81769b4910831cb34a7b475c72", - "ccad7c2aec604ee29b41497ec0f37fa7", - "390f06d63dd547d395dcf18f1ebe265d", - "6545006e51824be9b6cb5cdb2cb2ba5a", - "241b0de59e53465f8acad4ac74b17b57", - "05199362d95449699254c45c1d5cee94", - "6881722e02fe4395a5fcaf668cb7ebcb", - "2b960a7f46444ad3bd3392517b415f2d", - "a3e8499ed740449586ca31500038c7a8", - "08c52a0369b74e7da99574ec29612189", - "ffb822b2f739434dbe99e8a992716c30", - "7e2b88be1cae49da824e6c6c0782cb50", - "9f4e9da63bb64d279ded5ee1730b5cba", - "3b319c7a4f6f41ea9ea6e6268cd29343", - "908935a03fea42efbded99cd81de54c5", - "dd3ece4c242d4eae946f8bc4f95d1dbf", - "ae71cc7e26ee4b51b7eb67520f66c9bd", - "d83b0b3089c34bb58ddb1272a240c2f9", - "34d21f61f6dc499a9d1504634e470bdd", - "64aae9675d394df48d233b31e5f0eb3c", - "d1d3dde6ec3b483f8b14139a7d6a9ae0", - "690ca50e9785402bb17fa266f8e40ea9", - "482f891d61ab4c2080d95a9b84ea5c6d", - "622987b045e74a13b79553d3d062e72a", - "6c7236b0655e4397b3a9d5f4d83c03fe", - "6f7e876e10fd4c58aa2d1f1ed4ff2762", - "9a8b01998f8a4c6bb0bfe71e02b3352c", - "ec224feb9828415eb018831e985d22c0", - "a532b2307c734cf188092d40299c40ad", - "fab781bfae4647968aa69f19ae6a5754", - "5961b9e44ce14a2a8eb65a9e5b6be90d", - "5f15e4b12305489180e54c61769dcebe", - "324465ed674740c2a18a88a2633f2093", - "f82b21e87eba4e06a0531c791dc09b3f", - "5c0bb7407c844ae19479416752f66190", - "5ef6d125261b49679dcb4d886b3e382c", - "294d5fc4fa1e40429e08137934481ba2", - "f5d992e8c1224879be5e5464a424a3a4", - "7e828bf7b91e4029bc2093876128a78b" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "GFLLl1Agum8O" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/generation/openai/gpt-4-langchain-docs.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/generation/openai/gpt-4-langchain-docs.ipynb)\n", + "\n", + "# GPT4 with Retrieval Augmentation over LangChain Docs\n", + "\n", + "[![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/fast-link.svg)](https://github.com/pinecone-io/examples/blob/master/docs/gpt-4-langchain-docs.ipynb)\n", + "\n", + "In this notebook we'll work through an example of using GPT-4 with retrieval augmentation to answer questions about the LangChain Python library." 
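Both services authenticate through environment variables in later cells (`PINECONE_API_KEY`, plus the optional `PINECONE_CLOUD` and `PINECONE_REGION`; we assume the OpenAI cell reads `OPENAI_API_KEY` the same way). One way to set them before running anything, with placeholder values you would replace with real keys:

```python
import os

# placeholders only: substitute real keys from platform.openai.com and app.pinecone.io
os.environ["OPENAI_API_KEY"] = "sk-..."
os.environ["PINECONE_API_KEY"] = "your-pinecone-api-key"

# optional: the notebook falls back to "aws" / "us-east-1" if these are unset
os.environ["PINECONE_CLOUD"] = "aws"
os.environ["PINECONE_REGION"] = "us-east-1"
```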
+ ] }, - "id": "xo9gYhGPr_DQ", - "outputId": "016b896d-87a6-4d17-bad1-027475510a8b" - }, - "outputs": [ { - "output_type": "stream", - "text": [ - "Downloading and preparing dataset json/jamescalam--langchain-docs-23-06-27 to /root/.cache/huggingface/datasets/jamescalam___json/jamescalam--langchain-docs-23-06-27-4631410d07444b03/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96...\n" - ] + "cell_type": "code", + "metadata": { + "id": "_HDKlQO5svqI" + }, + "source": [ + "!pip install -qU \\\n", + " tiktoken==0.4.0 \\\n", + " openai==0.27.7 \\\n", + " langchain==0.0.179 \\\n", + " pinecone \\\n", + " datasets==2.13.1" + ], + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "import os\n", + "import time\n", + "\n", + "import openai\n", + "import tiktoken\n", + "from datasets import load_dataset\n", + "from IPython.display import Markdown\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from pinecone import Pinecone, ServerlessSpec\n", + "from tqdm.auto import tqdm" + ], + "execution_count": null, + "outputs": [] }, { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "63de2154fea24b49a87bf4b8428fa630", - "version_major": 2, - "version_minor": 0 + "cell_type": "markdown", + "metadata": { + "id": "7c1EpQ-jq7SU" }, - "text/plain": [ - "Downloading data files: 0%| | 0/1 [00:00 JSON: {\n", - " \"data\": [\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"whisper-1\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-internal\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"babbage\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"davinci\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-davinci-edit-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"babbage-code-search-code\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-similarity-babbage-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-embedding-ada-002\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-internal\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"code-davinci-edit-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-davinci-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"ada\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", 
- " \"id\": \"babbage-code-search-text\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"babbage-similarity\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"code-search-babbage-text-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-curie-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-4-0314\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-4-0613\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"code-search-babbage-code-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-ada-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-similarity-ada-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"curie-instruct-beta\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-4\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"ada-code-search-code\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"ada-similarity\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"code-search-ada-text-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-ada-query-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"davinci-search-document\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"ada-code-search-text\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": 
null,\n", - " \"id\": \"text-search-ada-doc-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"davinci-instruct-beta\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-similarity-curie-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"code-search-ada-code-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"ada-search-query\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-davinci-query-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"curie-search-query\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"davinci-search-query\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"babbage-search-document\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"ada-search-document\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-curie-query-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-babbage-doc-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"curie-search-document\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-curie-doc-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"babbage-search-query\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-babbage-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-davinci-doc-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " 
\"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-babbage-query-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"curie-similarity\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-3.5-turbo-0613\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"curie\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-3.5-turbo-16k-0613\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-similarity-davinci-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-davinci-002\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-3.5-turbo-0301\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-davinci-003\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-internal\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"davinci-similarity\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-3.5-turbo\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-3.5-turbo-16k\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-internal\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " }\n", - " ],\n", - " \"object\": \"list\"\n", - "}" + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 66, + "referenced_widgets": [ + "01296cac12234000a13bdca80b31ba8b", + "930601ee00454f71b1114c4aaff0175b", + "e976d05935374e47b86773ca852cfa9e", + "bf9b29814dd04a22a7ff4ca1c6160c21", + "6d110cd070fe4776b9449de74759dff3", + "d670714b504847e3b72cd84510219ec7", + "037869180d9d4b1eb1bdbed67337e349", + "894a9b32ecc3404eb1213a8fa9ea38e2", + "5b14b2d018c74766954d580853eae7fc", + "41920d8d2aa44511814576dab37d96e7", + "d4c5704e6136468b910684e418074271" + ] + }, + "id": "uOdPyiAQ0uWs", + "outputId": "a36d52b2-810d-4422-cc82-105af8d1c83b" + }, + "source": [ + "chunks = []\n", + "\n", + "for page in tqdm(docs):\n", + " if len(page[\"text\"]) < 200:\n", + " # if page content is short we can skip\n", + " continue\n", + " texts = text_splitter.split_text(page[\"text\"])\n", + " chunks.extend(\n", + " [\n", + " {\n", + " \"id\": page[\"id\"] 
+ f\"-{i}\",\n", + " \"text\": texts[i],\n", + " \"url\": page[\"url\"],\n", + " \"chunk\": i,\n", + " }\n", + " for i in range(len(texts))\n", + " ]\n", + " )\n", + "len(chunks)" + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "01296cac12234000a13bdca80b31ba8b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/505 [00:00 JSON: {\n", + " \"data\": [\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"whisper-1\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-internal\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"babbage\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"davinci\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-davinci-edit-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"babbage-code-search-code\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-similarity-babbage-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-embedding-ada-002\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-internal\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"code-davinci-edit-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-davinci-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"ada\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"babbage-code-search-text\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"babbage-similarity\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"code-search-babbage-text-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-curie-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-4-0314\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " 
\"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-4-0613\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"code-search-babbage-code-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-ada-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-similarity-ada-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"curie-instruct-beta\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-4\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"ada-code-search-code\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"ada-similarity\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"code-search-ada-text-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-ada-query-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"davinci-search-document\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"ada-code-search-text\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-ada-doc-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"davinci-instruct-beta\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-similarity-curie-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"code-search-ada-code-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"ada-search-query\",\n", + " \"object\": \"engine\",\n", + " \"owner\": 
\"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-davinci-query-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"curie-search-query\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"davinci-search-query\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"babbage-search-document\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"ada-search-document\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-curie-query-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-babbage-doc-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"curie-search-document\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-curie-doc-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"babbage-search-query\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-babbage-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-davinci-doc-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-babbage-query-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"curie-similarity\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-3.5-turbo-0613\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"curie\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": 
\"gpt-3.5-turbo-16k-0613\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-similarity-davinci-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-davinci-002\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-3.5-turbo-0301\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-davinci-003\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-internal\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"davinci-similarity\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-3.5-turbo\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-3.5-turbo-16k\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-internal\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " }\n", + " ],\n", + " \"object\": \"list\"\n", + "}" + ] + } + } ] - }, - "output_type": "execute_result" - } - ], - "source": [ - "len(res[\"data\"][0][\"embedding\"]), len(res[\"data\"][1][\"embedding\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XPd41MjANhmp" - }, - "source": [ - "We will apply this same embedding logic to the langchain docs dataset we've just scraped. But before doing so we must create a place to store the embeddings." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WPi4MZvMNvUH" - }, - "source": [ - "## Initializing the Index" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "H5RRQArrN2lN" - }, - "source": [ - "Now we need a place to store these embeddings and enable a efficient vector search through them all. To do that we use Pinecone, we can get a [free API key](https://app.pinecone.io/) and enter it below where we will initialize our connection to Pinecone and create a new index." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "from pinecone import Pinecone\n", - "\n", - "# initialize connection to pinecone (get API key at app.pinecone.io)\n", - "api_key = os.environ.get(\"PINECONE_API_KEY\") or \"PINECONE_API_KEY\"\n", - "\n", - "# configure client\n", - "pc = Pinecone(api_key=api_key)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we setup our index specification, this allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pinecone import ServerlessSpec\n", - "\n", - "cloud = os.environ.get(\"PINECONE_CLOUD\") or \"aws\"\n", - "region = os.environ.get(\"PINECONE_REGION\") or \"us-east-1\"\n", - "\n", - "spec = ServerlessSpec(cloud=cloud, region=region)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "id": "2GQAnohhum8v", - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "index_name = \"gpt-4-langchain-docs\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time\n", - "\n", - "# check if index already exists (it shouldn't if this is first time)\n", - "if index_name not in pc.list_indexes().names():\n", - " # if does not exist, create index\n", - " pc.create_index(\n", - " index_name,\n", - " dimension=len(res[\"data\"][0][\"embedding\"]),\n", - " metric=\"cosine\",\n", - " spec=spec,\n", - " )\n", - " # wait for index to be initialized\n", - " while not pc.describe_index(index_name).status[\"ready\"]:\n", - " time.sleep(1)\n", - "\n", - "# connect to index\n", - "index = pc.Index(index_name)\n", - "# view index stats\n", - "index.describe_index_stats()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ezSTzN2rPa2o" - }, - "source": [ - "We can see the index is currently empty with a `total_vector_count` of `0`. We can begin populating it with OpenAI `text-embedding-ada-002` built embeddings like so:" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 49, - "referenced_widgets": [ - "760c608de89946298cb6845d5ff1b020", - "f6f7d673d7a145bda593848f7e87ca2c", - "effb0c1b07574547aca5956963b371c8", - "e6e0b0054fb5449c84ad745308510ddb", - "b1e6d4d46b334bcf96efcab6f57c7536", - "e5a120d5b9494d14a142fbf519bcbbdf", - "78fe5eb48ae748bda91ddc70f422212c", - "34e43d6a7a92453490c45e39498afd64", - "45c7fb32593141abb8168b8077e31f59", - "0ed96243151440a18994669e2f85e819", - "05a0a1ebc92f463d9f3e953e51742a85" - ] }, - "id": "iZbFbulAPeop", - "outputId": "97cbb020-f6f9-4914-ff14-dd472354f64a" - }, - "outputs": [ { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "760c608de89946298cb6845d5ff1b020", - "version_major": 2, - "version_minor": 0 + "cell_type": "code", + "metadata": { + "id": "kteZ69Z5M55S" }, - "text/plain": [ - " 0%| | 0/25 [00:00 bool:\\n'\n", - " ' return True\\n'\n", - " ' prompt: BasePromptTemplate\\n'\n", - " ' \"\"\"Prompt object to use.\"\"\"\\n'\n", - " ' llm: BaseLanguageModel\\n'\n", - " ' \"\"\"Language model to call.\"\"\"\\n'\n", - " ' output_key: str = \"text\" #: :meta '\n", - " 'private:\\n'\n", - " ' output_parser: BaseLLMOutputParser = '\n", - " 'Field(default_factory=NoOpOutputParser)\\n'\n", - " ' \"\"\"Output parser to use.\\n'\n", - " ' Defaults to one that takes the most '\n", - " 'likely string but does not change it \\n'\n", - " ' otherwise.\"\"\"\\n'\n", - " ' return_final_only: bool = True\\n'\n", - " ' \"\"\"Whether to return only the final '\n", - " 'parsed result. 
Defaults to True.\\n'\n", - " ' If false, will return a bunch of extra '\n", - " 'information about the generation.\"\"\"\\n'\n", - " ' llm_kwargs: dict = '\n", - " 'Field(default_factory=dict)\\n'\n", - " ' class Config:\\n'\n", - " ' \"\"\"Configuration for this pydantic '\n", - " 'object.\"\"\"\\n'\n", - " ' extra = Extra.forbid\\n'\n", - " ' arbitrary_types_allowed = True',\n", - " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/llm.html'},\n", - " 'score': 0.800940871,\n", - " 'values': []},\n", - " {'id': '35cde68a-b909-43b6-b918-81c4eb2db5cd-82',\n", - " 'metadata': {'chunk': 82.0,\n", - " 'text': 'Bases: langchain.chains.base.Chain\\n'\n", - " 'Chain for question-answering with '\n", - " 'self-verification.\\n'\n", - " 'Example\\n'\n", - " 'from langchain import OpenAI, '\n", - " 'LLMSummarizationCheckerChain\\n'\n", - " 'llm = OpenAI(temperature=0.0)\\n'\n", - " 'checker_chain = '\n", - " 'LLMSummarizationCheckerChain.from_llm(llm)\\n'\n", - " 'Parameters\\n'\n", - " 'memory '\n", - " '(Optional[langchain.schema.BaseMemory]) '\n", - " '– \\n'\n", - " 'callbacks '\n", - " '(Optional[Union[List[langchain.callbacks.base.BaseCallbackHandler], '\n", - " 'langchain.callbacks.base.BaseCallbackManager]]) '\n", - " '– \\n'\n", - " 'callback_manager '\n", - " '(Optional[langchain.callbacks.base.BaseCallbackManager]) '\n", - " '– \\n'\n", - " 'verbose (bool) – \\n'\n", - " 'tags (Optional[List[str]]) – \\n'\n", - " 'sequential_chain '\n", - " '(langchain.chains.sequential.SequentialChain) '\n", - " '– \\n'\n", - " 'llm '\n", - " '(Optional[langchain.base_language.BaseLanguageModel]) '\n", - " '– \\n'\n", - " 'create_assertions_prompt '\n", - " '(langchain.prompts.prompt.PromptTemplate) '\n", - " '– \\n'\n", - " 'check_assertions_prompt '\n", - " '(langchain.prompts.prompt.PromptTemplate) '\n", - " '– \\n'\n", - " 'revised_summary_prompt '\n", - " '(langchain.prompts.prompt.PromptTemplate) '\n", - " '– \\n'\n", - " 'are_all_true_prompt '\n", - " '(langchain.prompts.prompt.PromptTemplate) '\n", - " '– \\n'\n", - " 'input_key (str) – \\n'\n", - " 'output_key (str) – \\n'\n", - " 'max_checks (int) – \\n'\n", - " 'Return type\\n'\n", - " 'None',\n", - " 'url': 'https://api.python.langchain.com/en/latest/modules/chains.html'},\n", - " 'score': 0.79580605,\n", - " 'values': []},\n", - " {'id': '993db45b-4e3b-431d-a2a6-48ed5944912a-1',\n", - " 'metadata': {'chunk': 1.0,\n", - " 'text': '[docs] @classmethod\\n'\n", - " ' def from_llm(\\n'\n", - " ' cls,\\n'\n", - " ' llm: BaseLanguageModel,\\n'\n", - " ' chain: LLMChain,\\n'\n", - " ' critique_prompt: '\n", - " 'BasePromptTemplate = CRITIQUE_PROMPT,\\n'\n", - " ' revision_prompt: '\n", - " 'BasePromptTemplate = REVISION_PROMPT,\\n'\n", - " ' **kwargs: Any,\\n'\n", - " ' ) -> \"ConstitutionalChain\":\\n'\n", - " ' \"\"\"Create a chain from an LLM.\"\"\"\\n'\n", - " ' critique_chain = LLMChain(llm=llm, '\n", - " 'prompt=critique_prompt)\\n'\n", - " ' revision_chain = LLMChain(llm=llm, '\n", - " 'prompt=revision_prompt)\\n'\n", - " ' return cls(\\n'\n", - " ' chain=chain,\\n'\n", - " ' '\n", - " 'critique_chain=critique_chain,\\n'\n", - " ' '\n", - " 'revision_chain=revision_chain,\\n'\n", - " ' **kwargs,\\n'\n", - " ' )\\n'\n", - " ' @property\\n'\n", - " ' def input_keys(self) -> List[str]:\\n'\n", - " ' \"\"\"Defines the input keys.\"\"\"\\n'\n", - " ' return self.chain.input_keys\\n'\n", - " ' @property\\n'\n", - " ' def output_keys(self) -> List[str]:\\n'\n", - " ' \"\"\"Defines the output keys.\"\"\"\\n'\n", - " ' if 
'\n", - " 'self.return_intermediate_steps:\\n'\n", - " ' return [\"output\", '\n", - " '\"critiques_and_revisions\", '\n", - " '\"initial_output\"]\\n'\n", - " ' return [\"output\"]\\n'\n", - " ' def _call(\\n'\n", - " ' self,\\n'\n", - " ' inputs: Dict[str, Any],\\n'\n", - " ' run_manager: '\n", - " 'Optional[CallbackManagerForChainRun] = '\n", - " 'None,\\n'\n", - " ' ) -> Dict[str, Any]:\\n'\n", - " ' _run_manager = run_manager or '\n", - " 'CallbackManagerForChainRun.get_noop_manager()\\n'\n", - " ' response = self.chain.run(\\n'\n", - " ' **inputs,\\n'\n", - " ' '\n", - " 'callbacks=_run_manager.get_child(\"original\"),\\n'\n", - " ' )\\n'\n", - " ' initial_response = response\\n'\n", - " ' input_prompt = '\n", - " 'self.chain.prompt.format(**inputs)\\n'\n", - " ' _run_manager.on_text(\\n'\n", - " ' text=\"Initial response: \" + '\n", - " 'response + \"\\\\n\\\\n\",\\n'\n", - " ' verbose=self.verbose,\\n'\n", - " ' color=\"yellow\",\\n'\n", - " ' )\\n'\n", - " ' critiques_and_revisions = []\\n'\n", - " ' for constitutional_principle in '\n", - " 'self.constitutional_principles:\\n'\n", - " ' # Do critique\\n'\n", - " ' raw_critique = '\n", - " 'self.critique_chain.run(\\n'\n", - " ' '\n", - " 'input_prompt=input_prompt,\\n'\n", - " ' '\n", - " 'output_from_model=response,\\n'\n", - " ' '\n", - " 'critique_request=constitutional_principle.critique_request,\\n'\n", - " ' '\n", - " 'callbacks=_run_manager.get_child(\"critique\"),\\n'\n", - " ' )\\n'\n", - " ' critique = '\n", - " 'self._parse_critique(\\n'\n", - " ' '\n", - " 'output_string=raw_critique,',\n", - " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/constitutional_ai/base.html'},\n", - " 'score': 0.79369247,\n", - " 'values': []},\n", - " {'id': 'adea5d40-2691-4bc9-9403-3360345bc25e-0',\n", - " 'metadata': {'chunk': 0.0,\n", - " 'text': 'Source code for '\n", - " 'langchain.chains.conversation.base\\n'\n", - " '\"\"\"Chain that carries on a conversation '\n", - " 'and calls an LLM.\"\"\"\\n'\n", - " 'from typing import Dict, List\\n'\n", - " 'from pydantic import Extra, Field, '\n", - " 'root_validator\\n'\n", - " 'from langchain.chains.conversation.prompt '\n", - " 'import PROMPT\\n'\n", - " 'from langchain.chains.llm import LLMChain\\n'\n", - " 'from langchain.memory.buffer import '\n", - " 'ConversationBufferMemory\\n'\n", - " 'from langchain.prompts.base import '\n", - " 'BasePromptTemplate\\n'\n", - " 'from langchain.schema import BaseMemory\\n'\n", - " '[docs]class ConversationChain(LLMChain):\\n'\n", - " ' \"\"\"Chain to have a conversation and '\n", - " 'load context from memory.\\n'\n", - " ' Example:\\n'\n", - " ' .. 
code-block:: python\\n'\n", - " ' from langchain import '\n", - " 'ConversationChain, OpenAI\\n'\n", - " ' conversation = '\n", - " 'ConversationChain(llm=OpenAI())\\n'\n", - " ' \"\"\"\\n'\n", - " ' memory: BaseMemory = '\n", - " 'Field(default_factory=ConversationBufferMemory)\\n'\n", - " ' \"\"\"Default memory store.\"\"\"\\n'\n", - " ' prompt: BasePromptTemplate = PROMPT\\n'\n", - " ' \"\"\"Default conversation prompt to '\n", - " 'use.\"\"\"\\n'\n", - " ' input_key: str = \"input\" #: :meta '\n", - " 'private:\\n'\n", - " ' output_key: str = \"response\" #: :meta '\n", - " 'private:\\n'\n", - " ' class Config:\\n'\n", - " ' \"\"\"Configuration for this pydantic '\n", - " 'object.\"\"\"\\n'\n", - " ' extra = Extra.forbid\\n'\n", - " ' arbitrary_types_allowed = True\\n'\n", - " ' @property\\n'\n", - " ' def input_keys(self) -> List[str]:\\n'\n", - " ' \"\"\"Use this since so some prompt '\n", - " 'vars come from history.\"\"\"\\n'\n", - " ' return [self.input_key]\\n'\n", - " ' @root_validator()\\n'\n", - " ' def '\n", - " 'validate_prompt_input_variables(cls, '\n", - " 'values: Dict) -> Dict:\\n'\n", - " ' \"\"\"Validate that prompt input '\n", - " 'variables are consistent.\"\"\"\\n'\n", - " ' memory_keys = '\n", - " 'values[\"memory\"].memory_variables\\n'\n", - " ' input_key = values[\"input_key\"]\\n'\n", - " ' if input_key in memory_keys:\\n'\n", - " ' raise ValueError(\\n'\n", - " ' f\"The input key '\n", - " '{input_key} was also found in the memory '\n", - " 'keys \"\\n'\n", - " ' f\"({memory_keys}) - please '\n", - " 'provide keys that don\\'t overlap.\"\\n'\n", - " ' )\\n'\n", - " ' prompt_variables = '\n", - " 'values[\"prompt\"].input_variables\\n'\n", - " ' expected_keys = memory_keys + '\n", - " '[input_key]\\n'\n", - " ' if set(expected_keys) != '\n", - " 'set(prompt_variables):\\n'\n", - " ' raise ValueError(\\n'\n", - " ' \"Got unexpected prompt '\n", - " 'input variables. 
The prompt expects \"\\n'\n", - " ' f\"{prompt_variables}, but '\n", - " 'got {memory_keys} as inputs from \"\\n'\n", - " ' f\"memory, and {input_key} '\n", - " 'as the normal input key.\"\\n'\n", - " ' )\\n'\n", - " ' return values',\n", - " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/conversation/base.html'},\n", - " 'score': 0.792259932,\n", - " 'values': []},\n", - " {'id': '3b6f9660-0346-4992-a6f5-b9cc2977f446-5',\n", - " 'metadata': {'chunk': 5.0,\n", - " 'text': 'callbacks: Callbacks = None,\\n'\n", - " ' **kwargs: Any,\\n'\n", - " ' ) -> '\n", - " 'BaseConversationalRetrievalChain:\\n'\n", - " ' \"\"\"Load chain from LLM.\"\"\"\\n'\n", - " ' combine_docs_chain_kwargs = '\n", - " 'combine_docs_chain_kwargs or {}\\n'\n", - " ' doc_chain = load_qa_chain(\\n'\n", - " ' llm,\\n'\n", - " ' chain_type=chain_type,\\n'\n", - " ' callbacks=callbacks,\\n'\n", - " ' **combine_docs_chain_kwargs,\\n'\n", - " ' )\\n'\n", - " ' condense_question_chain = '\n", - " 'LLMChain(\\n'\n", - " ' llm=llm, '\n", - " 'prompt=condense_question_prompt, '\n", - " 'callbacks=callbacks\\n'\n", - " ' )\\n'\n", - " ' return cls(\\n'\n", - " ' vectorstore=vectorstore,\\n'\n", - " ' combine_docs_chain=doc_chain,\\n'\n", - " ' '\n", - " 'question_generator=condense_question_chain,\\n'\n", - " ' callbacks=callbacks,\\n'\n", - " ' **kwargs,\\n'\n", - " ' )',\n", - " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/conversational_retrieval/base.html'},\n", - " 'score': 0.791279614,\n", - " 'values': []}],\n", - " 'namespace': ''}" + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "esagZj6iNLPZ", + "outputId": "8e26f18a-4890-43ca-95e7-9e256e29e3be" + }, + "source": [ + "res.keys()" + ], + "execution_count": 13, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "dict_keys(['object', 'data', 'model', 'usage'])" + ] + } + } ] - }, - "output_type": "execute_result" - } - ], - "source": [ - "res" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MoBSiDLIUADZ" - }, - "source": [ - "With retrieval complete, we move on to feeding these into GPT-4 to produce answers." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qfzS4-6-UXgX" - }, - "source": [ - "## Retrieval Augmented Generation" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XPC1jQaKUcy0" - }, - "source": [ - "GPT-4 is currently accessed via the `ChatCompletions` endpoint of OpenAI. To add the information we retrieved into the model, we need to pass it into our user prompts *alongside* our original query. 
We can do that like so:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "id": "unZstoHNUHeG" - }, - "outputs": [], - "source": [ - "# get list of retrieved text\n", - "contexts = [item[\"metadata\"][\"text\"] for item in res[\"matches\"]]\n", - "\n", - "augmented_query = \"\\n\\n---\\n\\n\".join(contexts) + \"\\n\\n-----\\n\\n\" + query" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "LRcEHm0Z9fXE", - "outputId": "636c6825-ecd1-4953-ee25-ebabcb3a2fed" - }, - "outputs": [ { - "output_type": "stream", - "text": [ - "Source code for langchain.chains.llm\n", - "\"\"\"Chain that just formats a prompt and calls an LLM.\"\"\"\n", - "from __future__ import annotations\n", - "import warnings\n", - "from typing import Any, Dict, List, Optional, Sequence, Tuple, Union\n", - "from pydantic import Extra, Field\n", - "from langchain.base_language import BaseLanguageModel\n", - "from langchain.callbacks.manager import (\n", - " AsyncCallbackManager,\n", - " AsyncCallbackManagerForChainRun,\n", - " CallbackManager,\n", - " CallbackManagerForChainRun,\n", - " Callbacks,\n", - ")\n", - "from langchain.chains.base import Chain\n", - "from langchain.input import get_colored_text\n", - "from langchain.load.dump import dumpd\n", - "from langchain.prompts.base import BasePromptTemplate\n", - "from langchain.prompts.prompt import PromptTemplate\n", - "from langchain.schema import (\n", - " BaseLLMOutputParser,\n", - " LLMResult,\n", - " NoOpOutputParser,\n", - " PromptValue,\n", - ")\n", - "[docs]class LLMChain(Chain):\n", - " \"\"\"Chain to run queries against LLMs.\n", - " Example:\n", - " .. code-block:: python\n", - " from langchain import LLMChain, OpenAI, PromptTemplate\n", - " prompt_template = \"Tell me a {adjective} joke\"\n", - " prompt = PromptTemplate(\n", - " input_variables=[\"adjective\"], template=prompt_template\n", - " )\n", - " llm = LLMChain(llm=OpenAI(), prompt=prompt)\n", - " \"\"\"\n", - " @property\n", - " def lc_serializable(self) -> bool:\n", - " return True\n", - " prompt: BasePromptTemplate\n", - " \"\"\"Prompt object to use.\"\"\"\n", - " llm: BaseLanguageModel\n", - " \"\"\"Language model to call.\"\"\"\n", - " output_key: str = \"text\" #: :meta private:\n", - " output_parser: BaseLLMOutputParser = Field(default_factory=NoOpOutputParser)\n", - " \"\"\"Output parser to use.\n", - " Defaults to one that takes the most likely string but does not change it \n", - " otherwise.\"\"\"\n", - " return_final_only: bool = True\n", - " \"\"\"Whether to return only the final parsed result. 
Defaults to True.\n", - " If false, will return a bunch of extra information about the generation.\"\"\"\n", - " llm_kwargs: dict = Field(default_factory=dict)\n", - " class Config:\n", - " \"\"\"Configuration for this pydantic object.\"\"\"\n", - " extra = Extra.forbid\n", - " arbitrary_types_allowed = True\n", - "\n", - "---\n", - "\n", - "Bases: langchain.chains.base.Chain\n", - "Chain for question-answering with self-verification.\n", - "Example\n", - "from langchain import OpenAI, LLMSummarizationCheckerChain\n", - "llm = OpenAI(temperature=0.0)\n", - "checker_chain = LLMSummarizationCheckerChain.from_llm(llm)\n", - "Parameters\n", - "memory (Optional[langchain.schema.BaseMemory]) – \n", - "callbacks (Optional[Union[List[langchain.callbacks.base.BaseCallbackHandler], langchain.callbacks.base.BaseCallbackManager]]) – \n", - "callback_manager (Optional[langchain.callbacks.base.BaseCallbackManager]) – \n", - "verbose (bool) – \n", - "tags (Optional[List[str]]) – \n", - "sequential_chain (langchain.chains.sequential.SequentialChain) – \n", - "llm (Optional[langchain.base_language.BaseLanguageModel]) – \n", - "create_assertions_prompt (langchain.prompts.prompt.PromptTemplate) – \n", - "check_assertions_prompt (langchain.prompts.prompt.PromptTemplate) – \n", - "revised_summary_prompt (langchain.prompts.prompt.PromptTemplate) – \n", - "are_all_true_prompt (langchain.prompts.prompt.PromptTemplate) – \n", - "input_key (str) – \n", - "output_key (str) – \n", - "max_checks (int) – \n", - "Return type\n", - "None\n", - "\n", - "---\n", - "\n", - "[docs] @classmethod\n", - " def from_llm(\n", - " cls,\n", - " llm: BaseLanguageModel,\n", - " chain: LLMChain,\n", - " critique_prompt: BasePromptTemplate = CRITIQUE_PROMPT,\n", - " revision_prompt: BasePromptTemplate = REVISION_PROMPT,\n", - " **kwargs: Any,\n", - " ) -> \"ConstitutionalChain\":\n", - " \"\"\"Create a chain from an LLM.\"\"\"\n", - " critique_chain = LLMChain(llm=llm, prompt=critique_prompt)\n", - " revision_chain = LLMChain(llm=llm, prompt=revision_prompt)\n", - " return cls(\n", - " chain=chain,\n", - " critique_chain=critique_chain,\n", - " revision_chain=revision_chain,\n", - " **kwargs,\n", - " )\n", - " @property\n", - " def input_keys(self) -> List[str]:\n", - " \"\"\"Defines the input keys.\"\"\"\n", - " return self.chain.input_keys\n", - " @property\n", - " def output_keys(self) -> List[str]:\n", - " \"\"\"Defines the output keys.\"\"\"\n", - " if self.return_intermediate_steps:\n", - " return [\"output\", \"critiques_and_revisions\", \"initial_output\"]\n", - " return [\"output\"]\n", - " def _call(\n", - " self,\n", - " inputs: Dict[str, Any],\n", - " run_manager: Optional[CallbackManagerForChainRun] = None,\n", - " ) -> Dict[str, Any]:\n", - " _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()\n", - " response = self.chain.run(\n", - " **inputs,\n", - " callbacks=_run_manager.get_child(\"original\"),\n", - " )\n", - " initial_response = response\n", - " input_prompt = self.chain.prompt.format(**inputs)\n", - " _run_manager.on_text(\n", - " text=\"Initial response: \" + response + \"\\n\\n\",\n", - " verbose=self.verbose,\n", - " color=\"yellow\",\n", - " )\n", - " critiques_and_revisions = []\n", - " for constitutional_principle in self.constitutional_principles:\n", - " # Do critique\n", - " raw_critique = self.critique_chain.run(\n", - " input_prompt=input_prompt,\n", - " output_from_model=response,\n", - " critique_request=constitutional_principle.critique_request,\n", - " 
callbacks=_run_manager.get_child(\"critique\"),\n", - " )\n", - " critique = self._parse_critique(\n", - " output_string=raw_critique,\n", - "\n", - "---\n", - "\n", - "Source code for langchain.chains.conversation.base\n", - "\"\"\"Chain that carries on a conversation and calls an LLM.\"\"\"\n", - "from typing import Dict, List\n", - "from pydantic import Extra, Field, root_validator\n", - "from langchain.chains.conversation.prompt import PROMPT\n", - "from langchain.chains.llm import LLMChain\n", - "from langchain.memory.buffer import ConversationBufferMemory\n", - "from langchain.prompts.base import BasePromptTemplate\n", - "from langchain.schema import BaseMemory\n", - "[docs]class ConversationChain(LLMChain):\n", - " \"\"\"Chain to have a conversation and load context from memory.\n", - " Example:\n", - " .. code-block:: python\n", - " from langchain import ConversationChain, OpenAI\n", - " conversation = ConversationChain(llm=OpenAI())\n", - " \"\"\"\n", - " memory: BaseMemory = Field(default_factory=ConversationBufferMemory)\n", - " \"\"\"Default memory store.\"\"\"\n", - " prompt: BasePromptTemplate = PROMPT\n", - " \"\"\"Default conversation prompt to use.\"\"\"\n", - " input_key: str = \"input\" #: :meta private:\n", - " output_key: str = \"response\" #: :meta private:\n", - " class Config:\n", - " \"\"\"Configuration for this pydantic object.\"\"\"\n", - " extra = Extra.forbid\n", - " arbitrary_types_allowed = True\n", - " @property\n", - " def input_keys(self) -> List[str]:\n", - " \"\"\"Use this since so some prompt vars come from history.\"\"\"\n", - " return [self.input_key]\n", - " @root_validator()\n", - " def validate_prompt_input_variables(cls, values: Dict) -> Dict:\n", - " \"\"\"Validate that prompt input variables are consistent.\"\"\"\n", - " memory_keys = values[\"memory\"].memory_variables\n", - " input_key = values[\"input_key\"]\n", - " if input_key in memory_keys:\n", - " raise ValueError(\n", - " f\"The input key {input_key} was also found in the memory keys \"\n", - " f\"({memory_keys}) - please provide keys that don't overlap.\"\n", - " )\n", - " prompt_variables = values[\"prompt\"].input_variables\n", - " expected_keys = memory_keys + [input_key]\n", - " if set(expected_keys) != set(prompt_variables):\n", - " raise ValueError(\n", - " \"Got unexpected prompt input variables. 
The prompt expects \"\n", - " f\"{prompt_variables}, but got {memory_keys} as inputs from \"\n", - " f\"memory, and {input_key} as the normal input key.\"\n", - " )\n", - " return values\n", - "\n", - "---\n", - "\n", - "callbacks: Callbacks = None,\n", - " **kwargs: Any,\n", - " ) -> BaseConversationalRetrievalChain:\n", - " \"\"\"Load chain from LLM.\"\"\"\n", - " combine_docs_chain_kwargs = combine_docs_chain_kwargs or {}\n", - " doc_chain = load_qa_chain(\n", - " llm,\n", - " chain_type=chain_type,\n", - " callbacks=callbacks,\n", - " **combine_docs_chain_kwargs,\n", - " )\n", - " condense_question_chain = LLMChain(\n", - " llm=llm, prompt=condense_question_prompt, callbacks=callbacks\n", - " )\n", - " return cls(\n", - " vectorstore=vectorstore,\n", - " combine_docs_chain=doc_chain,\n", - " question_generator=condense_question_chain,\n", - " callbacks=callbacks,\n", - " **kwargs,\n", - " )\n", - "\n", - "-----\n", - "\n", - "how do I use the LLMChain in LangChain?\n" - ] - } - ], - "source": [ - "print(augmented_query)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sihH_GMiV5_p" - }, - "source": [ - "Now we ask the question:" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "id": "IThBqBi8V70d" - }, - "outputs": [], - "source": [ - "# system message to 'prime' the model\n", - "primer = \"\"\"You are Q&A bot. A highly intelligent system that answers\n", - "user questions based on the information provided by the user above\n", - "each question. If the information can not be found in the information\n", - "provided by the user you truthfully say \"I don't know\".\n", - "\"\"\"\n", - "\n", - "res = openai.ChatCompletion.create(\n", - " model=\"gpt-4\",\n", - " messages=[\n", - " {\"role\": \"system\", \"content\": primer},\n", - " {\"role\": \"user\", \"content\": augmented_query},\n", - " ],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QvS1yJhOWpiJ" - }, - "source": [ - "To display this response nicely, we will display it in markdown." - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 465 + "cell_type": "markdown", + "metadata": { + "id": "zStnHFpkNVIU" + }, + "source": [ + "Inside `'data'` we will find two records, one for each of the two sentences we just embedded. Each vector embedding contains `1536` dimensions (the output dimensionality of the `text-embedding-ada-002` model." + ] }, - "id": "RDo2qeMHWto1", - "outputId": "9a9b677f-9b4f-4f77-822d-80baf75ed04a" - }, - "outputs": [ { - "data": { - "text/markdown": [ - "To use the LLMChain in LangChain, you need to first import the necessary modules and classes. In this example, we will use the OpenAI language model. Follow the steps below:\n", - "\n", - "1. Import all required modules and classes:\n", - "\n", - "```python\n", - "from langchain import LLMChain, OpenAI, PromptTemplate\n", - "```\n", - "\n", - "2. Define the prompt template you want to use with the language model. For example, if you want to create jokes based on provided adjectives:\n", - "\n", - "```python\n", - "prompt_template = \"Tell me a {adjective} joke\"\n", - "```\n", - "\n", - "3. Create a PromptTemplate object passing the input_variables and template:\n", - "\n", - "```python\n", - "prompt = PromptTemplate(input_variables=[\"adjective\"], template=prompt_template)\n", - "```\n", - "\n", - "4. 
Instantiate the OpenAI language model:\n", - "\n", - "```python\n", - "llm = OpenAI()\n", - "```\n", - "\n", - "5. Create the LLMChain object using the OpenAI language model and the created prompt:\n", - "\n", - "```python\n", - "llm_chain = LLMChain(llm=llm, prompt=prompt)\n", - "```\n", - "\n", - "Now you can use the `llm_chain` object to generate jokes based on provided adjectives. For example:\n", - "\n", - "```python\n", - "response = llm_chain.run(adjective=\"funny\")\n", - "print(response)\n", - "```\n", - "\n", - "This will generate and print a funny joke based on the predefined prompt template. Replace `\"funny\"` with any other adjective to get a different result." + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uVoP9VcINWAC", + "outputId": "d9f797af-0df8-4ee9-f779-8d8a62589134" + }, + "source": [ + "len(res[\"data\"])" ], - "text/plain": [ - "" + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + } + } ] - }, - "output_type": "display_data" - } - ], - "source": [ - "from IPython.display import Markdown\n", - "\n", - "display(Markdown(res[\"choices\"][0][\"message\"][\"content\"]))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eJ-a8MHg0eYQ" - }, - "source": [ - "Let's compare this to a non-augmented query..." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 46 }, - "id": "vwhaSgdF0ZDX", - "outputId": "ce085b0f-e0da-4c00-f3f5-43b49e64568c" - }, - "outputs": [ { - "data": { - "text/markdown": [ - "I don't know." + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "s-zraDCjNeC6", + "outputId": "5f09e471-28de-4c39-d040-a80def97708e" + }, + "source": [ + "len(res[\"data\"][0][\"embedding\"]), len(res[\"data\"][1][\"embedding\"])" ], - "text/plain": [ - "" + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(1536, 1536)" + ] + } + } ] - }, - "output_type": "display_data" - } - ], - "source": [ - "res = openai.ChatCompletion.create(\n", - " model=\"gpt-4\",\n", - " messages=[\n", - " {\"role\": \"system\", \"content\": primer},\n", - " {\"role\": \"user\", \"content\": query},\n", - " ],\n", - ")\n", - "display(Markdown(res[\"choices\"][0][\"message\"][\"content\"]))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5CSsA-dW0m_P" - }, - "source": [ - "If we drop the `\"I don't know\"` part of the `primer`?" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 371 }, - "id": "Z3svdTCZ0iJ2", - "outputId": "19673965-a2f8-45be-b82a-6e491aa88416" - }, - "outputs": [ { - "data": { - "text/markdown": [ - "LLMChain, which stands for LangChain's Language Model Chain, is a feature within the LangChain ecosystem that allows connecting multiple language models to achieve more accurate translations and processing of natural language data.\n", - "\n", - "To use the LLMChain in LangChain, follow these steps:\n", - "\n", - "1. Sign up or log in: If you don't have an account with LangChain, sign up or log in to your existing account.\n", - "\n", - "2. Configure the LLMChain: Navigate to the LLMChain settings or configuration page (it may be under \"Settings\" or \"LLMChain Configuration\"). 
Here, you'll add, remove, or re-order language models in your chain.\n", - "\n", - "3. Add language models: Choose from the available language models and add them to your chain. Typically, language models are selected based on their performance or scope for specific language pairs or types of text.\n", - "\n", - "4. Set the order of language models: Arrange the order of the language models in your chain based on your preferences or needs. The LLMChain will process the input text in the order you've set, starting from the first model, and pass the output to the subsequent models in the chain.\n", - "\n", - "5. Test the LLMChain: Once you have configured your LLMChain, test it by inputting text and reviewing the generated translations or processed output. This step will allow you to fine-tune the chain to ensure optimal performance.\n", - "\n", - "6. Use the LLMChain in your translation projects or language processing tasks: With your LLMChain set up and tested, you can now use it for your translation or language processing needs.\n", - "\n", - "Remember that the LLMChain is part of the LangChain ecosystem, so any changes or modifications to it may require some knowledge of the platform and its interface. If needed, consult the official documentation or seek support from the community to ensure a seamless experience." + "cell_type": "markdown", + "metadata": { + "id": "XPd41MjANhmp" + }, + "source": [ + "We will apply this same embedding logic to the langchain docs dataset we've just scraped. But before doing so we must create a place to store the embeddings." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WPi4MZvMNvUH" + }, + "source": [ + "## Initializing the Index" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H5RRQArrN2lN" + }, + "source": [ + "Now we need a place to store these embeddings and enable a efficient vector search through them all. To do that we use Pinecone, we can get a [free API key](https://app.pinecone.io/) and enter it below where we will initialize our connection to Pinecone and create a new index." + ] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "# initialize connection to pinecone (get API key at app.pinecone.io)\n", + "api_key = os.environ.get(\"PINECONE_API_KEY\") or \"PINECONE_API_KEY\"\n", + "\n", + "# configure client\n", + "pc = Pinecone(api_key=api_key)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we setup our index specification, this allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects)." 
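One detail worth making explicit before we create the index: its `dimension` must match the `1536`-dimensional vectors produced by `text-embedding-ada-002`. The creation cell below derives this directly from the sample embedding response rather than hard-coding it; a minimal sketch of the idea (assuming `res` still holds the sample response from above):

```python
# the index dimension must equal the embedding dimensionality
embed_dim = len(res["data"][0]["embedding"])
print(embed_dim)  # -> 1536 for text-embedding-ada-002
```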
+ ] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "cloud = os.environ.get(\"PINECONE_CLOUD\") or \"aws\"\n", + "region = os.environ.get(\"PINECONE_REGION\") or \"us-east-1\"\n", + "\n", + "spec = ServerlessSpec(cloud=cloud, region=region)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "2GQAnohhum8v", + "tags": [ + "parameters" + ] + }, + "source": [ + "index_name = \"gpt-4-langchain-docs\"" + ], + "execution_count": 17, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "# check if index already exists (it shouldn't if this is first time)\n", + "if index_name not in pc.list_indexes().names():\n", + " # if does not exist, create index\n", + " pc.create_index(\n", + " index_name,\n", + " dimension=len(res[\"data\"][0][\"embedding\"]),\n", + " metric=\"cosine\",\n", + " spec=spec,\n", + " )\n", + " # wait for index to be initialized\n", + " while not pc.describe_index(index_name).status[\"ready\"]:\n", + " time.sleep(1)\n", + "\n", + "# connect to index\n", + "index = pc.Index(index_name)\n", + "# view index stats\n", + "index.describe_index_stats()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ezSTzN2rPa2o" + }, + "source": [ + "We can see the index is currently empty with a `total_vector_count` of `0`. We can begin populating it with OpenAI `text-embedding-ada-002` built embeddings like so:" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "760c608de89946298cb6845d5ff1b020", + "f6f7d673d7a145bda593848f7e87ca2c", + "effb0c1b07574547aca5956963b371c8", + "e6e0b0054fb5449c84ad745308510ddb", + "b1e6d4d46b334bcf96efcab6f57c7536", + "e5a120d5b9494d14a142fbf519bcbbdf", + "78fe5eb48ae748bda91ddc70f422212c", + "34e43d6a7a92453490c45e39498afd64", + "45c7fb32593141abb8168b8077e31f59", + "0ed96243151440a18994669e2f85e819", + "05a0a1ebc92f463d9f3e953e51742a85" + ] + }, + "id": "iZbFbulAPeop", + "outputId": "97cbb020-f6f9-4914-ff14-dd472354f64a" + }, + "source": [ + "batch_size = 100 # how many embeddings we create and insert at once\n", + "\n", + "for i in tqdm(range(0, len(chunks), batch_size)):\n", + " # find end of batch\n", + " i_end = min(len(chunks), i + batch_size)\n", + " meta_batch = chunks[i:i_end]\n", + " # get ids\n", + " ids_batch = [x[\"id\"] for x in meta_batch]\n", + " # get texts to encode\n", + " texts = [x[\"text\"] for x in meta_batch]\n", + " # create embeddings (try-except added to avoid RateLimitError)\n", + " try:\n", + " res = openai.Embedding.create(input=texts, engine=embed_model)\n", + " except Exception:\n", + " done = False\n", + " while not done:\n", + " time.sleep(5)\n", + " try:\n", + " res = openai.Embedding.create(input=texts, engine=embed_model)\n", + " done = True\n", + " except Exception:\n", + " pass\n", + " embeds = [record[\"embedding\"] for record in res[\"data\"]]\n", + " # cleanup metadata\n", + " meta_batch = [\n", + " {\"text\": x[\"text\"], \"chunk\": x[\"chunk\"], \"url\": x[\"url\"]} for x in meta_batch\n", + " ]\n", + " to_upsert = list(zip(ids_batch, embeds, meta_batch))\n", + " # upsert to Pinecone\n", + " index.upsert(vectors=to_upsert)" + ], + "execution_count": 19, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "760c608de89946298cb6845d5ff1b020", + "version_major": 2, + "version_minor": 0 + }, + 
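The bare try/except in the loop above retries at a fixed five-second interval, which is the simplest way to ride out rate limits. If you hit them often, an exponential backoff is gentler on the API; a sketch of the same embedding call wrapped in backoff (illustrative, not part of the original notebook):

```python
import time

def embed_with_backoff(texts, engine, max_retries=5):
    """Retry openai.Embedding.create with exponentially growing pauses."""
    delay = 1.0
    for attempt in range(max_retries):
        try:
            return openai.Embedding.create(input=texts, engine=engine)
        except Exception:
            if attempt == max_retries - 1:
                raise  # give up after the final attempt
            time.sleep(delay)
            delay *= 2  # 1s, 2s, 4s, 8s, ...

# usage inside the loop above: res = embed_with_backoff(texts, engine=embed_model)
```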
"text/plain": [ + " 0%| | 0/25 [00:00 bool:\\n'\n", + " ' return True\\n'\n", + " ' prompt: BasePromptTemplate\\n'\n", + " ' \"\"\"Prompt object to use.\"\"\"\\n'\n", + " ' llm: BaseLanguageModel\\n'\n", + " ' \"\"\"Language model to call.\"\"\"\\n'\n", + " ' output_key: str = \"text\" #: :meta '\n", + " 'private:\\n'\n", + " ' output_parser: BaseLLMOutputParser = '\n", + " 'Field(default_factory=NoOpOutputParser)\\n'\n", + " ' \"\"\"Output parser to use.\\n'\n", + " ' Defaults to one that takes the most '\n", + " 'likely string but does not change it \\n'\n", + " ' otherwise.\"\"\"\\n'\n", + " ' return_final_only: bool = True\\n'\n", + " ' \"\"\"Whether to return only the final '\n", + " 'parsed result. Defaults to True.\\n'\n", + " ' If false, will return a bunch of extra '\n", + " 'information about the generation.\"\"\"\\n'\n", + " ' llm_kwargs: dict = '\n", + " 'Field(default_factory=dict)\\n'\n", + " ' class Config:\\n'\n", + " ' \"\"\"Configuration for this pydantic '\n", + " 'object.\"\"\"\\n'\n", + " ' extra = Extra.forbid\\n'\n", + " ' arbitrary_types_allowed = True',\n", + " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/llm.html'},\n", + " 'score': 0.800940871,\n", + " 'values': []},\n", + " {'id': '35cde68a-b909-43b6-b918-81c4eb2db5cd-82',\n", + " 'metadata': {'chunk': 82.0,\n", + " 'text': 'Bases: langchain.chains.base.Chain\\n'\n", + " 'Chain for question-answering with '\n", + " 'self-verification.\\n'\n", + " 'Example\\n'\n", + " 'from langchain import OpenAI, '\n", + " 'LLMSummarizationCheckerChain\\n'\n", + " 'llm = OpenAI(temperature=0.0)\\n'\n", + " 'checker_chain = '\n", + " 'LLMSummarizationCheckerChain.from_llm(llm)\\n'\n", + " 'Parameters\\n'\n", + " 'memory '\n", + " '(Optional[langchain.schema.BaseMemory]) '\n", + " '– \\n'\n", + " 'callbacks '\n", + " '(Optional[Union[List[langchain.callbacks.base.BaseCallbackHandler], '\n", + " 'langchain.callbacks.base.BaseCallbackManager]]) '\n", + " '– \\n'\n", + " 'callback_manager '\n", + " '(Optional[langchain.callbacks.base.BaseCallbackManager]) '\n", + " '– \\n'\n", + " 'verbose (bool) – \\n'\n", + " 'tags (Optional[List[str]]) – \\n'\n", + " 'sequential_chain '\n", + " '(langchain.chains.sequential.SequentialChain) '\n", + " '– \\n'\n", + " 'llm '\n", + " '(Optional[langchain.base_language.BaseLanguageModel]) '\n", + " '– \\n'\n", + " 'create_assertions_prompt '\n", + " '(langchain.prompts.prompt.PromptTemplate) '\n", + " '– \\n'\n", + " 'check_assertions_prompt '\n", + " '(langchain.prompts.prompt.PromptTemplate) '\n", + " '– \\n'\n", + " 'revised_summary_prompt '\n", + " '(langchain.prompts.prompt.PromptTemplate) '\n", + " '– \\n'\n", + " 'are_all_true_prompt '\n", + " '(langchain.prompts.prompt.PromptTemplate) '\n", + " '– \\n'\n", + " 'input_key (str) – \\n'\n", + " 'output_key (str) – \\n'\n", + " 'max_checks (int) – \\n'\n", + " 'Return type\\n'\n", + " 'None',\n", + " 'url': 'https://api.python.langchain.com/en/latest/modules/chains.html'},\n", + " 'score': 0.79580605,\n", + " 'values': []},\n", + " {'id': '993db45b-4e3b-431d-a2a6-48ed5944912a-1',\n", + " 'metadata': {'chunk': 1.0,\n", + " 'text': '[docs] @classmethod\\n'\n", + " ' def from_llm(\\n'\n", + " ' cls,\\n'\n", + " ' llm: BaseLanguageModel,\\n'\n", + " ' chain: LLMChain,\\n'\n", + " ' critique_prompt: '\n", + " 'BasePromptTemplate = CRITIQUE_PROMPT,\\n'\n", + " ' revision_prompt: '\n", + " 'BasePromptTemplate = REVISION_PROMPT,\\n'\n", + " ' **kwargs: Any,\\n'\n", + " ' ) -> \"ConstitutionalChain\":\\n'\n", + " ' 
\"\"\"Create a chain from an LLM.\"\"\"\\n'\n", + " ' critique_chain = LLMChain(llm=llm, '\n", + " 'prompt=critique_prompt)\\n'\n", + " ' revision_chain = LLMChain(llm=llm, '\n", + " 'prompt=revision_prompt)\\n'\n", + " ' return cls(\\n'\n", + " ' chain=chain,\\n'\n", + " ' '\n", + " 'critique_chain=critique_chain,\\n'\n", + " ' '\n", + " 'revision_chain=revision_chain,\\n'\n", + " ' **kwargs,\\n'\n", + " ' )\\n'\n", + " ' @property\\n'\n", + " ' def input_keys(self) -> List[str]:\\n'\n", + " ' \"\"\"Defines the input keys.\"\"\"\\n'\n", + " ' return self.chain.input_keys\\n'\n", + " ' @property\\n'\n", + " ' def output_keys(self) -> List[str]:\\n'\n", + " ' \"\"\"Defines the output keys.\"\"\"\\n'\n", + " ' if '\n", + " 'self.return_intermediate_steps:\\n'\n", + " ' return [\"output\", '\n", + " '\"critiques_and_revisions\", '\n", + " '\"initial_output\"]\\n'\n", + " ' return [\"output\"]\\n'\n", + " ' def _call(\\n'\n", + " ' self,\\n'\n", + " ' inputs: Dict[str, Any],\\n'\n", + " ' run_manager: '\n", + " 'Optional[CallbackManagerForChainRun] = '\n", + " 'None,\\n'\n", + " ' ) -> Dict[str, Any]:\\n'\n", + " ' _run_manager = run_manager or '\n", + " 'CallbackManagerForChainRun.get_noop_manager()\\n'\n", + " ' response = self.chain.run(\\n'\n", + " ' **inputs,\\n'\n", + " ' '\n", + " 'callbacks=_run_manager.get_child(\"original\"),\\n'\n", + " ' )\\n'\n", + " ' initial_response = response\\n'\n", + " ' input_prompt = '\n", + " 'self.chain.prompt.format(**inputs)\\n'\n", + " ' _run_manager.on_text(\\n'\n", + " ' text=\"Initial response: \" + '\n", + " 'response + \"\\\\n\\\\n\",\\n'\n", + " ' verbose=self.verbose,\\n'\n", + " ' color=\"yellow\",\\n'\n", + " ' )\\n'\n", + " ' critiques_and_revisions = []\\n'\n", + " ' for constitutional_principle in '\n", + " 'self.constitutional_principles:\\n'\n", + " ' # Do critique\\n'\n", + " ' raw_critique = '\n", + " 'self.critique_chain.run(\\n'\n", + " ' '\n", + " 'input_prompt=input_prompt,\\n'\n", + " ' '\n", + " 'output_from_model=response,\\n'\n", + " ' '\n", + " 'critique_request=constitutional_principle.critique_request,\\n'\n", + " ' '\n", + " 'callbacks=_run_manager.get_child(\"critique\"),\\n'\n", + " ' )\\n'\n", + " ' critique = '\n", + " 'self._parse_critique(\\n'\n", + " ' '\n", + " 'output_string=raw_critique,',\n", + " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/constitutional_ai/base.html'},\n", + " 'score': 0.79369247,\n", + " 'values': []},\n", + " {'id': 'adea5d40-2691-4bc9-9403-3360345bc25e-0',\n", + " 'metadata': {'chunk': 0.0,\n", + " 'text': 'Source code for '\n", + " 'langchain.chains.conversation.base\\n'\n", + " '\"\"\"Chain that carries on a conversation '\n", + " 'and calls an LLM.\"\"\"\\n'\n", + " 'from typing import Dict, List\\n'\n", + " 'from pydantic import Extra, Field, '\n", + " 'root_validator\\n'\n", + " 'from langchain.chains.conversation.prompt '\n", + " 'import PROMPT\\n'\n", + " 'from langchain.chains.llm import LLMChain\\n'\n", + " 'from langchain.memory.buffer import '\n", + " 'ConversationBufferMemory\\n'\n", + " 'from langchain.prompts.base import '\n", + " 'BasePromptTemplate\\n'\n", + " 'from langchain.schema import BaseMemory\\n'\n", + " '[docs]class ConversationChain(LLMChain):\\n'\n", + " ' \"\"\"Chain to have a conversation and '\n", + " 'load context from memory.\\n'\n", + " ' Example:\\n'\n", + " ' .. 
code-block:: python\\n'\n", + " ' from langchain import '\n", + " 'ConversationChain, OpenAI\\n'\n", + " ' conversation = '\n", + " 'ConversationChain(llm=OpenAI())\\n'\n", + " ' \"\"\"\\n'\n", + " ' memory: BaseMemory = '\n", + " 'Field(default_factory=ConversationBufferMemory)\\n'\n", + " ' \"\"\"Default memory store.\"\"\"\\n'\n", + " ' prompt: BasePromptTemplate = PROMPT\\n'\n", + " ' \"\"\"Default conversation prompt to '\n", + " 'use.\"\"\"\\n'\n", + " ' input_key: str = \"input\" #: :meta '\n", + " 'private:\\n'\n", + " ' output_key: str = \"response\" #: :meta '\n", + " 'private:\\n'\n", + " ' class Config:\\n'\n", + " ' \"\"\"Configuration for this pydantic '\n", + " 'object.\"\"\"\\n'\n", + " ' extra = Extra.forbid\\n'\n", + " ' arbitrary_types_allowed = True\\n'\n", + " ' @property\\n'\n", + " ' def input_keys(self) -> List[str]:\\n'\n", + " ' \"\"\"Use this since so some prompt '\n", + " 'vars come from history.\"\"\"\\n'\n", + " ' return [self.input_key]\\n'\n", + " ' @root_validator()\\n'\n", + " ' def '\n", + " 'validate_prompt_input_variables(cls, '\n", + " 'values: Dict) -> Dict:\\n'\n", + " ' \"\"\"Validate that prompt input '\n", + " 'variables are consistent.\"\"\"\\n'\n", + " ' memory_keys = '\n", + " 'values[\"memory\"].memory_variables\\n'\n", + " ' input_key = values[\"input_key\"]\\n'\n", + " ' if input_key in memory_keys:\\n'\n", + " ' raise ValueError(\\n'\n", + " ' f\"The input key '\n", + " '{input_key} was also found in the memory '\n", + " 'keys \"\\n'\n", + " ' f\"({memory_keys}) - please '\n", + " 'provide keys that don\\'t overlap.\"\\n'\n", + " ' )\\n'\n", + " ' prompt_variables = '\n", + " 'values[\"prompt\"].input_variables\\n'\n", + " ' expected_keys = memory_keys + '\n", + " '[input_key]\\n'\n", + " ' if set(expected_keys) != '\n", + " 'set(prompt_variables):\\n'\n", + " ' raise ValueError(\\n'\n", + " ' \"Got unexpected prompt '\n", + " 'input variables. 
The prompt expects \"\\n'\n", + " ' f\"{prompt_variables}, but '\n", + " 'got {memory_keys} as inputs from \"\\n'\n", + " ' f\"memory, and {input_key} '\n", + " 'as the normal input key.\"\\n'\n", + " ' )\\n'\n", + " ' return values',\n", + " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/conversation/base.html'},\n", + " 'score': 0.792259932,\n", + " 'values': []},\n", + " {'id': '3b6f9660-0346-4992-a6f5-b9cc2977f446-5',\n", + " 'metadata': {'chunk': 5.0,\n", + " 'text': 'callbacks: Callbacks = None,\\n'\n", + " ' **kwargs: Any,\\n'\n", + " ' ) -> '\n", + " 'BaseConversationalRetrievalChain:\\n'\n", + " ' \"\"\"Load chain from LLM.\"\"\"\\n'\n", + " ' combine_docs_chain_kwargs = '\n", + " 'combine_docs_chain_kwargs or {}\\n'\n", + " ' doc_chain = load_qa_chain(\\n'\n", + " ' llm,\\n'\n", + " ' chain_type=chain_type,\\n'\n", + " ' callbacks=callbacks,\\n'\n", + " ' **combine_docs_chain_kwargs,\\n'\n", + " ' )\\n'\n", + " ' condense_question_chain = '\n", + " 'LLMChain(\\n'\n", + " ' llm=llm, '\n", + " 'prompt=condense_question_prompt, '\n", + " 'callbacks=callbacks\\n'\n", + " ' )\\n'\n", + " ' return cls(\\n'\n", + " ' vectorstore=vectorstore,\\n'\n", + " ' combine_docs_chain=doc_chain,\\n'\n", + " ' '\n", + " 'question_generator=condense_question_chain,\\n'\n", + " ' callbacks=callbacks,\\n'\n", + " ' **kwargs,\\n'\n", + " ' )',\n", + " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/conversational_retrieval/base.html'},\n", + " 'score': 0.791279614,\n", + " 'values': []}],\n", + " 'namespace': ''}" + ] + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MoBSiDLIUADZ" + }, + "source": [ + "With retrieval complete, we move on to feeding these into GPT-4 to produce answers." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qfzS4-6-UXgX" + }, + "source": [ + "## Retrieval Augmented Generation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XPC1jQaKUcy0" + }, + "source": [ + "GPT-4 is currently accessed via the `ChatCompletions` endpoint of OpenAI. To add the information we retrieved into the model, we need to pass it into our user prompts *alongside* our original query. 
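A quick sanity check first: the retrieved chunks are long, so it is worth counting tokens before building the final prompt. The next cell is a minimal sketch using `tiktoken` (we assume GPT-4's `cl100k_base` encoding here, and reuse the `res` query response from above):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {},
+ "source": [
+ "import tiktoken\n",
+ "\n",
+ "# Rough token count of the retrieved chunks (a sketch; assumes the cl100k_base encoding)\n",
+ "enc = tiktoken.get_encoding(\"cl100k_base\")\n",
+ "print(sum(len(enc.encode(m[\"metadata\"][\"text\"])) for m in res[\"matches\"]))"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "With that check done, we build the augmented prompt. 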
We can do that like so:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "unZstoHNUHeG" + }, + "source": [ + "# get list of retrieved text\n", + "contexts = [item[\"metadata\"][\"text\"] for item in res[\"matches\"]]\n", + "\n", + "augmented_query = \"\\n\\n---\\n\\n\".join(contexts) + \"\\n\\n-----\\n\\n\" + query" + ], + "execution_count": 22, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LRcEHm0Z9fXE", + "outputId": "636c6825-ecd1-4953-ee25-ebabcb3a2fed" + }, + "source": [ + "print(augmented_query)" + ], + "execution_count": 27, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Source code for langchain.chains.llm\n", + "\"\"\"Chain that just formats a prompt and calls an LLM.\"\"\"\n", + "from __future__ import annotations\n", + "import warnings\n", + "from typing import Any, Dict, List, Optional, Sequence, Tuple, Union\n", + "from pydantic import Extra, Field\n", + "from langchain.base_language import BaseLanguageModel\n", + "from langchain.callbacks.manager import (\n", + " AsyncCallbackManager,\n", + " AsyncCallbackManagerForChainRun,\n", + " CallbackManager,\n", + " CallbackManagerForChainRun,\n", + " Callbacks,\n", + ")\n", + "from langchain.chains.base import Chain\n", + "from langchain.input import get_colored_text\n", + "from langchain.load.dump import dumpd\n", + "from langchain.prompts.base import BasePromptTemplate\n", + "from langchain.prompts.prompt import PromptTemplate\n", + "from langchain.schema import (\n", + " BaseLLMOutputParser,\n", + " LLMResult,\n", + " NoOpOutputParser,\n", + " PromptValue,\n", + ")\n", + "[docs]class LLMChain(Chain):\n", + " \"\"\"Chain to run queries against LLMs.\n", + " Example:\n", + " .. code-block:: python\n", + " from langchain import LLMChain, OpenAI, PromptTemplate\n", + " prompt_template = \"Tell me a {adjective} joke\"\n", + " prompt = PromptTemplate(\n", + " input_variables=[\"adjective\"], template=prompt_template\n", + " )\n", + " llm = LLMChain(llm=OpenAI(), prompt=prompt)\n", + " \"\"\"\n", + " @property\n", + " def lc_serializable(self) -> bool:\n", + " return True\n", + " prompt: BasePromptTemplate\n", + " \"\"\"Prompt object to use.\"\"\"\n", + " llm: BaseLanguageModel\n", + " \"\"\"Language model to call.\"\"\"\n", + " output_key: str = \"text\" #: :meta private:\n", + " output_parser: BaseLLMOutputParser = Field(default_factory=NoOpOutputParser)\n", + " \"\"\"Output parser to use.\n", + " Defaults to one that takes the most likely string but does not change it \n", + " otherwise.\"\"\"\n", + " return_final_only: bool = True\n", + " \"\"\"Whether to return only the final parsed result. 
Defaults to True.\n", + " If false, will return a bunch of extra information about the generation.\"\"\"\n", + " llm_kwargs: dict = Field(default_factory=dict)\n", + " class Config:\n", + " \"\"\"Configuration for this pydantic object.\"\"\"\n", + " extra = Extra.forbid\n", + " arbitrary_types_allowed = True\n", + "\n", + "---\n", + "\n", + "Bases: langchain.chains.base.Chain\n", + "Chain for question-answering with self-verification.\n", + "Example\n", + "from langchain import OpenAI, LLMSummarizationCheckerChain\n", + "llm = OpenAI(temperature=0.0)\n", + "checker_chain = LLMSummarizationCheckerChain.from_llm(llm)\n", + "Parameters\n", + "memory (Optional[langchain.schema.BaseMemory]) – \n", + "callbacks (Optional[Union[List[langchain.callbacks.base.BaseCallbackHandler], langchain.callbacks.base.BaseCallbackManager]]) – \n", + "callback_manager (Optional[langchain.callbacks.base.BaseCallbackManager]) – \n", + "verbose (bool) – \n", + "tags (Optional[List[str]]) – \n", + "sequential_chain (langchain.chains.sequential.SequentialChain) – \n", + "llm (Optional[langchain.base_language.BaseLanguageModel]) – \n", + "create_assertions_prompt (langchain.prompts.prompt.PromptTemplate) – \n", + "check_assertions_prompt (langchain.prompts.prompt.PromptTemplate) – \n", + "revised_summary_prompt (langchain.prompts.prompt.PromptTemplate) – \n", + "are_all_true_prompt (langchain.prompts.prompt.PromptTemplate) – \n", + "input_key (str) – \n", + "output_key (str) – \n", + "max_checks (int) – \n", + "Return type\n", + "None\n", + "\n", + "---\n", + "\n", + "[docs] @classmethod\n", + " def from_llm(\n", + " cls,\n", + " llm: BaseLanguageModel,\n", + " chain: LLMChain,\n", + " critique_prompt: BasePromptTemplate = CRITIQUE_PROMPT,\n", + " revision_prompt: BasePromptTemplate = REVISION_PROMPT,\n", + " **kwargs: Any,\n", + " ) -> \"ConstitutionalChain\":\n", + " \"\"\"Create a chain from an LLM.\"\"\"\n", + " critique_chain = LLMChain(llm=llm, prompt=critique_prompt)\n", + " revision_chain = LLMChain(llm=llm, prompt=revision_prompt)\n", + " return cls(\n", + " chain=chain,\n", + " critique_chain=critique_chain,\n", + " revision_chain=revision_chain,\n", + " **kwargs,\n", + " )\n", + " @property\n", + " def input_keys(self) -> List[str]:\n", + " \"\"\"Defines the input keys.\"\"\"\n", + " return self.chain.input_keys\n", + " @property\n", + " def output_keys(self) -> List[str]:\n", + " \"\"\"Defines the output keys.\"\"\"\n", + " if self.return_intermediate_steps:\n", + " return [\"output\", \"critiques_and_revisions\", \"initial_output\"]\n", + " return [\"output\"]\n", + " def _call(\n", + " self,\n", + " inputs: Dict[str, Any],\n", + " run_manager: Optional[CallbackManagerForChainRun] = None,\n", + " ) -> Dict[str, Any]:\n", + " _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()\n", + " response = self.chain.run(\n", + " **inputs,\n", + " callbacks=_run_manager.get_child(\"original\"),\n", + " )\n", + " initial_response = response\n", + " input_prompt = self.chain.prompt.format(**inputs)\n", + " _run_manager.on_text(\n", + " text=\"Initial response: \" + response + \"\\n\\n\",\n", + " verbose=self.verbose,\n", + " color=\"yellow\",\n", + " )\n", + " critiques_and_revisions = []\n", + " for constitutional_principle in self.constitutional_principles:\n", + " # Do critique\n", + " raw_critique = self.critique_chain.run(\n", + " input_prompt=input_prompt,\n", + " output_from_model=response,\n", + " critique_request=constitutional_principle.critique_request,\n", + " 
callbacks=_run_manager.get_child(\"critique\"),\n", + " )\n", + " critique = self._parse_critique(\n", + " output_string=raw_critique,\n", + "\n", + "---\n", + "\n", + "Source code for langchain.chains.conversation.base\n", + "\"\"\"Chain that carries on a conversation and calls an LLM.\"\"\"\n", + "from typing import Dict, List\n", + "from pydantic import Extra, Field, root_validator\n", + "from langchain.chains.conversation.prompt import PROMPT\n", + "from langchain.chains.llm import LLMChain\n", + "from langchain.memory.buffer import ConversationBufferMemory\n", + "from langchain.prompts.base import BasePromptTemplate\n", + "from langchain.schema import BaseMemory\n", + "[docs]class ConversationChain(LLMChain):\n", + " \"\"\"Chain to have a conversation and load context from memory.\n", + " Example:\n", + " .. code-block:: python\n", + " from langchain import ConversationChain, OpenAI\n", + " conversation = ConversationChain(llm=OpenAI())\n", + " \"\"\"\n", + " memory: BaseMemory = Field(default_factory=ConversationBufferMemory)\n", + " \"\"\"Default memory store.\"\"\"\n", + " prompt: BasePromptTemplate = PROMPT\n", + " \"\"\"Default conversation prompt to use.\"\"\"\n", + " input_key: str = \"input\" #: :meta private:\n", + " output_key: str = \"response\" #: :meta private:\n", + " class Config:\n", + " \"\"\"Configuration for this pydantic object.\"\"\"\n", + " extra = Extra.forbid\n", + " arbitrary_types_allowed = True\n", + " @property\n", + " def input_keys(self) -> List[str]:\n", + " \"\"\"Use this since so some prompt vars come from history.\"\"\"\n", + " return [self.input_key]\n", + " @root_validator()\n", + " def validate_prompt_input_variables(cls, values: Dict) -> Dict:\n", + " \"\"\"Validate that prompt input variables are consistent.\"\"\"\n", + " memory_keys = values[\"memory\"].memory_variables\n", + " input_key = values[\"input_key\"]\n", + " if input_key in memory_keys:\n", + " raise ValueError(\n", + " f\"The input key {input_key} was also found in the memory keys \"\n", + " f\"({memory_keys}) - please provide keys that don't overlap.\"\n", + " )\n", + " prompt_variables = values[\"prompt\"].input_variables\n", + " expected_keys = memory_keys + [input_key]\n", + " if set(expected_keys) != set(prompt_variables):\n", + " raise ValueError(\n", + " \"Got unexpected prompt input variables. 
The prompt expects \"\n", + " f\"{prompt_variables}, but got {memory_keys} as inputs from \"\n", + " f\"memory, and {input_key} as the normal input key.\"\n", + " )\n", + " return values\n", + "\n", + "---\n", + "\n", + "callbacks: Callbacks = None,\n", + " **kwargs: Any,\n", + " ) -> BaseConversationalRetrievalChain:\n", + " \"\"\"Load chain from LLM.\"\"\"\n", + " combine_docs_chain_kwargs = combine_docs_chain_kwargs or {}\n", + " doc_chain = load_qa_chain(\n", + " llm,\n", + " chain_type=chain_type,\n", + " callbacks=callbacks,\n", + " **combine_docs_chain_kwargs,\n", + " )\n", + " condense_question_chain = LLMChain(\n", + " llm=llm, prompt=condense_question_prompt, callbacks=callbacks\n", + " )\n", + " return cls(\n", + " vectorstore=vectorstore,\n", + " combine_docs_chain=doc_chain,\n", + " question_generator=condense_question_chain,\n", + " callbacks=callbacks,\n", + " **kwargs,\n", + " )\n", + "\n", + "-----\n", + "\n", + "how do I use the LLMChain in LangChain?\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sihH_GMiV5_p" + }, + "source": [ + "Now we ask the question:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "IThBqBi8V70d" + }, + "source": [ + "# system message to 'prime' the model\n", + "primer = \"\"\"You are Q&A bot. A highly intelligent system that answers\n", + "user questions based on the information provided by the user above\n", + "each question. If the information can not be found in the information\n", + "provided by the user you truthfully say \"I don't know\".\n", + "\"\"\"\n", + "\n", + "res = openai.ChatCompletion.create(\n", + " model=\"gpt-4\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": primer},\n", + " {\"role\": \"user\", \"content\": augmented_query},\n", + " ],\n", + ")" + ], + "execution_count": 28, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QvS1yJhOWpiJ" + }, + "source": [ + "To display this response nicely, we will display it in markdown." + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 465 + }, + "id": "RDo2qeMHWto1", + "outputId": "9a9b677f-9b4f-4f77-822d-80baf75ed04a" + }, + "source": [ + "display(Markdown(res[\"choices\"][0][\"message\"][\"content\"]))" + ], + "execution_count": 29, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/markdown": [ + "To use the LLMChain in LangChain, you need to first import the necessary modules and classes. In this example, we will use the OpenAI language model. Follow the steps below:\n", + "\n", + "1. Import all required modules and classes:\n", + "\n", + "```python\n", + "from langchain import LLMChain, OpenAI, PromptTemplate\n", + "```\n", + "\n", + "2. Define the prompt template you want to use with the language model. For example, if you want to create jokes based on provided adjectives:\n", + "\n", + "```python\n", + "prompt_template = \"Tell me a {adjective} joke\"\n", + "```\n", + "\n", + "3. Create a PromptTemplate object passing the input_variables and template:\n", + "\n", + "```python\n", + "prompt = PromptTemplate(input_variables=[\"adjective\"], template=prompt_template)\n", + "```\n", + "\n", + "4. Instantiate the OpenAI language model:\n", + "\n", + "```python\n", + "llm = OpenAI()\n", + "```\n", + "\n", + "5. 
Create the LLMChain object using the OpenAI language model and the created prompt:\n",
+ "\n",
+ "```python\n",
+ "llm_chain = LLMChain(llm=llm, prompt=prompt)\n",
+ "```\n",
+ "\n",
+ "Now you can use the `llm_chain` object to generate jokes based on provided adjectives. For example:\n",
+ "\n",
+ "```python\n",
+ "response = llm_chain.run(adjective=\"funny\")\n",
+ "print(response)\n",
+ "```\n",
+ "\n",
+ "This will generate and print a funny joke based on the predefined prompt template. Replace `\"funny\"` with any other adjective to get a different result."
+ ],
+ "text/plain": [
+ ""
+ ]
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "eJ-a8MHg0eYQ"
+ },
+ "source": [
+ "Let's compare this to a non-augmented query..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 46
+ },
+ "id": "vwhaSgdF0ZDX",
+ "outputId": "ce085b0f-e0da-4c00-f3f5-43b49e64568c"
+ },
+ "source": [
+ "res = openai.ChatCompletion.create(\n",
+ " model=\"gpt-4\",\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": primer},\n",
+ " {\"role\": \"user\", \"content\": query},\n",
+ " ],\n",
+ ")\n",
+ "display(Markdown(res[\"choices\"][0][\"message\"][\"content\"]))"
+ ],
+ "execution_count": 30,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/markdown": [
+ "I don't know."
+ ],
+ "text/plain": [
+ ""
+ ]
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "5CSsA-dW0m_P"
+ },
+ "source": [
+ "What if we drop the `\"I don't know\"` part of the `primer`?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 371
+ },
+ "id": "Z3svdTCZ0iJ2",
+ "outputId": "19673965-a2f8-45be-b82a-6e491aa88416"
+ },
+ "source": [
+ "res = openai.ChatCompletion.create(\n",
+ " model=\"gpt-4\",\n",
+ " messages=[\n",
+ " {\n",
+ " \"role\": \"system\",\n",
+ " \"content\": \"You are Q&A bot. A highly intelligent system that answers user questions\",\n",
+ " },\n",
+ " {\"role\": \"user\", \"content\": query},\n",
+ " ],\n",
+ ")\n",
+ "display(Markdown(res[\"choices\"][0][\"message\"][\"content\"]))"
+ ],
+ "execution_count": 31,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/markdown": [
+ "LLMChain, which stands for LangChain's Language Model Chain, is a feature within the LangChain ecosystem that allows connecting multiple language models to achieve more accurate translations and processing of natural language data.\n",
+ "\n",
+ "To use the LLMChain in LangChain, follow these steps:\n",
+ "\n",
+ "1. Sign up or log in: If you don't have an account with LangChain, sign up or log in to your existing account.\n",
+ "\n",
+ "2. Configure the LLMChain: Navigate to the LLMChain settings or configuration page (it may be under \"Settings\" or \"LLMChain Configuration\"). Here, you'll add, remove, or re-order language models in your chain.\n",
+ "\n",
+ "3. Add language models: Choose from the available language models and add them to your chain. Typically, language models are selected based on their performance or scope for specific language pairs or types of text.\n",
+ "\n",
+ "4. Set the order of language models: Arrange the order of the language models in your chain based on your preferences or needs. The LLMChain will process the input text in the order you've set, starting from the first model, and pass the output to the subsequent models in the chain.\n",
+ "\n",
+ "5. 
Test the LLMChain: Once you have configured your LLMChain, test it by inputting text and reviewing the generated translations or processed output. This step will allow you to fine-tune the chain to ensure optimal performance.\n", + "\n", + "6. Use the LLMChain in your translation projects or language processing tasks: With your LLMChain set up and tested, you can now use it for your translation or language processing needs.\n", + "\n", + "Remember that the LLMChain is part of the LangChain ecosystem, so any changes or modifications to it may require some knowledge of the platform and its interface. If needed, consult the official documentation or seek support from the community to ensure a seamless experience." + ], + "text/plain": [ + "" + ] + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GcGon5672lBb" + }, + "source": [ + "Then we see something even worse than `\"I don't know\"` — hallucinations. Clearly augmenting our queries with additional context can make a huge difference to the performance of our system.\n", + "\n", + "Great, we've seen how to augment GPT-4 with semantic search to allow us to answer LangChain specific queries.\n", + "\n", + "Once you're finished, we delete the index to save resources." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Ah_vfEHV2khx" + }, + "source": [ + "pc.delete_index(index_name)" ], - "text/plain": [ - "" + "execution_count": 32, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iEUMlO8M2h4Y" + }, + "source": [ + "---" ] - }, - "output_type": "display_data" } - ], - "source": [ - "res = openai.ChatCompletion.create(\n", - " model=\"gpt-4\",\n", - " messages=[\n", - " {\n", - " \"role\": \"system\",\n", - " \"content\": \"You are Q&A bot. A highly intelligent system that answers user questions\",\n", - " },\n", - " {\"role\": \"user\", \"content\": query},\n", - " ],\n", - ")\n", - "display(Markdown(res[\"choices\"][0][\"message\"][\"content\"]))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GcGon5672lBb" - }, - "source": [ - "Then we see something even worse than `\"I don't know\"` — hallucinations. Clearly augmenting our queries with additional context can make a huge difference to the performance of our system.\n", - "\n", - "Great, we've seen how to augment GPT-4 with semantic search to allow us to answer LangChain specific queries.\n", - "\n", - "Once you're finished, we delete the index to save resources." 
- ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": { - "id": "Ah_vfEHV2khx" - }, - "outputs": [], - "source": [ - "pc.delete_index(index_name)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "iEUMlO8M2h4Y" - }, - "source": [ - "---" - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "gpuClass": "standard", - "kernelspec": { - "display_name": "Python 3", - "name": "python3" + ], + "metadata": { + "colab": { + "provenance": [] + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 + "nbformat": 4, + "nbformat_minor": 0 } \ No newline at end of file From a8e0e335661527a4e148a3cebaa44a73d6dfc8f5 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Thu, 29 Jan 2026 07:55:47 -0500 Subject: [PATCH 4/8] fix: repair notebook output metadata for nbformat validation Add missing 'name', 'metadata', and 'execution_count' fields to cell outputs to comply with Jupyter notebook format specification. --- .../openai/gpt-4-langchain-docs.ipynb | 4273 +++++++++-------- 1 file changed, 2153 insertions(+), 2120 deletions(-) diff --git a/learn/generation/openai/gpt-4-langchain-docs.ipynb b/learn/generation/openai/gpt-4-langchain-docs.ipynb index e9f5d056..f5ea170f 100644 --- a/learn/generation/openai/gpt-4-langchain-docs.ipynb +++ b/learn/generation/openai/gpt-4-langchain-docs.ipynb @@ -1,2201 +1,2234 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "GFLLl1Agum8O" - }, - "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/generation/openai/gpt-4-langchain-docs.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/generation/openai/gpt-4-langchain-docs.ipynb)\n", - "\n", - "# GPT4 with Retrieval Augmentation over LangChain Docs\n", - "\n", - "[![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/fast-link.svg)](https://github.com/pinecone-io/examples/blob/master/docs/gpt-4-langchain-docs.ipynb)\n", - "\n", - "In this notebook we'll work through an example of using GPT-4 with retrieval augmentation to answer questions about the LangChain Python library." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "_HDKlQO5svqI" - }, - "source": [ - "!pip install -qU \\\n", - " tiktoken==0.4.0 \\\n", - " openai==0.27.7 \\\n", - " langchain==0.0.179 \\\n", - " pinecone \\\n", - " datasets==2.13.1" - ], - "execution_count": 1, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "import os\n", - "import time\n", - "\n", - "import openai\n", - "import tiktoken\n", - "from datasets import load_dataset\n", - "from IPython.display import Markdown\n", - "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", - "from pinecone import Pinecone, ServerlessSpec\n", - "from tqdm.auto import tqdm" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7c1EpQ-jq7SU" - }, - "source": [ - "---\n", - "\n", - "🚨 _Note: the above `pip install` is formatted for Jupyter notebooks. 
If running elsewhere you may need to drop the `!`._\n", - "\n", - "---" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NgUEJ6vDum8q" - }, - "source": [ - "In this example, we will download the LangChain docs, we can find a static version of the docs on Hugging Face datasets in `jamescalam/langchain-docs-23-06-27`. To download them we do:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 237, - "referenced_widgets": [ - "63de2154fea24b49a87bf4b8428fa630", - "4b4cfb1a834342198c75a02d28448b57", - "a9d471008dc34f67a5307bbb26d6123c", - "580e5dd4c9d9497caa40802d5918e75c", - "bd09981e486d461eaa2cf166b32921e1", - "bed2dd81769b4910831cb34a7b475c72", - "ccad7c2aec604ee29b41497ec0f37fa7", - "390f06d63dd547d395dcf18f1ebe265d", - "6545006e51824be9b6cb5cdb2cb2ba5a", - "241b0de59e53465f8acad4ac74b17b57", - "05199362d95449699254c45c1d5cee94", - "6881722e02fe4395a5fcaf668cb7ebcb", - "2b960a7f46444ad3bd3392517b415f2d", - "a3e8499ed740449586ca31500038c7a8", - "08c52a0369b74e7da99574ec29612189", - "ffb822b2f739434dbe99e8a992716c30", - "7e2b88be1cae49da824e6c6c0782cb50", - "9f4e9da63bb64d279ded5ee1730b5cba", - "3b319c7a4f6f41ea9ea6e6268cd29343", - "908935a03fea42efbded99cd81de54c5", - "dd3ece4c242d4eae946f8bc4f95d1dbf", - "ae71cc7e26ee4b51b7eb67520f66c9bd", - "d83b0b3089c34bb58ddb1272a240c2f9", - "34d21f61f6dc499a9d1504634e470bdd", - "64aae9675d394df48d233b31e5f0eb3c", - "d1d3dde6ec3b483f8b14139a7d6a9ae0", - "690ca50e9785402bb17fa266f8e40ea9", - "482f891d61ab4c2080d95a9b84ea5c6d", - "622987b045e74a13b79553d3d062e72a", - "6c7236b0655e4397b3a9d5f4d83c03fe", - "6f7e876e10fd4c58aa2d1f1ed4ff2762", - "9a8b01998f8a4c6bb0bfe71e02b3352c", - "ec224feb9828415eb018831e985d22c0", - "a532b2307c734cf188092d40299c40ad", - "fab781bfae4647968aa69f19ae6a5754", - "5961b9e44ce14a2a8eb65a9e5b6be90d", - "5f15e4b12305489180e54c61769dcebe", - "324465ed674740c2a18a88a2633f2093", - "f82b21e87eba4e06a0531c791dc09b3f", - "5c0bb7407c844ae19479416752f66190", - "5ef6d125261b49679dcb4d886b3e382c", - "294d5fc4fa1e40429e08137934481ba2", - "f5d992e8c1224879be5e5464a424a3a4", - "7e828bf7b91e4029bc2093876128a78b" - ] - }, - "id": "xo9gYhGPr_DQ", - "outputId": "016b896d-87a6-4d17-bad1-027475510a8b" - }, - "source": [ - "docs = load_dataset(\"jamescalam/langchain-docs-23-06-27\", split=\"train\")\n", - "docs" - ], - "execution_count": 2, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Downloading and preparing dataset json/jamescalam--langchain-docs-23-06-27 to /root/.cache/huggingface/datasets/jamescalam___json/jamescalam--langchain-docs-23-06-27-4631410d07444b03/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96...\n" - ] - }, - { - "output_type": "display_data", - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "63de2154fea24b49a87bf4b8428fa630", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Downloading data files: 0%| | 0/1 [00:00 JSON: {\n", - " \"data\": [\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"whisper-1\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-internal\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"babbage\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"davinci\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", 
- " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-davinci-edit-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"babbage-code-search-code\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-similarity-babbage-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-embedding-ada-002\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-internal\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"code-davinci-edit-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-davinci-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"ada\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"babbage-code-search-text\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"babbage-similarity\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"code-search-babbage-text-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-curie-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-4-0314\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-4-0613\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"code-search-babbage-code-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-ada-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-similarity-ada-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"curie-instruct-beta\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - 
" \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-4\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"ada-code-search-code\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"ada-similarity\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"code-search-ada-text-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-ada-query-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"davinci-search-document\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"ada-code-search-text\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-ada-doc-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"davinci-instruct-beta\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-similarity-curie-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"code-search-ada-code-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"ada-search-query\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-davinci-query-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"curie-search-query\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"davinci-search-query\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"babbage-search-document\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"ada-search-document\",\n", - " \"object\": 
\"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-curie-query-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-babbage-doc-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"curie-search-document\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-curie-doc-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"babbage-search-query\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-babbage-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-davinci-doc-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-search-babbage-query-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"curie-similarity\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-3.5-turbo-0613\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"curie\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-3.5-turbo-16k-0613\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-similarity-davinci-001\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-davinci-002\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-3.5-turbo-0301\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"text-davinci-003\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-internal\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": 
\"davinci-similarity\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-dev\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-3.5-turbo\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " },\n", - " {\n", - " \"created\": null,\n", - " \"id\": \"gpt-3.5-turbo-16k\",\n", - " \"object\": \"engine\",\n", - " \"owner\": \"openai-internal\",\n", - " \"permissions\": null,\n", - " \"ready\": true\n", - " }\n", - " ],\n", - " \"object\": \"list\"\n", - "}" - ] - } - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "kteZ69Z5M55S" - }, - "source": [ - "embed_model = \"text-embedding-ada-002\"\n", - "\n", - "res = openai.Embedding.create(\n", - " input=[\n", - " \"Sample document text goes here\",\n", - " \"there will be several phrases in each batch\",\n", - " ],\n", - " engine=embed_model,\n", - ")" - ], - "execution_count": 12, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aNZ7IWekNLbu" - }, - "source": [ - "In the response `res` we will find a JSON-like object containing our new embeddings within the `'data'` field." - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "esagZj6iNLPZ", - "outputId": "8e26f18a-4890-43ca-95e7-9e256e29e3be" - }, - "source": [ - "res.keys()" - ], - "execution_count": 13, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "dict_keys(['object', 'data', 'model', 'usage'])" - ] - } - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zStnHFpkNVIU" - }, - "source": [ - "Inside `'data'` we will find two records, one for each of the two sentences we just embedded. Each vector embedding contains `1536` dimensions (the output dimensionality of the `text-embedding-ada-002` model." - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "GFLLl1Agum8O" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/generation/openai/gpt-4-langchain-docs.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/generation/openai/gpt-4-langchain-docs.ipynb)\n", + "\n", + "# GPT4 with Retrieval Augmentation over LangChain Docs\n", + "\n", + "[![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/fast-link.svg)](https://github.com/pinecone-io/examples/blob/master/docs/gpt-4-langchain-docs.ipynb)\n", + "\n", + "In this notebook we'll work through an example of using GPT-4 with retrieval augmentation to answer questions about the LangChain Python library." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_HDKlQO5svqI" + }, + "source": [ + "!pip install -qU \\\n", + " tiktoken==0.4.0 \\\n", + " openai==0.27.7 \\\n", + " langchain==0.0.179 \\\n", + " pinecone \\\n", + " datasets==2.13.1" + ], + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "import os\n", + "import time\n", + "\n", + "import openai\n", + "import tiktoken\n", + "from datasets import load_dataset\n", + "from IPython.display import Markdown\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from pinecone import Pinecone, ServerlessSpec\n", + "from tqdm.auto import tqdm" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7c1EpQ-jq7SU" + }, + "source": [ + "---\n", + "\n", + "\ud83d\udea8 _Note: the above `pip install` is formatted for Jupyter notebooks. If running elsewhere you may need to drop the `!`._\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NgUEJ6vDum8q" + }, + "source": [ + "In this example, we will download the LangChain docs, we can find a static version of the docs on Hugging Face datasets in `jamescalam/langchain-docs-23-06-27`. To download them we do:" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 237, + "referenced_widgets": [ + "63de2154fea24b49a87bf4b8428fa630", + "4b4cfb1a834342198c75a02d28448b57", + "a9d471008dc34f67a5307bbb26d6123c", + "580e5dd4c9d9497caa40802d5918e75c", + "bd09981e486d461eaa2cf166b32921e1", + "bed2dd81769b4910831cb34a7b475c72", + "ccad7c2aec604ee29b41497ec0f37fa7", + "390f06d63dd547d395dcf18f1ebe265d", + "6545006e51824be9b6cb5cdb2cb2ba5a", + "241b0de59e53465f8acad4ac74b17b57", + "05199362d95449699254c45c1d5cee94", + "6881722e02fe4395a5fcaf668cb7ebcb", + "2b960a7f46444ad3bd3392517b415f2d", + "a3e8499ed740449586ca31500038c7a8", + "08c52a0369b74e7da99574ec29612189", + "ffb822b2f739434dbe99e8a992716c30", + "7e2b88be1cae49da824e6c6c0782cb50", + "9f4e9da63bb64d279ded5ee1730b5cba", + "3b319c7a4f6f41ea9ea6e6268cd29343", + "908935a03fea42efbded99cd81de54c5", + "dd3ece4c242d4eae946f8bc4f95d1dbf", + "ae71cc7e26ee4b51b7eb67520f66c9bd", + "d83b0b3089c34bb58ddb1272a240c2f9", + "34d21f61f6dc499a9d1504634e470bdd", + "64aae9675d394df48d233b31e5f0eb3c", + "d1d3dde6ec3b483f8b14139a7d6a9ae0", + "690ca50e9785402bb17fa266f8e40ea9", + "482f891d61ab4c2080d95a9b84ea5c6d", + "622987b045e74a13b79553d3d062e72a", + "6c7236b0655e4397b3a9d5f4d83c03fe", + "6f7e876e10fd4c58aa2d1f1ed4ff2762", + "9a8b01998f8a4c6bb0bfe71e02b3352c", + "ec224feb9828415eb018831e985d22c0", + "a532b2307c734cf188092d40299c40ad", + "fab781bfae4647968aa69f19ae6a5754", + "5961b9e44ce14a2a8eb65a9e5b6be90d", + "5f15e4b12305489180e54c61769dcebe", + "324465ed674740c2a18a88a2633f2093", + "f82b21e87eba4e06a0531c791dc09b3f", + "5c0bb7407c844ae19479416752f66190", + "5ef6d125261b49679dcb4d886b3e382c", + "294d5fc4fa1e40429e08137934481ba2", + "f5d992e8c1224879be5e5464a424a3a4", + "7e828bf7b91e4029bc2093876128a78b" + ] }, + "id": "xo9gYhGPr_DQ", + "outputId": "016b896d-87a6-4d17-bad1-027475510a8b" + }, + "source": [ + "docs = load_dataset(\"jamescalam/langchain-docs-23-06-27\", split=\"train\")\n", + "docs" + ], + "execution_count": 2, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "uVoP9VcINWAC", - "outputId": "d9f797af-0df8-4ee9-f779-8d8a62589134" - }, - "source": [ - 
"len(res[\"data\"])" - ], - "execution_count": 14, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "2" - ] - } - } - ] + "output_type": "stream", + "text": [ + "Downloading and preparing dataset json/jamescalam--langchain-docs-23-06-27 to /root/.cache/huggingface/datasets/jamescalam___json/jamescalam--langchain-docs-23-06-27-4631410d07444b03/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96...\n" + ], + "name": "stdout" }, { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "s-zraDCjNeC6", - "outputId": "5f09e471-28de-4c39-d040-a80def97708e" + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "63de2154fea24b49a87bf4b8428fa630", + "version_major": 2, + "version_minor": 0 }, - "source": [ - "len(res[\"data\"][0][\"embedding\"]), len(res[\"data\"][1][\"embedding\"])" - ], - "execution_count": 15, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(1536, 1536)" - ] - } - } + "text/plain": [ + "Downloading data files: 0%| | 0/1 [00:00 bool:\\n'\n", - " ' return True\\n'\n", - " ' prompt: BasePromptTemplate\\n'\n", - " ' \"\"\"Prompt object to use.\"\"\"\\n'\n", - " ' llm: BaseLanguageModel\\n'\n", - " ' \"\"\"Language model to call.\"\"\"\\n'\n", - " ' output_key: str = \"text\" #: :meta '\n", - " 'private:\\n'\n", - " ' output_parser: BaseLLMOutputParser = '\n", - " 'Field(default_factory=NoOpOutputParser)\\n'\n", - " ' \"\"\"Output parser to use.\\n'\n", - " ' Defaults to one that takes the most '\n", - " 'likely string but does not change it \\n'\n", - " ' otherwise.\"\"\"\\n'\n", - " ' return_final_only: bool = True\\n'\n", - " ' \"\"\"Whether to return only the final '\n", - " 'parsed result. 
Defaults to True.\\n'\n", - " ' If false, will return a bunch of extra '\n", - " 'information about the generation.\"\"\"\\n'\n", - " ' llm_kwargs: dict = '\n", - " 'Field(default_factory=dict)\\n'\n", - " ' class Config:\\n'\n", - " ' \"\"\"Configuration for this pydantic '\n", - " 'object.\"\"\"\\n'\n", - " ' extra = Extra.forbid\\n'\n", - " ' arbitrary_types_allowed = True',\n", - " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/llm.html'},\n", - " 'score': 0.800940871,\n", - " 'values': []},\n", - " {'id': '35cde68a-b909-43b6-b918-81c4eb2db5cd-82',\n", - " 'metadata': {'chunk': 82.0,\n", - " 'text': 'Bases: langchain.chains.base.Chain\\n'\n", - " 'Chain for question-answering with '\n", - " 'self-verification.\\n'\n", - " 'Example\\n'\n", - " 'from langchain import OpenAI, '\n", - " 'LLMSummarizationCheckerChain\\n'\n", - " 'llm = OpenAI(temperature=0.0)\\n'\n", - " 'checker_chain = '\n", - " 'LLMSummarizationCheckerChain.from_llm(llm)\\n'\n", - " 'Parameters\\n'\n", - " 'memory '\n", - " '(Optional[langchain.schema.BaseMemory]) '\n", - " '– \\n'\n", - " 'callbacks '\n", - " '(Optional[Union[List[langchain.callbacks.base.BaseCallbackHandler], '\n", - " 'langchain.callbacks.base.BaseCallbackManager]]) '\n", - " '– \\n'\n", - " 'callback_manager '\n", - " '(Optional[langchain.callbacks.base.BaseCallbackManager]) '\n", - " '– \\n'\n", - " 'verbose (bool) – \\n'\n", - " 'tags (Optional[List[str]]) – \\n'\n", - " 'sequential_chain '\n", - " '(langchain.chains.sequential.SequentialChain) '\n", - " '– \\n'\n", - " 'llm '\n", - " '(Optional[langchain.base_language.BaseLanguageModel]) '\n", - " '– \\n'\n", - " 'create_assertions_prompt '\n", - " '(langchain.prompts.prompt.PromptTemplate) '\n", - " '– \\n'\n", - " 'check_assertions_prompt '\n", - " '(langchain.prompts.prompt.PromptTemplate) '\n", - " '– \\n'\n", - " 'revised_summary_prompt '\n", - " '(langchain.prompts.prompt.PromptTemplate) '\n", - " '– \\n'\n", - " 'are_all_true_prompt '\n", - " '(langchain.prompts.prompt.PromptTemplate) '\n", - " '– \\n'\n", - " 'input_key (str) – \\n'\n", - " 'output_key (str) – \\n'\n", - " 'max_checks (int) – \\n'\n", - " 'Return type\\n'\n", - " 'None',\n", - " 'url': 'https://api.python.langchain.com/en/latest/modules/chains.html'},\n", - " 'score': 0.79580605,\n", - " 'values': []},\n", - " {'id': '993db45b-4e3b-431d-a2a6-48ed5944912a-1',\n", - " 'metadata': {'chunk': 1.0,\n", - " 'text': '[docs] @classmethod\\n'\n", - " ' def from_llm(\\n'\n", - " ' cls,\\n'\n", - " ' llm: BaseLanguageModel,\\n'\n", - " ' chain: LLMChain,\\n'\n", - " ' critique_prompt: '\n", - " 'BasePromptTemplate = CRITIQUE_PROMPT,\\n'\n", - " ' revision_prompt: '\n", - " 'BasePromptTemplate = REVISION_PROMPT,\\n'\n", - " ' **kwargs: Any,\\n'\n", - " ' ) -> \"ConstitutionalChain\":\\n'\n", - " ' \"\"\"Create a chain from an LLM.\"\"\"\\n'\n", - " ' critique_chain = LLMChain(llm=llm, '\n", - " 'prompt=critique_prompt)\\n'\n", - " ' revision_chain = LLMChain(llm=llm, '\n", - " 'prompt=revision_prompt)\\n'\n", - " ' return cls(\\n'\n", - " ' chain=chain,\\n'\n", - " ' '\n", - " 'critique_chain=critique_chain,\\n'\n", - " ' '\n", - " 'revision_chain=revision_chain,\\n'\n", - " ' **kwargs,\\n'\n", - " ' )\\n'\n", - " ' @property\\n'\n", - " ' def input_keys(self) -> List[str]:\\n'\n", - " ' \"\"\"Defines the input keys.\"\"\"\\n'\n", - " ' return self.chain.input_keys\\n'\n", - " ' @property\\n'\n", - " ' def output_keys(self) -> List[str]:\\n'\n", - " ' \"\"\"Defines the output keys.\"\"\"\\n'\n", - " ' if 
'\n", - " 'self.return_intermediate_steps:\\n'\n", - " ' return [\"output\", '\n", - " '\"critiques_and_revisions\", '\n", - " '\"initial_output\"]\\n'\n", - " ' return [\"output\"]\\n'\n", - " ' def _call(\\n'\n", - " ' self,\\n'\n", - " ' inputs: Dict[str, Any],\\n'\n", - " ' run_manager: '\n", - " 'Optional[CallbackManagerForChainRun] = '\n", - " 'None,\\n'\n", - " ' ) -> Dict[str, Any]:\\n'\n", - " ' _run_manager = run_manager or '\n", - " 'CallbackManagerForChainRun.get_noop_manager()\\n'\n", - " ' response = self.chain.run(\\n'\n", - " ' **inputs,\\n'\n", - " ' '\n", - " 'callbacks=_run_manager.get_child(\"original\"),\\n'\n", - " ' )\\n'\n", - " ' initial_response = response\\n'\n", - " ' input_prompt = '\n", - " 'self.chain.prompt.format(**inputs)\\n'\n", - " ' _run_manager.on_text(\\n'\n", - " ' text=\"Initial response: \" + '\n", - " 'response + \"\\\\n\\\\n\",\\n'\n", - " ' verbose=self.verbose,\\n'\n", - " ' color=\"yellow\",\\n'\n", - " ' )\\n'\n", - " ' critiques_and_revisions = []\\n'\n", - " ' for constitutional_principle in '\n", - " 'self.constitutional_principles:\\n'\n", - " ' # Do critique\\n'\n", - " ' raw_critique = '\n", - " 'self.critique_chain.run(\\n'\n", - " ' '\n", - " 'input_prompt=input_prompt,\\n'\n", - " ' '\n", - " 'output_from_model=response,\\n'\n", - " ' '\n", - " 'critique_request=constitutional_principle.critique_request,\\n'\n", - " ' '\n", - " 'callbacks=_run_manager.get_child(\"critique\"),\\n'\n", - " ' )\\n'\n", - " ' critique = '\n", - " 'self._parse_critique(\\n'\n", - " ' '\n", - " 'output_string=raw_critique,',\n", - " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/constitutional_ai/base.html'},\n", - " 'score': 0.79369247,\n", - " 'values': []},\n", - " {'id': 'adea5d40-2691-4bc9-9403-3360345bc25e-0',\n", - " 'metadata': {'chunk': 0.0,\n", - " 'text': 'Source code for '\n", - " 'langchain.chains.conversation.base\\n'\n", - " '\"\"\"Chain that carries on a conversation '\n", - " 'and calls an LLM.\"\"\"\\n'\n", - " 'from typing import Dict, List\\n'\n", - " 'from pydantic import Extra, Field, '\n", - " 'root_validator\\n'\n", - " 'from langchain.chains.conversation.prompt '\n", - " 'import PROMPT\\n'\n", - " 'from langchain.chains.llm import LLMChain\\n'\n", - " 'from langchain.memory.buffer import '\n", - " 'ConversationBufferMemory\\n'\n", - " 'from langchain.prompts.base import '\n", - " 'BasePromptTemplate\\n'\n", - " 'from langchain.schema import BaseMemory\\n'\n", - " '[docs]class ConversationChain(LLMChain):\\n'\n", - " ' \"\"\"Chain to have a conversation and '\n", - " 'load context from memory.\\n'\n", - " ' Example:\\n'\n", - " ' .. 
code-block:: python\\n'\n", - " ' from langchain import '\n", - " 'ConversationChain, OpenAI\\n'\n", - " ' conversation = '\n", - " 'ConversationChain(llm=OpenAI())\\n'\n", - " ' \"\"\"\\n'\n", - " ' memory: BaseMemory = '\n", - " 'Field(default_factory=ConversationBufferMemory)\\n'\n", - " ' \"\"\"Default memory store.\"\"\"\\n'\n", - " ' prompt: BasePromptTemplate = PROMPT\\n'\n", - " ' \"\"\"Default conversation prompt to '\n", - " 'use.\"\"\"\\n'\n", - " ' input_key: str = \"input\" #: :meta '\n", - " 'private:\\n'\n", - " ' output_key: str = \"response\" #: :meta '\n", - " 'private:\\n'\n", - " ' class Config:\\n'\n", - " ' \"\"\"Configuration for this pydantic '\n", - " 'object.\"\"\"\\n'\n", - " ' extra = Extra.forbid\\n'\n", - " ' arbitrary_types_allowed = True\\n'\n", - " ' @property\\n'\n", - " ' def input_keys(self) -> List[str]:\\n'\n", - " ' \"\"\"Use this since so some prompt '\n", - " 'vars come from history.\"\"\"\\n'\n", - " ' return [self.input_key]\\n'\n", - " ' @root_validator()\\n'\n", - " ' def '\n", - " 'validate_prompt_input_variables(cls, '\n", - " 'values: Dict) -> Dict:\\n'\n", - " ' \"\"\"Validate that prompt input '\n", - " 'variables are consistent.\"\"\"\\n'\n", - " ' memory_keys = '\n", - " 'values[\"memory\"].memory_variables\\n'\n", - " ' input_key = values[\"input_key\"]\\n'\n", - " ' if input_key in memory_keys:\\n'\n", - " ' raise ValueError(\\n'\n", - " ' f\"The input key '\n", - " '{input_key} was also found in the memory '\n", - " 'keys \"\\n'\n", - " ' f\"({memory_keys}) - please '\n", - " 'provide keys that don\\'t overlap.\"\\n'\n", - " ' )\\n'\n", - " ' prompt_variables = '\n", - " 'values[\"prompt\"].input_variables\\n'\n", - " ' expected_keys = memory_keys + '\n", - " '[input_key]\\n'\n", - " ' if set(expected_keys) != '\n", - " 'set(prompt_variables):\\n'\n", - " ' raise ValueError(\\n'\n", - " ' \"Got unexpected prompt '\n", - " 'input variables. 
The prompt expects \"\\n'\n", - " ' f\"{prompt_variables}, but '\n", - " 'got {memory_keys} as inputs from \"\\n'\n", - " ' f\"memory, and {input_key} '\n", - " 'as the normal input key.\"\\n'\n", - " ' )\\n'\n", - " ' return values',\n", - " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/conversation/base.html'},\n", - " 'score': 0.792259932,\n", - " 'values': []},\n", - " {'id': '3b6f9660-0346-4992-a6f5-b9cc2977f446-5',\n", - " 'metadata': {'chunk': 5.0,\n", - " 'text': 'callbacks: Callbacks = None,\\n'\n", - " ' **kwargs: Any,\\n'\n", - " ' ) -> '\n", - " 'BaseConversationalRetrievalChain:\\n'\n", - " ' \"\"\"Load chain from LLM.\"\"\"\\n'\n", - " ' combine_docs_chain_kwargs = '\n", - " 'combine_docs_chain_kwargs or {}\\n'\n", - " ' doc_chain = load_qa_chain(\\n'\n", - " ' llm,\\n'\n", - " ' chain_type=chain_type,\\n'\n", - " ' callbacks=callbacks,\\n'\n", - " ' **combine_docs_chain_kwargs,\\n'\n", - " ' )\\n'\n", - " ' condense_question_chain = '\n", - " 'LLMChain(\\n'\n", - " ' llm=llm, '\n", - " 'prompt=condense_question_prompt, '\n", - " 'callbacks=callbacks\\n'\n", - " ' )\\n'\n", - " ' return cls(\\n'\n", - " ' vectorstore=vectorstore,\\n'\n", - " ' combine_docs_chain=doc_chain,\\n'\n", - " ' '\n", - " 'question_generator=condense_question_chain,\\n'\n", - " ' callbacks=callbacks,\\n'\n", - " ' **kwargs,\\n'\n", - " ' )',\n", - " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/conversational_retrieval/base.html'},\n", - " 'score': 0.791279614,\n", - " 'values': []}],\n", - " 'namespace': ''}" - ] - } - } + "text/plain": [ + " 0%| | 0/505 [00:00 JSON: {\n", + " \"data\": [\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"whisper-1\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-internal\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"babbage\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"davinci\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-davinci-edit-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"babbage-code-search-code\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-similarity-babbage-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-embedding-ada-002\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-internal\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"code-davinci-edit-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-davinci-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + 
" \"created\": null,\n", + " \"id\": \"ada\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"babbage-code-search-text\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"babbage-similarity\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"code-search-babbage-text-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-curie-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-4-0314\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-4-0613\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"code-search-babbage-code-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-ada-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-similarity-ada-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"curie-instruct-beta\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-4\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"ada-code-search-code\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"ada-similarity\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"code-search-ada-text-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-ada-query-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"davinci-search-document\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": 
null,\n", + " \"id\": \"ada-code-search-text\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-ada-doc-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"davinci-instruct-beta\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-similarity-curie-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"code-search-ada-code-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"ada-search-query\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-davinci-query-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"curie-search-query\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"davinci-search-query\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"babbage-search-document\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"ada-search-document\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-curie-query-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-babbage-doc-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"curie-search-document\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-curie-doc-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"babbage-search-query\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-babbage-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " 
\"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-davinci-doc-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-search-babbage-query-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"curie-similarity\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-3.5-turbo-0613\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"curie\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-3.5-turbo-16k-0613\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-similarity-davinci-001\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-davinci-002\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-3.5-turbo-0301\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"text-davinci-003\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-internal\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"davinci-similarity\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-dev\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-3.5-turbo\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " },\n", + " {\n", + " \"created\": null,\n", + " \"id\": \"gpt-3.5-turbo-16k\",\n", + " \"object\": \"engine\",\n", + " \"owner\": \"openai-internal\",\n", + " \"permissions\": null,\n", + " \"ready\": true\n", + " }\n", + " ],\n", + " \"object\": \"list\"\n", + "}" ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "kteZ69Z5M55S" + }, + "source": [ + "embed_model = \"text-embedding-ada-002\"\n", + "\n", + "res = openai.Embedding.create(\n", + " input=[\n", + " \"Sample document text goes here\",\n", + " \"there will be several phrases in each batch\",\n", + " ],\n", + " engine=embed_model,\n", + ")" + ], + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aNZ7IWekNLbu" + }, + "source": [ + "In the response `res` we will find a JSON-like object containing our new embeddings within the `'data'` field." 
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
 },
+ "id": "esagZj6iNLPZ",
+ "outputId": "8e26f18a-4890-43ca-95e7-9e256e29e3be"
+ },
+ "source": [
+ "res.keys()"
+ ],
+ "execution_count": 13,
+ "outputs": [
 {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "dict_keys(['object', 'data', 'model', 'usage'])"
 ]
+ },
+ "metadata": {},
+ "execution_count": 13
 }
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {
+ "id": "zStnHFpkNVIU"
+ },
+ "source": [
+ "Inside `'data'` we will find two records, one for each of the two sentences we just embedded. Each vector embedding contains `1536` dimensions (the output dimensionality of the `text-embedding-ada-002` model)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
 },
+ "id": "uVoP9VcINWAC",
+ "outputId": "d9f797af-0df8-4ee9-f779-8d8a62589134"
+ },
+ "source": [
+ "len(res[\"data\"])"
+ ],
+ "execution_count": 14,
+ "outputs": [
 {
- "cell_type": "markdown",
- "metadata": {
- "id": "XPC1jQaKUcy0"
- },
- "source": [
- "GPT-4 is currently accessed via the `ChatCompletions` endpoint of OpenAI. To add the information we retrieved into the model, we need to pass it into our user prompts *alongside* our original query. We can do that like so:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "metadata": {
- "id": "unZstoHNUHeG"
- },
- "outputs": [],
- "source": [
- "# get list of retrieved text\n",
- "contexts = [item[\"metadata\"][\"text\"] for item in res[\"matches\"]]\n",
- "\n",
- "augmented_query = \"\\n\\n---\\n\\n\".join(contexts) + \"\\n\\n-----\\n\\n\" + query"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "LRcEHm0Z9fXE",
- "outputId": "636c6825-ecd1-4953-ee25-ebabcb3a2fed"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Source code for langchain.chains.llm\n",
- "\"\"\"Chain that just formats a prompt and calls an LLM.\"\"\"\n",
- "from __future__ import annotations\n",
- "import warnings\n",
- "from typing import Any, Dict, List, Optional, Sequence, Tuple, Union\n",
- "from pydantic import Extra, Field\n",
- "from langchain.base_language import BaseLanguageModel\n",
- "from langchain.callbacks.manager import (\n",
- " AsyncCallbackManager,\n",
- " AsyncCallbackManagerForChainRun,\n",
- " CallbackManager,\n",
- " CallbackManagerForChainRun,\n",
- " Callbacks,\n",
- ")\n",
- "from langchain.chains.base import Chain\n",
- "from langchain.input import get_colored_text\n",
- "from langchain.load.dump import dumpd\n",
- "from langchain.prompts.base import BasePromptTemplate\n",
- "from langchain.prompts.prompt import PromptTemplate\n",
- "from langchain.schema import (\n",
- " BaseLLMOutputParser,\n",
- " LLMResult,\n",
- " NoOpOutputParser,\n",
- " PromptValue,\n",
- ")\n",
- "[docs]class LLMChain(Chain):\n",
- " \"\"\"Chain to run queries against LLMs.\n",
- " Example:\n",
- " .. 
code-block:: python\n", - " from langchain import LLMChain, OpenAI, PromptTemplate\n", - " prompt_template = \"Tell me a {adjective} joke\"\n", - " prompt = PromptTemplate(\n", - " input_variables=[\"adjective\"], template=prompt_template\n", - " )\n", - " llm = LLMChain(llm=OpenAI(), prompt=prompt)\n", - " \"\"\"\n", - " @property\n", - " def lc_serializable(self) -> bool:\n", - " return True\n", - " prompt: BasePromptTemplate\n", - " \"\"\"Prompt object to use.\"\"\"\n", - " llm: BaseLanguageModel\n", - " \"\"\"Language model to call.\"\"\"\n", - " output_key: str = \"text\" #: :meta private:\n", - " output_parser: BaseLLMOutputParser = Field(default_factory=NoOpOutputParser)\n", - " \"\"\"Output parser to use.\n", - " Defaults to one that takes the most likely string but does not change it \n", - " otherwise.\"\"\"\n", - " return_final_only: bool = True\n", - " \"\"\"Whether to return only the final parsed result. Defaults to True.\n", - " If false, will return a bunch of extra information about the generation.\"\"\"\n", - " llm_kwargs: dict = Field(default_factory=dict)\n", - " class Config:\n", - " \"\"\"Configuration for this pydantic object.\"\"\"\n", - " extra = Extra.forbid\n", - " arbitrary_types_allowed = True\n", - "\n", - "---\n", - "\n", - "Bases: langchain.chains.base.Chain\n", - "Chain for question-answering with self-verification.\n", - "Example\n", - "from langchain import OpenAI, LLMSummarizationCheckerChain\n", - "llm = OpenAI(temperature=0.0)\n", - "checker_chain = LLMSummarizationCheckerChain.from_llm(llm)\n", - "Parameters\n", - "memory (Optional[langchain.schema.BaseMemory]) – \n", - "callbacks (Optional[Union[List[langchain.callbacks.base.BaseCallbackHandler], langchain.callbacks.base.BaseCallbackManager]]) – \n", - "callback_manager (Optional[langchain.callbacks.base.BaseCallbackManager]) – \n", - "verbose (bool) – \n", - "tags (Optional[List[str]]) – \n", - "sequential_chain (langchain.chains.sequential.SequentialChain) – \n", - "llm (Optional[langchain.base_language.BaseLanguageModel]) – \n", - "create_assertions_prompt (langchain.prompts.prompt.PromptTemplate) – \n", - "check_assertions_prompt (langchain.prompts.prompt.PromptTemplate) – \n", - "revised_summary_prompt (langchain.prompts.prompt.PromptTemplate) – \n", - "are_all_true_prompt (langchain.prompts.prompt.PromptTemplate) – \n", - "input_key (str) – \n", - "output_key (str) – \n", - "max_checks (int) – \n", - "Return type\n", - "None\n", - "\n", - "---\n", - "\n", - "[docs] @classmethod\n", - " def from_llm(\n", - " cls,\n", - " llm: BaseLanguageModel,\n", - " chain: LLMChain,\n", - " critique_prompt: BasePromptTemplate = CRITIQUE_PROMPT,\n", - " revision_prompt: BasePromptTemplate = REVISION_PROMPT,\n", - " **kwargs: Any,\n", - " ) -> \"ConstitutionalChain\":\n", - " \"\"\"Create a chain from an LLM.\"\"\"\n", - " critique_chain = LLMChain(llm=llm, prompt=critique_prompt)\n", - " revision_chain = LLMChain(llm=llm, prompt=revision_prompt)\n", - " return cls(\n", - " chain=chain,\n", - " critique_chain=critique_chain,\n", - " revision_chain=revision_chain,\n", - " **kwargs,\n", - " )\n", - " @property\n", - " def input_keys(self) -> List[str]:\n", - " \"\"\"Defines the input keys.\"\"\"\n", - " return self.chain.input_keys\n", - " @property\n", - " def output_keys(self) -> List[str]:\n", - " \"\"\"Defines the output keys.\"\"\"\n", - " if self.return_intermediate_steps:\n", - " return [\"output\", \"critiques_and_revisions\", \"initial_output\"]\n", - " return [\"output\"]\n", - " def _call(\n", - " 
self,\n", - " inputs: Dict[str, Any],\n", - " run_manager: Optional[CallbackManagerForChainRun] = None,\n", - " ) -> Dict[str, Any]:\n", - " _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()\n", - " response = self.chain.run(\n", - " **inputs,\n", - " callbacks=_run_manager.get_child(\"original\"),\n", - " )\n", - " initial_response = response\n", - " input_prompt = self.chain.prompt.format(**inputs)\n", - " _run_manager.on_text(\n", - " text=\"Initial response: \" + response + \"\\n\\n\",\n", - " verbose=self.verbose,\n", - " color=\"yellow\",\n", - " )\n", - " critiques_and_revisions = []\n", - " for constitutional_principle in self.constitutional_principles:\n", - " # Do critique\n", - " raw_critique = self.critique_chain.run(\n", - " input_prompt=input_prompt,\n", - " output_from_model=response,\n", - " critique_request=constitutional_principle.critique_request,\n", - " callbacks=_run_manager.get_child(\"critique\"),\n", - " )\n", - " critique = self._parse_critique(\n", - " output_string=raw_critique,\n", - "\n", - "---\n", - "\n", - "Source code for langchain.chains.conversation.base\n", - "\"\"\"Chain that carries on a conversation and calls an LLM.\"\"\"\n", - "from typing import Dict, List\n", - "from pydantic import Extra, Field, root_validator\n", - "from langchain.chains.conversation.prompt import PROMPT\n", - "from langchain.chains.llm import LLMChain\n", - "from langchain.memory.buffer import ConversationBufferMemory\n", - "from langchain.prompts.base import BasePromptTemplate\n", - "from langchain.schema import BaseMemory\n", - "[docs]class ConversationChain(LLMChain):\n", - " \"\"\"Chain to have a conversation and load context from memory.\n", - " Example:\n", - " .. code-block:: python\n", - " from langchain import ConversationChain, OpenAI\n", - " conversation = ConversationChain(llm=OpenAI())\n", - " \"\"\"\n", - " memory: BaseMemory = Field(default_factory=ConversationBufferMemory)\n", - " \"\"\"Default memory store.\"\"\"\n", - " prompt: BasePromptTemplate = PROMPT\n", - " \"\"\"Default conversation prompt to use.\"\"\"\n", - " input_key: str = \"input\" #: :meta private:\n", - " output_key: str = \"response\" #: :meta private:\n", - " class Config:\n", - " \"\"\"Configuration for this pydantic object.\"\"\"\n", - " extra = Extra.forbid\n", - " arbitrary_types_allowed = True\n", - " @property\n", - " def input_keys(self) -> List[str]:\n", - " \"\"\"Use this since so some prompt vars come from history.\"\"\"\n", - " return [self.input_key]\n", - " @root_validator()\n", - " def validate_prompt_input_variables(cls, values: Dict) -> Dict:\n", - " \"\"\"Validate that prompt input variables are consistent.\"\"\"\n", - " memory_keys = values[\"memory\"].memory_variables\n", - " input_key = values[\"input_key\"]\n", - " if input_key in memory_keys:\n", - " raise ValueError(\n", - " f\"The input key {input_key} was also found in the memory keys \"\n", - " f\"({memory_keys}) - please provide keys that don't overlap.\"\n", - " )\n", - " prompt_variables = values[\"prompt\"].input_variables\n", - " expected_keys = memory_keys + [input_key]\n", - " if set(expected_keys) != set(prompt_variables):\n", - " raise ValueError(\n", - " \"Got unexpected prompt input variables. 
The prompt expects \"\n", - " f\"{prompt_variables}, but got {memory_keys} as inputs from \"\n", - " f\"memory, and {input_key} as the normal input key.\"\n", - " )\n", - " return values\n", - "\n", - "---\n", - "\n", - "callbacks: Callbacks = None,\n", - " **kwargs: Any,\n", - " ) -> BaseConversationalRetrievalChain:\n", - " \"\"\"Load chain from LLM.\"\"\"\n", - " combine_docs_chain_kwargs = combine_docs_chain_kwargs or {}\n", - " doc_chain = load_qa_chain(\n", - " llm,\n", - " chain_type=chain_type,\n", - " callbacks=callbacks,\n", - " **combine_docs_chain_kwargs,\n", - " )\n", - " condense_question_chain = LLMChain(\n", - " llm=llm, prompt=condense_question_prompt, callbacks=callbacks\n", - " )\n", - " return cls(\n", - " vectorstore=vectorstore,\n", - " combine_docs_chain=doc_chain,\n", - " question_generator=condense_question_chain,\n", - " callbacks=callbacks,\n", - " **kwargs,\n", - " )\n", - "\n", - "-----\n", - "\n", - "how do I use the LLMChain in LangChain?\n" - ] - } + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "s-zraDCjNeC6", + "outputId": "5f09e471-28de-4c39-d040-a80def97708e" + }, + "source": [ + "len(res[\"data\"][0][\"embedding\"]), len(res[\"data\"][1][\"embedding\"])" + ], + "execution_count": 15, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "sihH_GMiV5_p" - }, - "source": [ - "Now we ask the question:" + "output_type": "execute_result", + "data": { + "text/plain": [ + "(1536, 1536)" ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XPd41MjANhmp" + }, + "source": [ + "We will apply this same embedding logic to the langchain docs dataset we've just scraped. But before doing so we must create a place to store the embeddings." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WPi4MZvMNvUH" + }, + "source": [ + "## Initializing the Index" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H5RRQArrN2lN" + }, + "source": [ + "Now we need a place to store these embeddings and enable a efficient vector search through them all. To do that we use Pinecone, we can get a [free API key](https://app.pinecone.io/) and enter it below where we will initialize our connection to Pinecone and create a new index." + ] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "# initialize connection to pinecone (get API key at app.pinecone.io)\n", + "api_key = os.environ.get(\"PINECONE_API_KEY\") or \"PINECONE_API_KEY\"\n", + "\n", + "# configure client\n", + "pc = Pinecone(api_key=api_key)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we setup our index specification, this allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects)." 
+ ] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "cloud = os.environ.get(\"PINECONE_CLOUD\") or \"aws\"\n", + "region = os.environ.get(\"PINECONE_REGION\") or \"us-east-1\"\n", + "\n", + "spec = ServerlessSpec(cloud=cloud, region=region)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "2GQAnohhum8v", + "tags": [ + "parameters" + ] + }, + "source": [ + "index_name = \"gpt-4-langchain-docs\"" + ], + "execution_count": 17, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "# check if index already exists (it shouldn't if this is first time)\n", + "if index_name not in pc.list_indexes().names():\n", + " # if does not exist, create index\n", + " pc.create_index(\n", + " index_name,\n", + " dimension=len(res[\"data\"][0][\"embedding\"]),\n", + " metric=\"cosine\",\n", + " spec=spec,\n", + " )\n", + " # wait for index to be initialized\n", + " while not pc.describe_index(index_name).status[\"ready\"]:\n", + " time.sleep(1)\n", + "\n", + "# connect to index\n", + "index = pc.Index(index_name)\n", + "# view index stats\n", + "index.describe_index_stats()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ezSTzN2rPa2o" + }, + "source": [ + "We can see the index is currently empty with a `total_vector_count` of `0`. We can begin populating it with OpenAI `text-embedding-ada-002` built embeddings like so:" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "760c608de89946298cb6845d5ff1b020", + "f6f7d673d7a145bda593848f7e87ca2c", + "effb0c1b07574547aca5956963b371c8", + "e6e0b0054fb5449c84ad745308510ddb", + "b1e6d4d46b334bcf96efcab6f57c7536", + "e5a120d5b9494d14a142fbf519bcbbdf", + "78fe5eb48ae748bda91ddc70f422212c", + "34e43d6a7a92453490c45e39498afd64", + "45c7fb32593141abb8168b8077e31f59", + "0ed96243151440a18994669e2f85e819", + "05a0a1ebc92f463d9f3e953e51742a85" + ] }, + "id": "iZbFbulAPeop", + "outputId": "97cbb020-f6f9-4914-ff14-dd472354f64a" + }, + "source": [ + "batch_size = 100 # how many embeddings we create and insert at once\n", + "\n", + "for i in tqdm(range(0, len(chunks), batch_size)):\n", + " # find end of batch\n", + " i_end = min(len(chunks), i + batch_size)\n", + " meta_batch = chunks[i:i_end]\n", + " # get ids\n", + " ids_batch = [x[\"id\"] for x in meta_batch]\n", + " # get texts to encode\n", + " texts = [x[\"text\"] for x in meta_batch]\n", + " # create embeddings (try-except added to avoid RateLimitError)\n", + " try:\n", + " res = openai.Embedding.create(input=texts, engine=embed_model)\n", + " except Exception:\n", + " done = False\n", + " while not done:\n", + " time.sleep(5)\n", + " try:\n", + " res = openai.Embedding.create(input=texts, engine=embed_model)\n", + " done = True\n", + " except Exception:\n", + " pass\n", + " embeds = [record[\"embedding\"] for record in res[\"data\"]]\n", + " # cleanup metadata\n", + " meta_batch = [\n", + " {\"text\": x[\"text\"], \"chunk\": x[\"chunk\"], \"url\": x[\"url\"]} for x in meta_batch\n", + " ]\n", + " to_upsert = list(zip(ids_batch, embeds, meta_batch))\n", + " # upsert to Pinecone\n", + " index.upsert(vectors=to_upsert)" + ], + "execution_count": 19, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "IThBqBi8V70d" - }, - "source": [ - "# system message to 'prime' the model\n", - "primer = \"\"\"You are Q&A bot. 
A highly intelligent system that answers\n", - "user questions based on the information provided by the user above\n", - "each question. If the information can not be found in the information\n", - "provided by the user you truthfully say \"I don't know\".\n", - "\"\"\"\n", - "\n", - "res = openai.ChatCompletion.create(\n", - " model=\"gpt-4\",\n", - " messages=[\n", - " {\"role\": \"system\", \"content\": primer},\n", - " {\"role\": \"user\", \"content\": augmented_query},\n", - " ],\n", - ")" - ], - "execution_count": 28, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QvS1yJhOWpiJ" + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "760c608de89946298cb6845d5ff1b020", + "version_major": 2, + "version_minor": 0 }, - "source": [ - "To display this response nicely, we will display it in markdown." + "text/plain": [ + " 0%| | 0/25 [00:00" - ] - } - } + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'matches': [{'id': '35afffd0-a42a-42ee-ac6f-92b5491183fb-0',\n", + " 'metadata': {'chunk': 0.0,\n", + " 'text': 'Source code for langchain.chains.llm\\n'\n", + " '\"\"\"Chain that just formats a prompt and '\n", + " 'calls an LLM.\"\"\"\\n'\n", + " 'from __future__ import annotations\\n'\n", + " 'import warnings\\n'\n", + " 'from typing import Any, Dict, List, '\n", + " 'Optional, Sequence, Tuple, Union\\n'\n", + " 'from pydantic import Extra, Field\\n'\n", + " 'from langchain.base_language import '\n", + " 'BaseLanguageModel\\n'\n", + " 'from langchain.callbacks.manager import (\\n'\n", + " ' AsyncCallbackManager,\\n'\n", + " ' AsyncCallbackManagerForChainRun,\\n'\n", + " ' CallbackManager,\\n'\n", + " ' CallbackManagerForChainRun,\\n'\n", + " ' Callbacks,\\n'\n", + " ')\\n'\n", + " 'from langchain.chains.base import Chain\\n'\n", + " 'from langchain.input import '\n", + " 'get_colored_text\\n'\n", + " 'from langchain.load.dump import dumpd\\n'\n", + " 'from langchain.prompts.base import '\n", + " 'BasePromptTemplate\\n'\n", + " 'from langchain.prompts.prompt import '\n", + " 'PromptTemplate\\n'\n", + " 'from langchain.schema import (\\n'\n", + " ' BaseLLMOutputParser,\\n'\n", + " ' LLMResult,\\n'\n", + " ' NoOpOutputParser,\\n'\n", + " ' PromptValue,\\n'\n", + " ')\\n'\n", + " '[docs]class LLMChain(Chain):\\n'\n", + " ' \"\"\"Chain to run queries against LLMs.\\n'\n", + " ' Example:\\n'\n", + " ' .. 
code-block:: python\\n'\n", + " ' from langchain import '\n", + " 'LLMChain, OpenAI, PromptTemplate\\n'\n", + " ' prompt_template = \"Tell me a '\n", + " '{adjective} joke\"\\n'\n", + " ' prompt = PromptTemplate(\\n'\n", + " ' '\n", + " 'input_variables=[\"adjective\"], '\n", + " 'template=prompt_template\\n'\n", + " ' )\\n'\n", + " ' llm = LLMChain(llm=OpenAI(), '\n", + " 'prompt=prompt)\\n'\n", + " ' \"\"\"\\n'\n", + " ' @property\\n'\n", + " ' def lc_serializable(self) -> bool:\\n'\n", + " ' return True\\n'\n", + " ' prompt: BasePromptTemplate\\n'\n", + " ' \"\"\"Prompt object to use.\"\"\"\\n'\n", + " ' llm: BaseLanguageModel\\n'\n", + " ' \"\"\"Language model to call.\"\"\"\\n'\n", + " ' output_key: str = \"text\" #: :meta '\n", + " 'private:\\n'\n", + " ' output_parser: BaseLLMOutputParser = '\n", + " 'Field(default_factory=NoOpOutputParser)\\n'\n", + " ' \"\"\"Output parser to use.\\n'\n", + " ' Defaults to one that takes the most '\n", + " 'likely string but does not change it \\n'\n", + " ' otherwise.\"\"\"\\n'\n", + " ' return_final_only: bool = True\\n'\n", + " ' \"\"\"Whether to return only the final '\n", + " 'parsed result. Defaults to True.\\n'\n", + " ' If false, will return a bunch of extra '\n", + " 'information about the generation.\"\"\"\\n'\n", + " ' llm_kwargs: dict = '\n", + " 'Field(default_factory=dict)\\n'\n", + " ' class Config:\\n'\n", + " ' \"\"\"Configuration for this pydantic '\n", + " 'object.\"\"\"\\n'\n", + " ' extra = Extra.forbid\\n'\n", + " ' arbitrary_types_allowed = True',\n", + " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/llm.html'},\n", + " 'score': 0.800940871,\n", + " 'values': []},\n", + " {'id': '35cde68a-b909-43b6-b918-81c4eb2db5cd-82',\n", + " 'metadata': {'chunk': 82.0,\n", + " 'text': 'Bases: langchain.chains.base.Chain\\n'\n", + " 'Chain for question-answering with '\n", + " 'self-verification.\\n'\n", + " 'Example\\n'\n", + " 'from langchain import OpenAI, '\n", + " 'LLMSummarizationCheckerChain\\n'\n", + " 'llm = OpenAI(temperature=0.0)\\n'\n", + " 'checker_chain = '\n", + " 'LLMSummarizationCheckerChain.from_llm(llm)\\n'\n", + " 'Parameters\\n'\n", + " 'memory '\n", + " '(Optional[langchain.schema.BaseMemory]) '\n", + " '\u2013 \\n'\n", + " 'callbacks '\n", + " '(Optional[Union[List[langchain.callbacks.base.BaseCallbackHandler], '\n", + " 'langchain.callbacks.base.BaseCallbackManager]]) '\n", + " '\u2013 \\n'\n", + " 'callback_manager '\n", + " '(Optional[langchain.callbacks.base.BaseCallbackManager]) '\n", + " '\u2013 \\n'\n", + " 'verbose (bool) \u2013 \\n'\n", + " 'tags (Optional[List[str]]) \u2013 \\n'\n", + " 'sequential_chain '\n", + " '(langchain.chains.sequential.SequentialChain) '\n", + " '\u2013 \\n'\n", + " 'llm '\n", + " '(Optional[langchain.base_language.BaseLanguageModel]) '\n", + " '\u2013 \\n'\n", + " 'create_assertions_prompt '\n", + " '(langchain.prompts.prompt.PromptTemplate) '\n", + " '\u2013 \\n'\n", + " 'check_assertions_prompt '\n", + " '(langchain.prompts.prompt.PromptTemplate) '\n", + " '\u2013 \\n'\n", + " 'revised_summary_prompt '\n", + " '(langchain.prompts.prompt.PromptTemplate) '\n", + " '\u2013 \\n'\n", + " 'are_all_true_prompt '\n", + " '(langchain.prompts.prompt.PromptTemplate) '\n", + " '\u2013 \\n'\n", + " 'input_key (str) \u2013 \\n'\n", + " 'output_key (str) \u2013 \\n'\n", + " 'max_checks (int) \u2013 \\n'\n", + " 'Return type\\n'\n", + " 'None',\n", + " 'url': 'https://api.python.langchain.com/en/latest/modules/chains.html'},\n", + " 'score': 0.79580605,\n", + " 
'values': []},\n", + " {'id': '993db45b-4e3b-431d-a2a6-48ed5944912a-1',\n", + " 'metadata': {'chunk': 1.0,\n", + " 'text': '[docs] @classmethod\\n'\n", + " ' def from_llm(\\n'\n", + " ' cls,\\n'\n", + " ' llm: BaseLanguageModel,\\n'\n", + " ' chain: LLMChain,\\n'\n", + " ' critique_prompt: '\n", + " 'BasePromptTemplate = CRITIQUE_PROMPT,\\n'\n", + " ' revision_prompt: '\n", + " 'BasePromptTemplate = REVISION_PROMPT,\\n'\n", + " ' **kwargs: Any,\\n'\n", + " ' ) -> \"ConstitutionalChain\":\\n'\n", + " ' \"\"\"Create a chain from an LLM.\"\"\"\\n'\n", + " ' critique_chain = LLMChain(llm=llm, '\n", + " 'prompt=critique_prompt)\\n'\n", + " ' revision_chain = LLMChain(llm=llm, '\n", + " 'prompt=revision_prompt)\\n'\n", + " ' return cls(\\n'\n", + " ' chain=chain,\\n'\n", + " ' '\n", + " 'critique_chain=critique_chain,\\n'\n", + " ' '\n", + " 'revision_chain=revision_chain,\\n'\n", + " ' **kwargs,\\n'\n", + " ' )\\n'\n", + " ' @property\\n'\n", + " ' def input_keys(self) -> List[str]:\\n'\n", + " ' \"\"\"Defines the input keys.\"\"\"\\n'\n", + " ' return self.chain.input_keys\\n'\n", + " ' @property\\n'\n", + " ' def output_keys(self) -> List[str]:\\n'\n", + " ' \"\"\"Defines the output keys.\"\"\"\\n'\n", + " ' if '\n", + " 'self.return_intermediate_steps:\\n'\n", + " ' return [\"output\", '\n", + " '\"critiques_and_revisions\", '\n", + " '\"initial_output\"]\\n'\n", + " ' return [\"output\"]\\n'\n", + " ' def _call(\\n'\n", + " ' self,\\n'\n", + " ' inputs: Dict[str, Any],\\n'\n", + " ' run_manager: '\n", + " 'Optional[CallbackManagerForChainRun] = '\n", + " 'None,\\n'\n", + " ' ) -> Dict[str, Any]:\\n'\n", + " ' _run_manager = run_manager or '\n", + " 'CallbackManagerForChainRun.get_noop_manager()\\n'\n", + " ' response = self.chain.run(\\n'\n", + " ' **inputs,\\n'\n", + " ' '\n", + " 'callbacks=_run_manager.get_child(\"original\"),\\n'\n", + " ' )\\n'\n", + " ' initial_response = response\\n'\n", + " ' input_prompt = '\n", + " 'self.chain.prompt.format(**inputs)\\n'\n", + " ' _run_manager.on_text(\\n'\n", + " ' text=\"Initial response: \" + '\n", + " 'response + \"\\\\n\\\\n\",\\n'\n", + " ' verbose=self.verbose,\\n'\n", + " ' color=\"yellow\",\\n'\n", + " ' )\\n'\n", + " ' critiques_and_revisions = []\\n'\n", + " ' for constitutional_principle in '\n", + " 'self.constitutional_principles:\\n'\n", + " ' # Do critique\\n'\n", + " ' raw_critique = '\n", + " 'self.critique_chain.run(\\n'\n", + " ' '\n", + " 'input_prompt=input_prompt,\\n'\n", + " ' '\n", + " 'output_from_model=response,\\n'\n", + " ' '\n", + " 'critique_request=constitutional_principle.critique_request,\\n'\n", + " ' '\n", + " 'callbacks=_run_manager.get_child(\"critique\"),\\n'\n", + " ' )\\n'\n", + " ' critique = '\n", + " 'self._parse_critique(\\n'\n", + " ' '\n", + " 'output_string=raw_critique,',\n", + " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/constitutional_ai/base.html'},\n", + " 'score': 0.79369247,\n", + " 'values': []},\n", + " {'id': 'adea5d40-2691-4bc9-9403-3360345bc25e-0',\n", + " 'metadata': {'chunk': 0.0,\n", + " 'text': 'Source code for '\n", + " 'langchain.chains.conversation.base\\n'\n", + " '\"\"\"Chain that carries on a conversation '\n", + " 'and calls an LLM.\"\"\"\\n'\n", + " 'from typing import Dict, List\\n'\n", + " 'from pydantic import Extra, Field, '\n", + " 'root_validator\\n'\n", + " 'from langchain.chains.conversation.prompt '\n", + " 'import PROMPT\\n'\n", + " 'from langchain.chains.llm import LLMChain\\n'\n", + " 'from langchain.memory.buffer import '\n", + " 
'ConversationBufferMemory\\n'\n", + " 'from langchain.prompts.base import '\n", + " 'BasePromptTemplate\\n'\n", + " 'from langchain.schema import BaseMemory\\n'\n", + " '[docs]class ConversationChain(LLMChain):\\n'\n", + " ' \"\"\"Chain to have a conversation and '\n", + " 'load context from memory.\\n'\n", + " ' Example:\\n'\n", + " ' .. code-block:: python\\n'\n", + " ' from langchain import '\n", + " 'ConversationChain, OpenAI\\n'\n", + " ' conversation = '\n", + " 'ConversationChain(llm=OpenAI())\\n'\n", + " ' \"\"\"\\n'\n", + " ' memory: BaseMemory = '\n", + " 'Field(default_factory=ConversationBufferMemory)\\n'\n", + " ' \"\"\"Default memory store.\"\"\"\\n'\n", + " ' prompt: BasePromptTemplate = PROMPT\\n'\n", + " ' \"\"\"Default conversation prompt to '\n", + " 'use.\"\"\"\\n'\n", + " ' input_key: str = \"input\" #: :meta '\n", + " 'private:\\n'\n", + " ' output_key: str = \"response\" #: :meta '\n", + " 'private:\\n'\n", + " ' class Config:\\n'\n", + " ' \"\"\"Configuration for this pydantic '\n", + " 'object.\"\"\"\\n'\n", + " ' extra = Extra.forbid\\n'\n", + " ' arbitrary_types_allowed = True\\n'\n", + " ' @property\\n'\n", + " ' def input_keys(self) -> List[str]:\\n'\n", + " ' \"\"\"Use this since so some prompt '\n", + " 'vars come from history.\"\"\"\\n'\n", + " ' return [self.input_key]\\n'\n", + " ' @root_validator()\\n'\n", + " ' def '\n", + " 'validate_prompt_input_variables(cls, '\n", + " 'values: Dict) -> Dict:\\n'\n", + " ' \"\"\"Validate that prompt input '\n", + " 'variables are consistent.\"\"\"\\n'\n", + " ' memory_keys = '\n", + " 'values[\"memory\"].memory_variables\\n'\n", + " ' input_key = values[\"input_key\"]\\n'\n", + " ' if input_key in memory_keys:\\n'\n", + " ' raise ValueError(\\n'\n", + " ' f\"The input key '\n", + " '{input_key} was also found in the memory '\n", + " 'keys \"\\n'\n", + " ' f\"({memory_keys}) - please '\n", + " 'provide keys that don\\'t overlap.\"\\n'\n", + " ' )\\n'\n", + " ' prompt_variables = '\n", + " 'values[\"prompt\"].input_variables\\n'\n", + " ' expected_keys = memory_keys + '\n", + " '[input_key]\\n'\n", + " ' if set(expected_keys) != '\n", + " 'set(prompt_variables):\\n'\n", + " ' raise ValueError(\\n'\n", + " ' \"Got unexpected prompt '\n", + " 'input variables. 
The prompt expects \"\\n'\n", + " ' f\"{prompt_variables}, but '\n", + " 'got {memory_keys} as inputs from \"\\n'\n", + " ' f\"memory, and {input_key} '\n", + " 'as the normal input key.\"\\n'\n", + " ' )\\n'\n", + " ' return values',\n", + " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/conversation/base.html'},\n", + " 'score': 0.792259932,\n", + " 'values': []},\n", + " {'id': '3b6f9660-0346-4992-a6f5-b9cc2977f446-5',\n", + " 'metadata': {'chunk': 5.0,\n", + " 'text': 'callbacks: Callbacks = None,\\n'\n", + " ' **kwargs: Any,\\n'\n", + " ' ) -> '\n", + " 'BaseConversationalRetrievalChain:\\n'\n", + " ' \"\"\"Load chain from LLM.\"\"\"\\n'\n", + " ' combine_docs_chain_kwargs = '\n", + " 'combine_docs_chain_kwargs or {}\\n'\n", + " ' doc_chain = load_qa_chain(\\n'\n", + " ' llm,\\n'\n", + " ' chain_type=chain_type,\\n'\n", + " ' callbacks=callbacks,\\n'\n", + " ' **combine_docs_chain_kwargs,\\n'\n", + " ' )\\n'\n", + " ' condense_question_chain = '\n", + " 'LLMChain(\\n'\n", + " ' llm=llm, '\n", + " 'prompt=condense_question_prompt, '\n", + " 'callbacks=callbacks\\n'\n", + " ' )\\n'\n", + " ' return cls(\\n'\n", + " ' vectorstore=vectorstore,\\n'\n", + " ' combine_docs_chain=doc_chain,\\n'\n", + " ' '\n", + " 'question_generator=condense_question_chain,\\n'\n", + " ' callbacks=callbacks,\\n'\n", + " ' **kwargs,\\n'\n", + " ' )',\n", + " 'url': 'https://api.python.langchain.com/en/latest/_modules/langchain/chains/conversational_retrieval/base.html'},\n", + " 'score': 0.791279614,\n", + " 'values': []}],\n", + " 'namespace': ''}" ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MoBSiDLIUADZ" + }, + "source": [ + "With retrieval complete, we move on to feeding these into GPT-4 to produce answers." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qfzS4-6-UXgX" + }, + "source": [ + "## Retrieval Augmented Generation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XPC1jQaKUcy0" + }, + "source": [ + "GPT-4 is currently accessed via the `ChatCompletions` endpoint of OpenAI. To add the information we retrieved into the model, we need to pass it into our user prompts *alongside* our original query. We can do that like so:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "unZstoHNUHeG" + }, + "source": [ + "# get list of retrieved text\n", + "contexts = [item[\"metadata\"][\"text\"] for item in res[\"matches\"]]\n", + "\n", + "augmented_query = \"\\n\\n---\\n\\n\".join(contexts) + \"\\n\\n-----\\n\\n\" + query" + ], + "execution_count": 22, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "LRcEHm0Z9fXE", + "outputId": "636c6825-ecd1-4953-ee25-ebabcb3a2fed" + }, + "source": [ + "print(augmented_query)" + ], + "execution_count": 27, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "eJ-a8MHg0eYQ" - }, - "source": [ - "Let's compare this to a non-augmented query..." 
- ] + "output_type": "stream", + "text": [ + "Source code for langchain.chains.llm\n", + "\"\"\"Chain that just formats a prompt and calls an LLM.\"\"\"\n", + "from __future__ import annotations\n", + "import warnings\n", + "from typing import Any, Dict, List, Optional, Sequence, Tuple, Union\n", + "from pydantic import Extra, Field\n", + "from langchain.base_language import BaseLanguageModel\n", + "from langchain.callbacks.manager import (\n", + " AsyncCallbackManager,\n", + " AsyncCallbackManagerForChainRun,\n", + " CallbackManager,\n", + " CallbackManagerForChainRun,\n", + " Callbacks,\n", + ")\n", + "from langchain.chains.base import Chain\n", + "from langchain.input import get_colored_text\n", + "from langchain.load.dump import dumpd\n", + "from langchain.prompts.base import BasePromptTemplate\n", + "from langchain.prompts.prompt import PromptTemplate\n", + "from langchain.schema import (\n", + " BaseLLMOutputParser,\n", + " LLMResult,\n", + " NoOpOutputParser,\n", + " PromptValue,\n", + ")\n", + "[docs]class LLMChain(Chain):\n", + " \"\"\"Chain to run queries against LLMs.\n", + " Example:\n", + " .. code-block:: python\n", + " from langchain import LLMChain, OpenAI, PromptTemplate\n", + " prompt_template = \"Tell me a {adjective} joke\"\n", + " prompt = PromptTemplate(\n", + " input_variables=[\"adjective\"], template=prompt_template\n", + " )\n", + " llm = LLMChain(llm=OpenAI(), prompt=prompt)\n", + " \"\"\"\n", + " @property\n", + " def lc_serializable(self) -> bool:\n", + " return True\n", + " prompt: BasePromptTemplate\n", + " \"\"\"Prompt object to use.\"\"\"\n", + " llm: BaseLanguageModel\n", + " \"\"\"Language model to call.\"\"\"\n", + " output_key: str = \"text\" #: :meta private:\n", + " output_parser: BaseLLMOutputParser = Field(default_factory=NoOpOutputParser)\n", + " \"\"\"Output parser to use.\n", + " Defaults to one that takes the most likely string but does not change it \n", + " otherwise.\"\"\"\n", + " return_final_only: bool = True\n", + " \"\"\"Whether to return only the final parsed result. 
Defaults to True.\n", + " If false, will return a bunch of extra information about the generation.\"\"\"\n", + " llm_kwargs: dict = Field(default_factory=dict)\n", + " class Config:\n", + " \"\"\"Configuration for this pydantic object.\"\"\"\n", + " extra = Extra.forbid\n", + " arbitrary_types_allowed = True\n", + "\n", + "---\n", + "\n", + "Bases: langchain.chains.base.Chain\n", + "Chain for question-answering with self-verification.\n", + "Example\n", + "from langchain import OpenAI, LLMSummarizationCheckerChain\n", + "llm = OpenAI(temperature=0.0)\n", + "checker_chain = LLMSummarizationCheckerChain.from_llm(llm)\n", + "Parameters\n", + "memory (Optional[langchain.schema.BaseMemory]) \u2013 \n", + "callbacks (Optional[Union[List[langchain.callbacks.base.BaseCallbackHandler], langchain.callbacks.base.BaseCallbackManager]]) \u2013 \n", + "callback_manager (Optional[langchain.callbacks.base.BaseCallbackManager]) \u2013 \n", + "verbose (bool) \u2013 \n", + "tags (Optional[List[str]]) \u2013 \n", + "sequential_chain (langchain.chains.sequential.SequentialChain) \u2013 \n", + "llm (Optional[langchain.base_language.BaseLanguageModel]) \u2013 \n", + "create_assertions_prompt (langchain.prompts.prompt.PromptTemplate) \u2013 \n", + "check_assertions_prompt (langchain.prompts.prompt.PromptTemplate) \u2013 \n", + "revised_summary_prompt (langchain.prompts.prompt.PromptTemplate) \u2013 \n", + "are_all_true_prompt (langchain.prompts.prompt.PromptTemplate) \u2013 \n", + "input_key (str) \u2013 \n", + "output_key (str) \u2013 \n", + "max_checks (int) \u2013 \n", + "Return type\n", + "None\n", + "\n", + "---\n", + "\n", + "[docs] @classmethod\n", + " def from_llm(\n", + " cls,\n", + " llm: BaseLanguageModel,\n", + " chain: LLMChain,\n", + " critique_prompt: BasePromptTemplate = CRITIQUE_PROMPT,\n", + " revision_prompt: BasePromptTemplate = REVISION_PROMPT,\n", + " **kwargs: Any,\n", + " ) -> \"ConstitutionalChain\":\n", + " \"\"\"Create a chain from an LLM.\"\"\"\n", + " critique_chain = LLMChain(llm=llm, prompt=critique_prompt)\n", + " revision_chain = LLMChain(llm=llm, prompt=revision_prompt)\n", + " return cls(\n", + " chain=chain,\n", + " critique_chain=critique_chain,\n", + " revision_chain=revision_chain,\n", + " **kwargs,\n", + " )\n", + " @property\n", + " def input_keys(self) -> List[str]:\n", + " \"\"\"Defines the input keys.\"\"\"\n", + " return self.chain.input_keys\n", + " @property\n", + " def output_keys(self) -> List[str]:\n", + " \"\"\"Defines the output keys.\"\"\"\n", + " if self.return_intermediate_steps:\n", + " return [\"output\", \"critiques_and_revisions\", \"initial_output\"]\n", + " return [\"output\"]\n", + " def _call(\n", + " self,\n", + " inputs: Dict[str, Any],\n", + " run_manager: Optional[CallbackManagerForChainRun] = None,\n", + " ) -> Dict[str, Any]:\n", + " _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()\n", + " response = self.chain.run(\n", + " **inputs,\n", + " callbacks=_run_manager.get_child(\"original\"),\n", + " )\n", + " initial_response = response\n", + " input_prompt = self.chain.prompt.format(**inputs)\n", + " _run_manager.on_text(\n", + " text=\"Initial response: \" + response + \"\\n\\n\",\n", + " verbose=self.verbose,\n", + " color=\"yellow\",\n", + " )\n", + " critiques_and_revisions = []\n", + " for constitutional_principle in self.constitutional_principles:\n", + " # Do critique\n", + " raw_critique = self.critique_chain.run(\n", + " input_prompt=input_prompt,\n", + " output_from_model=response,\n", + " 
critique_request=constitutional_principle.critique_request,\n", + " callbacks=_run_manager.get_child(\"critique\"),\n", + " )\n", + " critique = self._parse_critique(\n", + " output_string=raw_critique,\n", + "\n", + "---\n", + "\n", + "Source code for langchain.chains.conversation.base\n", + "\"\"\"Chain that carries on a conversation and calls an LLM.\"\"\"\n", + "from typing import Dict, List\n", + "from pydantic import Extra, Field, root_validator\n", + "from langchain.chains.conversation.prompt import PROMPT\n", + "from langchain.chains.llm import LLMChain\n", + "from langchain.memory.buffer import ConversationBufferMemory\n", + "from langchain.prompts.base import BasePromptTemplate\n", + "from langchain.schema import BaseMemory\n", + "[docs]class ConversationChain(LLMChain):\n", + " \"\"\"Chain to have a conversation and load context from memory.\n", + " Example:\n", + " .. code-block:: python\n", + " from langchain import ConversationChain, OpenAI\n", + " conversation = ConversationChain(llm=OpenAI())\n", + " \"\"\"\n", + " memory: BaseMemory = Field(default_factory=ConversationBufferMemory)\n", + " \"\"\"Default memory store.\"\"\"\n", + " prompt: BasePromptTemplate = PROMPT\n", + " \"\"\"Default conversation prompt to use.\"\"\"\n", + " input_key: str = \"input\" #: :meta private:\n", + " output_key: str = \"response\" #: :meta private:\n", + " class Config:\n", + " \"\"\"Configuration for this pydantic object.\"\"\"\n", + " extra = Extra.forbid\n", + " arbitrary_types_allowed = True\n", + " @property\n", + " def input_keys(self) -> List[str]:\n", + " \"\"\"Use this since so some prompt vars come from history.\"\"\"\n", + " return [self.input_key]\n", + " @root_validator()\n", + " def validate_prompt_input_variables(cls, values: Dict) -> Dict:\n", + " \"\"\"Validate that prompt input variables are consistent.\"\"\"\n", + " memory_keys = values[\"memory\"].memory_variables\n", + " input_key = values[\"input_key\"]\n", + " if input_key in memory_keys:\n", + " raise ValueError(\n", + " f\"The input key {input_key} was also found in the memory keys \"\n", + " f\"({memory_keys}) - please provide keys that don't overlap.\"\n", + " )\n", + " prompt_variables = values[\"prompt\"].input_variables\n", + " expected_keys = memory_keys + [input_key]\n", + " if set(expected_keys) != set(prompt_variables):\n", + " raise ValueError(\n", + " \"Got unexpected prompt input variables. 
The prompt expects \"\n", + " f\"{prompt_variables}, but got {memory_keys} as inputs from \"\n", + " f\"memory, and {input_key} as the normal input key.\"\n", + " )\n", + " return values\n", + "\n", + "---\n", + "\n", + "callbacks: Callbacks = None,\n", + " **kwargs: Any,\n", + " ) -> BaseConversationalRetrievalChain:\n", + " \"\"\"Load chain from LLM.\"\"\"\n", + " combine_docs_chain_kwargs = combine_docs_chain_kwargs or {}\n", + " doc_chain = load_qa_chain(\n", + " llm,\n", + " chain_type=chain_type,\n", + " callbacks=callbacks,\n", + " **combine_docs_chain_kwargs,\n", + " )\n", + " condense_question_chain = LLMChain(\n", + " llm=llm, prompt=condense_question_prompt, callbacks=callbacks\n", + " )\n", + " return cls(\n", + " vectorstore=vectorstore,\n", + " combine_docs_chain=doc_chain,\n", + " question_generator=condense_question_chain,\n", + " callbacks=callbacks,\n", + " **kwargs,\n", + " )\n", + "\n", + "-----\n", + "\n", + "how do I use the LLMChain in LangChain?\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sihH_GMiV5_p" + }, + "source": [ + "Now we ask the question:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "IThBqBi8V70d" + }, + "source": [ + "# system message to 'prime' the model\n", + "primer = \"\"\"You are Q&A bot. A highly intelligent system that answers\n", + "user questions based on the information provided by the user above\n", + "each question. If the information can not be found in the information\n", + "provided by the user you truthfully say \"I don't know\".\n", + "\"\"\"\n", + "\n", + "res = openai.ChatCompletion.create(\n", + " model=\"gpt-4\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": primer},\n", + " {\"role\": \"user\", \"content\": augmented_query},\n", + " ],\n", + ")" + ], + "execution_count": 28, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QvS1yJhOWpiJ" + }, + "source": [ + "To display this response nicely, we will display it in markdown." + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 465 }, + "id": "RDo2qeMHWto1", + "outputId": "9a9b677f-9b4f-4f77-822d-80baf75ed04a" + }, + "source": [ + "display(Markdown(res[\"choices\"][0][\"message\"][\"content\"]))" + ], + "execution_count": 29, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 46 - }, - "id": "vwhaSgdF0ZDX", - "outputId": "ce085b0f-e0da-4c00-f3f5-43b49e64568c" - }, - "source": [ - "res = openai.ChatCompletion.create(\n", - " model=\"gpt-4\",\n", - " messages=[\n", - " {\"role\": \"system\", \"content\": primer},\n", - " {\"role\": \"user\", \"content\": query},\n", - " ],\n", - ")\n", - "display(Markdown(res[\"choices\"][0][\"message\"][\"content\"]))" + "output_type": "display_data", + "data": { + "text/markdown": [ + "To use the LLMChain in LangChain, you need to first import the necessary modules and classes. In this example, we will use the OpenAI language model. Follow the steps below:\n", + "\n", + "1. Import all required modules and classes:\n", + "\n", + "```python\n", + "from langchain import LLMChain, OpenAI, PromptTemplate\n", + "```\n", + "\n", + "2. Define the prompt template you want to use with the language model. For example, if you want to create jokes based on provided adjectives:\n", + "\n", + "```python\n", + "prompt_template = \"Tell me a {adjective} joke\"\n", + "```\n", + "\n", + "3. 
Create a PromptTemplate object passing the input_variables and template:\n", + "\n", + "```python\n", + "prompt = PromptTemplate(input_variables=[\"adjective\"], template=prompt_template)\n", + "```\n", + "\n", + "4. Instantiate the OpenAI language model:\n", + "\n", + "```python\n", + "llm = OpenAI()\n", + "```\n", + "\n", + "5. Create the LLMChain object using the OpenAI language model and the created prompt:\n", + "\n", + "```python\n", + "llm_chain = LLMChain(llm=llm, prompt=prompt)\n", + "```\n", + "\n", + "Now you can use the `llm_chain` object to generate jokes based on provided adjectives. For example:\n", + "\n", + "```python\n", + "response = llm_chain.run(adjective=\"funny\")\n", + "print(response)\n", + "```\n", + "\n", + "This will generate and print a funny joke based on the predefined prompt template. Replace `\"funny\"` with any other adjective to get a different result." ], - "execution_count": 30, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/markdown": [ - "I don't know." - ], - "text/plain": [ - "" - ] - } - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5CSsA-dW0m_P" - }, - "source": [ - "If we drop the `\"I don't know\"` part of the `primer`?" + "text/plain": [ + "" ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eJ-a8MHg0eYQ" + }, + "source": [ + "Let's compare this to a non-augmented query..." + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 46 }, + "id": "vwhaSgdF0ZDX", + "outputId": "ce085b0f-e0da-4c00-f3f5-43b49e64568c" + }, + "source": [ + "res = openai.ChatCompletion.create(\n", + " model=\"gpt-4\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": primer},\n", + " {\"role\": \"user\", \"content\": query},\n", + " ],\n", + ")\n", + "display(Markdown(res[\"choices\"][0][\"message\"][\"content\"]))" + ], + "execution_count": 30, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 371 - }, - "id": "Z3svdTCZ0iJ2", - "outputId": "19673965-a2f8-45be-b82a-6e491aa88416" - }, - "source": [ - "res = openai.ChatCompletion.create(\n", - " model=\"gpt-4\",\n", - " messages=[\n", - " {\n", - " \"role\": \"system\",\n", - " \"content\": \"You are Q&A bot. A highly intelligent system that answers user questions\",\n", - " },\n", - " {\"role\": \"user\", \"content\": query},\n", - " ],\n", - ")\n", - "display(Markdown(res[\"choices\"][0][\"message\"][\"content\"]))" + "output_type": "display_data", + "data": { + "text/markdown": [ + "I don't know." ], - "execution_count": 31, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/markdown": [ - "LLMChain, which stands for LangChain's Language Model Chain, is a feature within the LangChain ecosystem that allows connecting multiple language models to achieve more accurate translations and processing of natural language data.\n", - "\n", - "To use the LLMChain in LangChain, follow these steps:\n", - "\n", - "1. Sign up or log in: If you don't have an account with LangChain, sign up or log in to your existing account.\n", - "\n", - "2. Configure the LLMChain: Navigate to the LLMChain settings or configuration page (it may be under \"Settings\" or \"LLMChain Configuration\"). Here, you'll add, remove, or re-order language models in your chain.\n", - "\n", - "3. Add language models: Choose from the available language models and add them to your chain. 
Typically, language models are selected based on their performance or scope for specific language pairs or types of text.\n", - "\n", - "4. Set the order of language models: Arrange the order of the language models in your chain based on your preferences or needs. The LLMChain will process the input text in the order you've set, starting from the first model, and pass the output to the subsequent models in the chain.\n", - "\n", - "5. Test the LLMChain: Once you have configured your LLMChain, test it by inputting text and reviewing the generated translations or processed output. This step will allow you to fine-tune the chain to ensure optimal performance.\n", - "\n", - "6. Use the LLMChain in your translation projects or language processing tasks: With your LLMChain set up and tested, you can now use it for your translation or language processing needs.\n", - "\n", - "Remember that the LLMChain is part of the LangChain ecosystem, so any changes or modifications to it may require some knowledge of the platform and its interface. If needed, consult the official documentation or seek support from the community to ensure a seamless experience." - ], - "text/plain": [ - "" - ] - } - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GcGon5672lBb" - }, - "source": [ - "Then we see something even worse than `\"I don't know\"` — hallucinations. Clearly augmenting our queries with additional context can make a huge difference to the performance of our system.\n", - "\n", - "Great, we've seen how to augment GPT-4 with semantic search to allow us to answer LangChain specific queries.\n", - "\n", - "Once you're finished, we delete the index to save resources." + "text/plain": [ + "" ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5CSsA-dW0m_P" + }, + "source": [ + "If we drop the `\"I don't know\"` part of the `primer`?" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 371 }, + "id": "Z3svdTCZ0iJ2", + "outputId": "19673965-a2f8-45be-b82a-6e491aa88416" + }, + "source": [ + "res = openai.ChatCompletion.create(\n", + " model=\"gpt-4\",\n", + " messages=[\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": \"You are Q&A bot. A highly intelligent system that answers user questions\",\n", + " },\n", + " {\"role\": \"user\", \"content\": query},\n", + " ],\n", + ")\n", + "display(Markdown(res[\"choices\"][0][\"message\"][\"content\"]))" + ], + "execution_count": 31, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "Ah_vfEHV2khx" - }, - "source": [ - "pc.delete_index(index_name)" + "output_type": "display_data", + "data": { + "text/markdown": [ + "LLMChain, which stands for LangChain's Language Model Chain, is a feature within the LangChain ecosystem that allows connecting multiple language models to achieve more accurate translations and processing of natural language data.\n", + "\n", + "To use the LLMChain in LangChain, follow these steps:\n", + "\n", + "1. Sign up or log in: If you don't have an account with LangChain, sign up or log in to your existing account.\n", + "\n", + "2. Configure the LLMChain: Navigate to the LLMChain settings or configuration page (it may be under \"Settings\" or \"LLMChain Configuration\"). Here, you'll add, remove, or re-order language models in your chain.\n", + "\n", + "3. Add language models: Choose from the available language models and add them to your chain. 
Typically, language models are selected based on their performance or scope for specific language pairs or types of text.\n",
+ "\n",
+ "4. Set the order of language models: Arrange the order of the language models in your chain based on your preferences or needs. The LLMChain will process the input text in the order you've set, starting from the first model, and pass the output to the subsequent models in the chain.\n",
+ "\n",
+ "5. Test the LLMChain: Once you have configured your LLMChain, test it by inputting text and reviewing the generated translations or processed output. This step will allow you to fine-tune the chain to ensure optimal performance.\n",
+ "\n",
+ "6. Use the LLMChain in your translation projects or language processing tasks: With your LLMChain set up and tested, you can now use it for your translation or language processing needs.\n",
+ "\n",
+ "Remember that the LLMChain is part of the LangChain ecosystem, so any changes or modifications to it may require some knowledge of the platform and its interface. If needed, consult the official documentation or seek support from the community to ensure a seamless experience."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "GcGon5672lBb"
+ },
+ "source": [
+ "Then we see something even worse than `\"I don't know\"` \u2014 hallucinations. Clearly augmenting our queries with additional context can make a huge difference to the performance of our system.\n",
+ "\n",
+ "Great, we've seen how to augment GPT-4 with semantic search to allow us to answer LangChain-specific queries.\n",
+ "\n",
+ "Once you're finished, delete the index to save resources.\n",
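+ "\n",
+ "As a small safety net, you can guard the deletion so that re-running the cell does not raise on an already-deleted index. This is a minimal sketch, assuming the modern Pinecone client's `pc.list_indexes().names()` helper:\n",
+ "\n",
+ "```python\n",
+ "# Only delete the index if it still exists in this project\n",
+ "if index_name in pc.list_indexes().names():\n",
+ "    pc.delete_index(index_name)\n",
+ "```"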
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Ah_vfEHV2khx" + }, + "source": [ + "pc.delete_index(index_name)" + ], + "execution_count": 32, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iEUMlO8M2h4Y" + }, + "source": [ + "---" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } \ No newline at end of file From 79814bbcba9790249dd8b52a471cb032f1d2a946 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Thu, 29 Jan 2026 07:59:01 -0500 Subject: [PATCH 5/8] fix: merge imports with pip install and update broken link - Combine pip install and imports into first code cell per guidelines - Remove empty imports cell - Update broken docs.pinecone.io/docs/projects link - Fix notebook output metadata --- .../openai/gpt-4-langchain-docs.ipynb | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/learn/generation/openai/gpt-4-langchain-docs.ipynb b/learn/generation/openai/gpt-4-langchain-docs.ipynb index f5ea170f..70fd5312 100644 --- a/learn/generation/openai/gpt-4-langchain-docs.ipynb +++ b/learn/generation/openai/gpt-4-langchain-docs.ipynb @@ -26,15 +26,8 @@ " openai==0.27.7 \\\n", " langchain==0.0.179 \\\n", " pinecone \\\n", - " datasets==2.13.1" - ], - "execution_count": 1, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ + " datasets==2.13.1\n", + "\n", "import os\n", "import time\n", "\n", @@ -46,6 +39,15 @@ "from pinecone import Pinecone, ServerlessSpec\n", "from tqdm.auto import tqdm" ], + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "" + ], "execution_count": null, "outputs": [] }, @@ -1144,7 +1146,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now we setup our index specification, this allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects)." + "Now we setup our index specification, this allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/troubleshooting/available-cloud-regions)." 
]
 },
 {

From 9024c650a8fca14492a75065aee24229a02a54ae Mon Sep 17 00:00:00 2001
From: Jen Hamon
Date: Thu, 29 Jan 2026 08:03:34 -0500
Subject: [PATCH 6/8] fix: restructure notebook to pass all CI checks

- Move imports to first code cell, pip install to second code cell
- Format pip install on single line to avoid check-pinning false positive
- Pin pinecone==7.0.0
- Fix documentation link to correct cloud regions page
- Fix notebook output metadata
---
 .../openai/gpt-4-langchain-docs.ipynb | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/learn/generation/openai/gpt-4-langchain-docs.ipynb b/learn/generation/openai/gpt-4-langchain-docs.ipynb
index 70fd5312..e1a302c5 100644
--- a/learn/generation/openai/gpt-4-langchain-docs.ipynb
+++ b/learn/generation/openai/gpt-4-langchain-docs.ipynb
@@ -21,13 +21,6 @@
    "id": "_HDKlQO5svqI"
  },
  "source": [
-    "!pip install -qU \\\n",
-    " tiktoken==0.4.0 \\\n",
-    " openai==0.27.7 \\\n",
-    " langchain==0.0.179 \\\n",
-    " pinecone \\\n",
-    " datasets==2.13.1\n",
-    "\n",
     "import os\n",
     "import time\n",
@@ -46,11 +39,18 @@
   "cell_type": "code",
   "metadata": {},
   "source": [
-    ""
+    "!pip install -qU tiktoken==0.4.0 openai==0.27.7 langchain==0.0.179 pinecone==7.0.0 datasets==2.13.1"
   ],
   "execution_count": null,
   "outputs": []
  },
+ {
+  "cell_type": "code",
+  "metadata": {},
+  "source": [],
+  "execution_count": null,
+  "outputs": []
+ },
 {
  "cell_type": "markdown",
  "metadata": {
@@ -1146,7 +1146,7 @@
  "cell_type": "markdown",
  "metadata": {},
  "source": [
-   "Now we setup our index specification, this allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/troubleshooting/available-cloud-regions)."
+   "Now we set up our index specification. This allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/guides/index-data/create-an-index#cloud-regions)."
  ]
 },
 {

From f7bcf20dfcab7c97be0170f4b379a4267f94599b Mon Sep 17 00:00:00 2001
From: Jen Hamon
Date: Thu, 29 Jan 2026 08:05:10 -0500
Subject: [PATCH 7/8] fix: remove empty code cell

Address Bugbot review feedback by removing leftover empty code cell.
---
 learn/generation/openai/gpt-4-langchain-docs.ipynb | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/learn/generation/openai/gpt-4-langchain-docs.ipynb b/learn/generation/openai/gpt-4-langchain-docs.ipynb
index e1a302c5..bbe9ada0 100644
--- a/learn/generation/openai/gpt-4-langchain-docs.ipynb
+++ b/learn/generation/openai/gpt-4-langchain-docs.ipynb
@@ -44,13 +44,6 @@
   "execution_count": null,
   "outputs": []
  },
- {
-  "cell_type": "code",
-  "metadata": {},
-  "source": [],
-  "execution_count": null,
-  "outputs": []
- },
 {
  "cell_type": "markdown",
  "metadata": {

From 5444ff91e11c5d213dc0b985eeeb7f385da37cf8 Mon Sep 17 00:00:00 2001
From: Jen Hamon
Date: Thu, 29 Jan 2026 11:26:36 -0500
Subject: [PATCH 8/8] fix(docs): run pip install before imports in gpt-4-langchain-docs.ipynb

Addresses Bugbot feedback: move pip install cell before the imports cell
so the notebook runs correctly in a fresh Colab environment.
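
For reference, the resulting top-of-notebook cell order (taken directly from
the diff below; the remaining imports are unchanged from the previous commit)
is:

    # cell 1: install pinned dependencies into the fresh environment
    !pip install -qU tiktoken==0.4.0 openai==0.27.7 langchain==0.0.179 pinecone==7.0.0 datasets==2.13.1

    # cell 2: imports now resolve against the packages installed above
    import os
    import time
    # ... remaining imports as before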
--- .../openai/gpt-4-langchain-docs.ipynb | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/learn/generation/openai/gpt-4-langchain-docs.ipynb b/learn/generation/openai/gpt-4-langchain-docs.ipynb index bbe9ada0..ab7472dd 100644 --- a/learn/generation/openai/gpt-4-langchain-docs.ipynb +++ b/learn/generation/openai/gpt-4-langchain-docs.ipynb @@ -15,6 +15,15 @@ "In this notebook we'll work through an example of using GPT-4 with retrieval augmentation to answer questions about the LangChain Python library." ] }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "!pip install -qU tiktoken==0.4.0 openai==0.27.7 langchain==0.0.179 pinecone==7.0.0 datasets==2.13.1" + ], + "execution_count": null, + "outputs": [] + }, { "cell_type": "code", "metadata": { @@ -32,15 +41,6 @@ "from pinecone import Pinecone, ServerlessSpec\n", "from tqdm.auto import tqdm" ], - "execution_count": 1, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "!pip install -qU tiktoken==0.4.0 openai==0.27.7 langchain==0.0.179 pinecone==7.0.0 datasets==2.13.1" - ], "execution_count": null, "outputs": [] },