pinecone-io · jhamon · Jan 29, 2026 · Jan 29, 2026
diff --git a/docs/pinecone-reranker.ipynb b/docs/pinecone-reranker.ipynb
@@ -54,7 +54,7 @@
     "10. **Rerank Results**\n",
     "\n",
     "\n",
-    "The main dataset we will be using consists of randomly generated doctor’s notes sample data. The original JSON data has been embedded into vectors, which we will load into Pinecone.\n"
+    "The main dataset we will be using consists of randomly generated doctor\u2019s notes sample data. The original JSON data has been embedded into vectors, which we will load into Pinecone.\n"
    ]
   },
   {
@@ -66,8 +66,8 @@
    },
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
+     "name": "stdout",
      "text": [
       "Requirement already satisfied: pinecone==6.0.1 in /opt/conda/lib/python3.12/site-packages (6.0.1)\n",
       "Requirement already satisfied: certifi>=2019.11.17 in /opt/conda/lib/python3.12/site-packages (from pinecone==6.0.1) (2025.1.31)\n",
@@ -82,8 +82,7 @@
    ],
    "source": [
     "# Installation\n",
-    "!pip install -U pinecone==6.0.1\n",
-    "!pip install -U pinecone-notebooks"
+    "!pip install -qU pinecone==8.0.0 pinecone-notebooks"
    ]
   },
   {
@@ -99,6 +98,7 @@
     "\n",
     "if not os.environ.get(\"PINECONE_API_KEY\"):\n",
     "    from pinecone_notebooks.colab import Authenticate\n",
+    "\n",
     "    Authenticate()"
    ]
   },
@@ -111,8 +111,8 @@
    },
    "outputs": [
     {
-     "name": "stderr",
      "output_type": "stream",
+     "name": "stderr",
      "text": [
       "/opt/conda/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
       "  from .autonotebook import tqdm as notebook_tqdm\n"
@@ -144,7 +144,7 @@
     "    \"Apple is known for its innovative products like the iPhone.\",\n",
     "    \"Many people enjoy eating apples as a healthy snack.\",\n",
     "    \"Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.\",\n",
-    "    \"An apple a day keeps the doctor away, as the saying goes.\"\n",
+    "    \"An apple a day keeps the doctor away, as the saying goes.\",\n",
     "]"
    ]
   },
@@ -166,9 +166,9 @@
        " 'An apple a day keeps the doctor away, as the saying goes.']"
       ]
      },
-     "execution_count": 5,
+     "output_type": "execute_result",
      "metadata": {},
-     "output_type": "execute_result"
+     "execution_count": 5
     }
    ],
    "source": [
@@ -192,7 +192,7 @@
     "    query=query,\n",
     "    documents=documents,\n",
     "    top_n=3,\n",
-    "    return_documents=True\n",
+    "    return_documents=True,\n",
     ")"
    ]
   },
@@ -205,8 +205,8 @@
    },
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
+     "name": "stdout",
      "text": [
       "Query: Tell me about Apple's products\n",
       "Reranked Results:\n",
@@ -225,12 +225,13 @@
    "source": [
     "def show_reranked_results(query, matches):\n",
     "    \"\"\"A utility function to print our reranked results\"\"\"\n",
-    "    print(f'Query: {query}')\n",
-    "    print('Reranked Results:')\n",
+    "    print(f\"Query: {query}\")\n",
+    "    print(\"Reranked Results:\")\n",
     "    for i, match in enumerate(matches):\n",
-    "        print(f'{str(i+1).rjust(4)}. Score: {match.score}')\n",
-    "        print(f'      Document: {match.document.text}')\n",
-    "        print('')\n",
+    "        print(f\"{str(i + 1).rjust(4)}. Score: {match.score}\")\n",
+    "        print(f\"      Document: {match.document.text}\")\n",
+    "        print(\"\")\n",
+    "\n",
     "\n",
     "# Note the reranker ranks Apple the company over apple the fruit based on the context of the query\n",
     "show_reranked_results(query, reranked_results.data)"
@@ -256,8 +257,8 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
+     "name": "stdout",
      "text": [
       "Requirement already satisfied: pandas in /opt/conda/lib/python3.12/site-packages (2.2.3)\n",
       "Requirement already satisfied: torch in /opt/conda/lib/python3.12/site-packages (2.6.0)\n",
@@ -306,10 +307,11 @@
    "source": [
     "import os\n",
     "import time\n",
+    "\n",
     "import pandas as pd\n",
+    "import torch\n",
     "from pinecone import Pinecone, ServerlessSpec\n",
-    "from transformers import AutoTokenizer, AutoModel\n",
-    "import torch"
+    "from transformers import AutoModel, AutoTokenizer"
    ]
   },
   {
@@ -322,14 +324,14 @@
    "outputs": [],
    "source": [
     "# Get cloud and region settings\n",
-    "cloud = os.getenv('PINECONE_CLOUD', 'aws')\n",
-    "region = os.getenv('PINECONE_REGION', 'us-east-1')\n",
+    "cloud = os.getenv(\"PINECONE_CLOUD\", \"aws\")\n",
+    "region = os.getenv(\"PINECONE_REGION\", \"us-east-1\")\n",
     "\n",
     "# Define serverless specifications\n",
     "spec = ServerlessSpec(cloud=cloud, region=region)\n",
     "\n",
     "# Define index name\n",
-    "index_name = 'pinecone-reranker'"
+    "index_name = \"pinecone-reranker\""
    ]
   },
   {
@@ -364,22 +366,17 @@
        "}"
       ]
      },
-     "execution_count": 11,
+     "output_type": "execute_result",
      "metadata": {},
-     "output_type": "execute_result"
+     "execution_count": 11
     }
    ],
    "source": [
     "if pc.has_index(name=index_name):\n",
     "    pc.delete_index(name=index_name)\n",
     "\n",
     "# Create a new index\n",
-    "pc.create_index(\n",
-    "    name=index_name, \n",
-    "    dimension=384, \n",
-    "    metric='cosine', \n",
-    "    spec=spec\n",
-    ")"
+    "pc.create_index(name=index_name, dimension=384, metric=\"cosine\", spec=spec)"
    ]
   },
   {
@@ -477,29 +474,30 @@
        "4  {'referral': 'dermatology', 'condition': 'susp...  "
       ]
      },
-     "execution_count": 13,
+     "output_type": "execute_result",
      "metadata": {},
-     "output_type": "execute_result"
+     "execution_count": 13
     }
    ],
    "source": [
-    "import requests\n",
     "import tempfile\n",
     "\n",
+    "import requests\n",
+    "\n",
     "with tempfile.TemporaryDirectory() as tmpdirname:\n",
     "    # Construct the full path for the file within the temporary directory.\n",
     "    file_path = os.path.join(tmpdirname, \"sample_notes_data.jsonl\")\n",
-    "    \n",
+    "\n",
     "    # Download the file from github\n",
     "    url = \"https://raw.githubusercontent.com/pinecone-io/examples/refs/heads/master/docs/data/sample_notes_data.jsonl\"\n",
     "    response = requests.get(url)\n",
-    "    response.raise_for_status() # Raise an exception for any HTTP errors.\n",
-    "    \n",
+    "    response.raise_for_status()  # Raise an exception for any HTTP errors.\n",
+    "\n",
     "    # Write the file content to the temporary directory.\n",
     "    with open(file_path, \"wb\") as f:\n",
     "        f.write(response.content)\n",
     "\n",
-    "    df = pd.read_json(file_path, orient='records', lines=True)\n",
+    "    df = pd.read_json(file_path, orient=\"records\", lines=True)\n",
     "\n",
     "# Show head of the DataFrame\n",
     "df.head()"
@@ -524,10 +522,10 @@
    },
    "outputs": [
     {
-     "name": "stderr",
      "output_type": "stream",
+     "name": "stderr",
      "text": [
-      "sending upsert requests: 100%|██████████| 100/100 [00:00<00:00, 200.29it/s]\n"
+      "sending upsert requests: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 100/100 [00:00<00:00, 200.29it/s]\n"
      ]
     },
     {
@@ -536,9 +534,9 @@
        "{'upserted_count': 100}"
       ]
      },
-     "execution_count": 14,
+     "output_type": "execute_result",
      "metadata": {},
-     "output_type": "execute_result"
+     "execution_count": 14
     }
    ],
    "source": [
@@ -558,8 +556,8 @@
    },
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
+     "name": "stdout",
      "text": [
       "Vector count:  0\n",
       "Vector count:  0\n",
@@ -584,20 +582,19 @@
        " 'vector_type': 'dense'}"
       ]
      },
-     "execution_count": 15,
+     "output_type": "execute_result",
      "metadata": {},
-     "output_type": "execute_result"
+     "execution_count": 15
     }
    ],
    "source": [
-    "import time\n",
-    "\n",
     "def is_fresh(index):\n",
     "    stats = index.describe_index_stats()\n",
     "    vector_count = stats.total_vector_count\n",
-    "    print(f\"Vector count: \", vector_count)\n",
+    "    print(\"Vector count: \", vector_count)\n",
     "    return vector_count > 0\n",
     "\n",
+    "\n",
     "while not is_fresh(index):\n",
     "    # It takes a few moments for vectors we just upserted\n",
     "    # to become available for querying\n",
@@ -627,11 +624,13 @@
    "outputs": [],
    "source": [
     "def get_embedding(input_question):\n",
-    "    model_name = 'sentence-transformers/all-MiniLM-L6-v2' # HuggingFace Model\n",
+    "    model_name = \"sentence-transformers/all-MiniLM-L6-v2\"  # HuggingFace Model\n",
     "    tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
     "    model = AutoModel.from_pretrained(model_name)\n",
     "\n",
-    "    encoded_input = tokenizer(input_question, padding=True, truncation=True, return_tensors='pt')\n",
+    "    encoded_input = tokenizer(\n",
+    "        input_question, padding=True, truncation=True, return_tensors=\"pt\"\n",
+    "    )\n",
     "\n",
     "    with torch.no_grad():\n",
     "        model_output = model(**encoded_input)\n",
@@ -659,8 +658,8 @@
    },
    "outputs": [
     {
-     "name": "stderr",
      "output_type": "stream",
+     "name": "stderr",
      "text": [
       "/pytorch/third_party/ideep/mkl-dnn/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h, 451: Can't read MIDR_EL1 sysfs entry\n"
      ]
@@ -675,7 +674,7 @@
     "results = index.query(vector=[query], top_k=10, include_metadata=True)\n",
     "\n",
     "# Sort results by score in descending order\n",
-    "sorted_matches = sorted(results['matches'], key=lambda x: x['score'], reverse=True)"
+    "sorted_matches = sorted(results[\"matches\"], key=lambda x: x[\"score\"], reverse=True)"
    ]
   },
   {
@@ -699,13 +698,13 @@
    "source": [
     "def show_results(question, matches):\n",
     "    \"\"\"A utility function to print our results\"\"\"\n",
-    "    print(f'Question: \\'{question}\\'')\n",
-    "    print('\\nResults:')\n",
+    "    print(f\"Question: '{question}'\")\n",
+    "    print(\"\\nResults:\")\n",
     "    for i, match in enumerate(matches):\n",
-    "        print(f'{str(i+1).rjust(4)}. ID: {match[\"id\"]}')\n",
-    "        print(f'      Score: {match[\"score\"]}')\n",
-    "        print(f'      Metadata: {match[\"metadata\"]}')\n",
-    "        print('')"
+    "        print(f\"{str(i + 1).rjust(4)}. ID: {match['id']}\")\n",
+    "        print(f\"      Score: {match['score']}\")\n",
+    "        print(f\"      Metadata: {match['metadata']}\")\n",
+    "        print(\"\")"
    ]
   },
   {
@@ -715,8 +714,8 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
+     "name": "stdout",
      "text": [
       "Question: 'what if my patient has leg pain'\n",
       "\n",
@@ -790,10 +789,12 @@
     "# Create documents with concatenated metadata field as \"reranking_field\" field\n",
     "transformed_documents = [\n",
     "    {\n",
-    "        'id': match['id'],\n",
-    "        'reranking_field': '; '.join([f\"{key}: {value}\" for key, value in match['metadata'].items()])\n",
+    "        \"id\": match[\"id\"],\n",
+    "        \"reranking_field\": \"; \".join(\n",
+    "            [f\"{key}: {value}\" for key, value in match[\"metadata\"].items()]\n",
+    "        ),\n",
     "    }\n",
-    "    for match in results['matches']\n",
+    "    for match in results[\"matches\"]\n",
     "]"
    ]
   },
@@ -829,8 +830,8 @@
    },
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
+     "name": "stdout",
      "text": [
       "Question: 'what if my patient had knee surgery'\n",
       "\n",
@@ -849,13 +850,14 @@
    "source": [
     "def show_reranked_results(question, matches):\n",
     "    \"\"\"A utility function to print our reranked results\"\"\"\n",
-    "    print(f'Question: \\'{question}\\'')\n",
-    "    print('\\nReranked Results:')\n",
+    "    print(f\"Question: '{question}'\")\n",
+    "    print(\"\\nReranked Results:\")\n",
     "    for i, match in enumerate(matches):\n",
-    "        print(f'{str(i+1).rjust(4)}. ID: {match.document.id}')\n",
-    "        print(f'      Score: {match.score}')\n",
-    "        print(f'      Reranking Field: {match.document.reranking_field}')\n",
-    "        print('')\n",
+    "        print(f\"{str(i + 1).rjust(4)}. ID: {match.document.id}\")\n",
+    "        print(f\"      Score: {match.score}\")\n",
+    "        print(f\"      Reranking Field: {match.document.reranking_field}\")\n",
+    "        print(\"\")\n",
+    "\n",
     "\n",
     "show_reranked_results(query, reranked_results_field.data)"
    ]
@@ -907,4 +909,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
+}