From 71c7d2c14cdd0b2484606022a62116a13b1e5831 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Sun, 1 Feb 2026 06:21:18 +0000 Subject: [PATCH] feat: modernize ner-powered-search notebook to Pinecone SDK v8 Update the NER-powered semantic search notebook to use modern Pinecone SDK v8: - Update pip install from pinecone-client==3.1.0 to pinecone package - Update Pinecone imports and initialization to v8 patterns - Implement standard API key handling with getpass fallback - Add Prerequisites section listing required packages and API keys - Remove cell outputs per repository standards This change aligns the notebook with current SDK best practices and ensures compatibility with Pinecone SDK v8. Refs: SDK-161 Co-Authored-By: Claude Sonnet 4.5 --- .../ner-search/ner-powered-search.ipynb | 2613 ++++++----------- 1 file changed, 917 insertions(+), 1696 deletions(-) diff --git a/learn/search/semantic-search/ner-search/ner-powered-search.ipynb b/learn/search/semantic-search/ner-search/ner-powered-search.ipynb index 423defcf..4b90edf1 100644 --- a/learn/search/semantic-search/ner-search/ner-powered-search.ipynb +++ b/learn/search/semantic-search/ner-search/ner-powered-search.ipynb @@ -1,1699 +1,920 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "63tq0KVRnucw" - }, - "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/search/semantic-search/ner-search/ner-powered-search.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/search/semantic-search/ner-search/ner-powered-search.ipynb)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zMAEGl3xfu6u" - }, - "source": [ - "# NER Powered Semantic Search" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jRtbBZg0NUiy" - }, - "source": [ - "This notebook shows how to use Named Entity Recognition (NER) for hybrid metadata + vector search with Pinecone. We will:\n", - "\n", - "1. Extract named entities from text.\n", - "2. Store them in a Pinecone index as metadata (alongside respective text vectors).\n", - "3. We extract named entities from incoming queries and use them to filter and search only through records containing these named entities.\n", - "\n", - "This is particularly helpful if you want to restrict the search score to records that contain information about the named entities that are also found within the query.\n", - "\n", - "Let's get started." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_iEQFegogJ7v" - }, - "source": [ - "# Install Dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "kj2fUHdd_wzL", - "outputId": "10d4ad4d-a1a9-45d3-8793-6ffa7814ed66" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m86.0/86.0 kB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m179.1/179.1 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m519.3/519.3 kB\u001b[0m \u001b[31m32.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m96.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m65.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m27.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m60.0/60.0 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m300.4/300.4 kB\u001b[0m \u001b[31m34.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m22.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m17.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m81.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m64.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Building wheel for sentence_transformers (setup.py) ... \u001b[?25l\u001b[?25hdone\n" - ] - } - ], - "source": [ - "!pip install sentence_transformers pinecone-client==3.1.0 datasets -qU" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0YXIqN6DgPsc" - }, - "source": [ - "# Load and Prepare Dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hI5m1Qb0QaSF" - }, - "source": [ - "We use a dataset containing ~190K articles scraped from Medium. We select 50K articles from the dataset as indexing all the articles may take some time. This dataset can be loaded from the HuggingFace dataset hub as follows:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 177, - "referenced_widgets": [ - "595a13d607884046acf2a81b0d329510", - "c77f05e54728446a8fd0cd7c97bc8d24", - "ee8fb21671774d549e80adc9836531b6", - "260b4eafd25c4ae889b10c0c6d9cbb1a", - "80b4c8d5ae7441e59f785dd98abe6755", - "b2a5ccd15c0e424d8743b18154a4f33a", - "017a6dc721914357a739ad27277752e1", - "0e6bc2b9643945e5a2bed8486016ac63", - "ba212b42c31c4970853b9e15ea42e413", - "543dab16feec4ac39593ba9b8f946e26", - "9ed62ad19d274bd38d38a27440953517", - "0210760e95f04099990c786d9a0d39c8", - "a98286dfd29a4dae93440e7f8dae0966", - "757584ad98ec481c9fd5da905c25110f", - "dfedb97223784997bc62a300b35485db", - "0dc06dbb2765431c92901591f1c43fa1", - "ea09a5537edd4d8ead281fb721090424", - "29897c1ab3924d5888b9fa1dc32c4cc7", - "f136c1b328494e40bc1443cd9a2f2bce", - "00bfafd0ecc348d8bd2696dd90dd1620", - "f5a9a883511547ab81e1ed94450b75ce", - "7251a9ab66b1449698b9ac8820d794a2", - "1debb1769d3944e2aeb3f5ad9b67f413", - "d31f942d08de411ca200fdd648d15047", - "6a7c703eb75e4f8a8958bd020ab4c431", - "cd7316a4e4a7436f933d51b01632ab38", - "505b65039c334d60a236a0d294806b72", - "df7199cb469a4bac957658777a684d5f", - "0c11a6cdb1c2479a85215b22a99cde10", - "94c37c33142949179b3fc6525604bb19", - "9cad585217e74172afdab991e05c3783", - "601e5b52147e4aacbe52a3a2fd4174a5", - "5ccbfef397e148bfbbd51b34445d27c6", - "e66f580d79ac486d94405032dc946330", - "c35674279cc34689824df152a83ea246", - "2782f8409875418898a9460d769a15f6", - "efb998d8c517475e9fb6871c2f2a8907", - "f2a5ba6202654349ae05e2f540abab76", - "5c71483cfe4b448fb8856f4a82c347d7", - "52b9faf839c843f5a6c66ffaccc8b862", - "8527737d18cd4963a5f39a0620fbe585", - "798a25fd2f9e42c6ac030f546a8d8b9f", - "1fdbec4059044816869935b3b9ad443c", - "6d7e26a3ce764ce4bb2b341137a841e1", - "73c95d356a394c01a4ac58af8d4b0e08", - "5a5ea21e6ee046db8bb5ba1b28bd442b", - "29acd0abd9784dc4bc0eb26409054f23", - "64c25e5c3c89452e90049b7822df062d", - "3c8b1993dd6940dfbecf9e9c3212b776", - "19954b32467d494486ba0d8e7aab59c2", - "6762f9595abb4fdc8baf555a275fafcc", - "b341c0b9427d48d3820071b74e946a2e", - "e5a997430510492b8c29a46c4dc51f13", - "87977a8914404096b44df3a303a851fc", - "f52234039aa5453aa282aa52f28d7dc7" - ] - }, - "id": "kj18hV5SgTQ6", - "outputId": "f0675624-8ff7-46e3-ba18-afb7500f87c6" - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "595a13d607884046acf2a81b0d329510", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Downloading readme: 0%| | 0.00/2.26k [00:00\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titletexturlauthorstimestamptags
4172How the Data Stole Christmasby Anonymous\\n\\nThe door sprung open and our t...https://medium.com/data-ops/how-the-data-stole...[]2019-12-24 13:22:33.143000+00:00['Data Science', 'Big Data', 'Dataops', 'Analy...
174868Automating Light Switch using the ESP32 Board ...A story about how I escaped the boring task th...https://python.plainenglish.io/automating-ligh...['Tomas Rasymas']2021-09-14 07:20:52.342000+00:00['Programming', 'Python', 'Software Developmen...
100171Keep Going Quotes Sayings for When Hope is LostIt\u2019s a very thrilling thing to achieve a goal....https://medium.com/@yourselfquotes/keep-going-...['Yourself Quotes']2021-01-05 12:13:04.018000+00:00['Quotes']
141757When Will the Smoke Clear From Bay Area Skies?Bay Area cities are contending with some of th...https://thebolditalic.com/when-will-the-smoke-...['Matt Charnock']2020-09-15 22:38:33.924000+00:00['Bay Area', 'San Francisco', 'California', 'W...
183489The ABC\u2019s of Sustainability\u2026 easy as 1, 2, 3By Julia DiPrete\\n\\n(according to the Jackson ...https://medium.com/sipwines/the-abcs-of-sustai...['Sip Wines']2021-03-02 23:39:49.948000+00:00['Wine Tasting', 'Sustainability', 'Wine']
\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - "
\n", - " \n", - "
\n", - "\n", - "\n", - "\n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - "
\n", - " \n" - ], - "text/plain": [ - " title \\\n", - "4172 How the Data Stole Christmas \n", - "174868 Automating Light Switch using the ESP32 Board ... \n", - "100171 Keep Going Quotes Sayings for When Hope is Lost \n", - "141757 When Will the Smoke Clear From Bay Area Skies? \n", - "183489 The ABC\u2019s of Sustainability\u2026 easy as 1, 2, 3 \n", - "\n", - " text \\\n", - "4172 by Anonymous\\n\\nThe door sprung open and our t... \n", - "174868 A story about how I escaped the boring task th... \n", - "100171 It\u2019s a very thrilling thing to achieve a goal.... \n", - "141757 Bay Area cities are contending with some of th... \n", - "183489 By Julia DiPrete\\n\\n(according to the Jackson ... \n", - "\n", - " url \\\n", - "4172 https://medium.com/data-ops/how-the-data-stole... \n", - "174868 https://python.plainenglish.io/automating-ligh... \n", - "100171 https://medium.com/@yourselfquotes/keep-going-... \n", - "141757 https://thebolditalic.com/when-will-the-smoke-... \n", - "183489 https://medium.com/sipwines/the-abcs-of-sustai... \n", - "\n", - " authors timestamp \\\n", - "4172 [] 2019-12-24 13:22:33.143000+00:00 \n", - "174868 ['Tomas Rasymas'] 2021-09-14 07:20:52.342000+00:00 \n", - "100171 ['Yourself Quotes'] 2021-01-05 12:13:04.018000+00:00 \n", - "141757 ['Matt Charnock'] 2020-09-15 22:38:33.924000+00:00 \n", - "183489 ['Sip Wines'] 2021-03-02 23:39:49.948000+00:00 \n", - "\n", - " tags \n", - "4172 ['Data Science', 'Big Data', 'Dataops', 'Analy... \n", - "174868 ['Programming', 'Python', 'Software Developmen... \n", - "100171 ['Quotes'] \n", - "141757 ['Bay Area', 'San Francisco', 'California', 'W... \n", - "183489 ['Wine Tasting', 'Sustainability', 'Wine'] " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# drop empty rows and select 20k articles\n", - "df = df.dropna().sample(20000, random_state=32)\n", - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NY8gj97qm3WU" - }, - "source": [ - "We will use the article title and its text for generating embeddings. For that, we join the article title and the first 1000 characters from the article text." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "Sb2bEL7YOMjr" - }, - "outputs": [], - "source": [ - "# select first 1000 characters\n", - "df[\"text\"] = df[\"text\"].str[:1000]\n", - "# join article title and the text\n", - "df[\"title_text\"] = df[\"title\"] + \". \" + df[\"text\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ciqKCl9gbN7s" - }, - "source": [ - "# Initialize NER Model" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9pZkI5KIRYkE" - }, - "source": [ - "To extract named entities, we will use a NER model finetuned on a BERT-base model. The model can be loaded from the HuggingFace model hub as follows:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "kyu5qdDMooua" - }, - "outputs": [], - "source": [ - "import torch\n", - "\n", - "# set device to GPU if available\n", - "device = torch.cuda.current_device() if torch.cuda.is_available() else None" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 281, - "referenced_widgets": [ - "574bebb9c591418da4274cd6dd2adcb2", - "fbfeee08972f40cda6a16793554ae8d6", - "cc24c0737e3d4c90b0273566bb3b1795", - "951c2f577f4e4f1c9b5e570f6eda34db", - "45d5fa1cd27d4348a581b0ca193e2f36", - "c55758a905e3454dbe231beae355b6e9", - "1cb8abc5abc246a3aabfbdb2a5574c07", - "b0aaafae83de455699caa7276ddd429e", - "84ebb0599eb94819a47e91caebc4ca38", - "3d303b0386894f179cbb353f6f35268e", - "7338ef4fbea9494198e0b57475cfe58f", - "57d3f1e6eea04c8cb9cd271917504b3d", - "106e8db09a2a448baf06e090ccae8d9b", - "8c0ba62486b4438abdc72c9e7aaa1f22", - "27dc348a6644416cb8868bfd667fa256", - "7d13274d9479473ebf75d8a78c267151", - "bae741fc67ae4322b5870b8c3a3c3fa4", - "a5a37dac254a42ee9861064df64cac17", - "10a01ab2fec24ccc8fdd6c81484580aa", - "08858dcc6727435c8e8d8a744e4257c3", - "fe13250615aa41f88ce445507113d751", - "aa65005be4a84c53af6cf057e6d2c8e5", - "fce476c57e1244a385e4e343410af236", - "4b40ed2c6902493e991b8f0bd16a836e", - "2c229adc7e3b450d9d3b329920041f01", - "9f9dda84183c4b3397a07980a7e65233", - "1b75d1bdf23a4e07ab81d3c35dda4962", - "e7c68a313682417c8c8178a5cc9f3049", - "344ed337767942c89aa43546ff704f8b", - "5cef876e18544cdbbb3bc321f5d7187f", - "8d2d7bcde12e42d68bcc89a48e1979a2", - "5ea72ae8b2244aa0a2f3f9d22fabd233", - "42048b691ae04ad28d99f65a6ea0caa2", - "376d5dd013324333a7571b27927a8baf", - "ca2e68d4d0844d2c8b089a6cb17fb5eb", - "01d8aaee42c245e7b551f1fdf3789f35", - "0caf0db382e94df7a58d7e642d834591", - "eb72bce763b24f9f9c454661968a75ab", - "525a51e903ff43ea913adb46cf346d77", - "eb9b9e47ab8842d4aa2ca84903ee59dd", - "b9e151d13a2c4bf1b91eef94dabc8ad0", - "2791bdaf4a3141bdabc7676970195103", - "c7d5f4ac71c54fb98e6e19c3ee30ac45", - "3adbfd8d3a4e4bdda4a38942f17df89a", - "b9d79e6546db4094bfddb19d97b1e1b8", - "25effbfad2684166b02c8f0d61860976", - "b53e2930b2e944f4835daa6930d5e7bf", - "a702a11a86df444dba4e7698bbdf619e", - "1377c1884a46444790d1ff31cf8641e7", - "00405c1b9ef8443dafce9a1b5780ecd7", - "4b08df5bb8194ab2942df30df70d019c", - "3e0eb347e0d94790be10cf845b99d86a", - "a041b410b0e149b388fdaaf3ee3b7ced", - "efe1ff06a8ad4980bc9c6238190ca54f", - "c1cf30fe197a4cfe9cf7ca2bfa71fe15", - "3d23564c87ac4c0495a996817f561c06", - "4efc75ab99ae408f94d19caf0a0e7ec6", - "4764b0436cc6486695b272b7ba012664", - "c4ab062cead449a09fecd49ce4a40257", - "029fa360bdfc48a090ba2bafd0bcd2c3", - "b2f5f46384f540f6ba712ac32d69c334", - "221ebdbcaa6f425a8822470047f7e2e4", - "f24a24944b724066b703feba2ffc94bb", - "fd9627f91efc430698a153551f57d605", - "7c386cb2f56f4c30a508b5d60917c2db", - "86f7f14b18c94d218b1e1e5736e49433" - ] - }, - "id": "PY7wu5f3_4GJ", - "outputId": "d430fb94-efd0-4b6d-ca9b-0ca5cb95c2c6" - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "574bebb9c591418da4274cd6dd2adcb2", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Downloading (\u2026)okenizer_config.json: 0%| | 0.00/59.0 [00:00