From 1deb069e909d282d7568e5cb0b5a39b73db3e1c1 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Thu, 29 Jan 2026 10:10:39 -0500 Subject: [PATCH 1/6] fix(learn): modernize audio-search.ipynb to Pinecone SDK v8 - Replace pinecone-client with pinecone package - Use getpass fallback for API key per notebook review template --- .../audio/audio-search/audio-search.ipynb | 2158 ++++++++--------- 1 file changed, 1078 insertions(+), 1080 deletions(-) diff --git a/learn/search/audio/audio-search/audio-search.ipynb b/learn/search/audio/audio-search/audio-search.ipynb index fe44e65a..3e97c0bf 100644 --- a/learn/search/audio/audio-search/audio-search.ipynb +++ b/learn/search/audio/audio-search/audio-search.ipynb @@ -1,1191 +1,1189 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/search/audio/audio-search/audio-search.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/search/audio/audio-search/audio-search.ipynb)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CLWK0hHDNLty" - }, - "source": [ - "# Audio Similarity Search" - ] + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/search/audio/audio-search/audio-search.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/search/audio/audio-search/audio-search.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CLWK0hHDNLty" + }, + "source": [ + "# Audio Similarity Search" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Bz1ITFq_NPxP" + }, + "source": [ + "This notebook shows how to use Pinecone as the vector DB within an audio search application. Audio search can be used to find songs and metadata within a catalog, finding similar sounds in an audio library, or detecting who's speaking in an audio file.\n", + "\n", + "We will index a set of audio recordings as vector embeddings. These vector embeddings are rich, mathematical representations of the audio recordings, making it possible to determine how similar the recordings are to one another. We will then take some new (unseen) audio recording, search through the index to find the most similar matches, and play the returned audio in this notebook." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IjQ-SGWuNP1-" + }, + "source": [ + "# Install Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lNPlTs8HGWOh" + }, + "outputs": [], + "source": [ + "!pip install -qU pinecone panns-inference datasets librosa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TlUIo29INUhs" + }, + "source": [ + "# Load Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "520whID8rU8g" + }, + "source": [ + "In this demo, we will use audio from the *ESC-50 dataset* — a labeled collection of 2000 environmental audio recordings, which are 5-second-long each. 
The dataset can be loaded from the Hugging Face Hub as follows:"
We will later feed these NumPy arrays directly into our embedding model to generate audio embeddings."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pinecone import ServerlessSpec\n", + "\n", + "cloud = os.environ.get(\"PINECONE_CLOUD\") or \"aws\"\n", + "region = os.environ.get(\"PINECONE_REGION\") or \"us-east-1\"\n", + "\n", + "spec = ServerlessSpec(cloud=cloud, region=region)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create the index:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "index_name = \"audio-search-demo\"" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "3b_pV4GGNXJb" + }, + "outputs": [], + "source": [ + "import time\n", + "\n", + "# check if index already exists (it shouldn't if this is first time)\n", + "if index_name not in pc.list_indexes().names():\n", + " # if does not exist, create index\n", + " pc.create_index(index_name, dimension=2048, metric=\"cosine\", spec=spec)\n", + " # wait for index to be initialized\n", + " while not pc.describe_index(index_name).status[\"ready\"]:\n", + " time.sleep(1)\n", + "\n", + "# connect to index\n", + "index = pc.Index(index_name)\n", + "# view index stats\n", + "index.describe_index_stats()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4AhWDbI3NiIi" + }, + "source": [ + "# Generate Embeddings and Upsert\n", + "\n", + "Now we generate the embeddings using the audio embedding model. We must do this in batches as processing all items at once will exhaust machine memory limits and API request limits." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 118, + "referenced_widgets": [ + "6a9f8ff091324f1581297fc28c713047", + "e18fc6710ff341d6b80cb93cd6dc5e19", + "0b927de4df4b4f61bbb10721007fb4f8", + "74ea1cff7a2d44d08328a714e2cc384f", + "5890090cbebe417dbac772b18440eaa5", + "27c31780e6c24813a0450aa33605b81f", + "c9c22815392c47b3860786ad891445ad", + "0f3d7cc7013b4b3aa9ed394876004f7e", + "a0dc1d554f2c43869d3f399501d9c689", + "713536b27c57438c8e4d193fbc5c4f4c", + "8fcd7e3260f2445cbe6d082ed56f0a9d" + ] }, + "id": "l_l0qa-qPCqC", + "outputId": "26c363e0-e480-4fbc-a258-e99ed73b1eab" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "520whID8rU8g" + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6a9f8ff091324f1581297fc28c713047", + "version_major": 2, + "version_minor": 0 }, - "source": [ - "In this demo, we will use audio from the *ESC-50 dataset* \u2014 a labeled collection of 2000 environmental audio recordings, which are 5-second-long each. The dataset can be loaded from the HuggingFace model hub as follows:" + "text/plain": [ + " 0%| | 0/32 [00:00\n", + " \n", + " Your browser does not support the audio element.\n", + " \n", + " " ], - "source": [ - "# select the audio feature and display top three\n", - "audios = data[\"audio\"]\n", - "audios[:3]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QX94cPW8AtQl" - }, - "source": [ - "We only need the Numpy arrays as these contain all of the audio data. We will later input these Numpy arrays directly into our embedding model to generate audio embeddings." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "jDrQhpJ5Az0G" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "# select only the audio data from the dataset and store in a numpy array\n", - "audios = np.array([a[\"array\"] for a in data[\"audio\"]])" + "text/plain": [ + "" ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from IPython.display import Audio, display\n", + "\n", + "# we set an audio number to select from the dataset\n", + "audio_num = 400\n", + "# get the audio data of the audio number\n", + "query_audio = data[audio_num][\"audio\"][\"array\"]\n", + "# get the category of the audio number\n", + "category = data[audio_num][\"category\"]\n", + "# print the category and play the audio\n", + "print(\"Query Audio:\", category)\n", + "Audio(query_audio, rate=44100)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HPxbFSqwFE9R" + }, + "source": [ + "We have got the sound of a car horn. Let's generate an embedding for this sound." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "jD1qPLRjFzG0", + "outputId": "948ac450-28ea-4d8c-c99e-4bbaa5d8ef50" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "QAhNFeEWNb9_" - }, - "source": [ - "# Load Audio Embedding Model" + "data": { + "text/plain": [ + "(1, 2048)" ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# reshape query audio\n", + "query_audio = query_audio[None, :]\n", + "# get the embeddings for the audio from the model\n", + "_, xq = model.inference(query_audio)\n", + "xq.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "42V_53GcGaKB" + }, + "source": [ + "We have now converted the audio into a 2048-dimension vector the same way we did for all the other audio we indexed. Let's use this to query our Pinecone index." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "65-In6BEGUw6", + "outputId": "b2550582-38a7-4067-f1a7-fa9e956b0cab" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "HLI4j7798AT9" - }, - "source": [ - "We will use an audio tagging model trained from *PANNs: Large-Scale Pretrained Audio Neural Networks for Audio Pattern Recognition* paper to generate our audio embeddings. We use the *panns_inference* Python package, which provides an easy interface to load and use the model." + "data": { + "text/plain": [ + "{'matches': [{'id': '400', 'score': 1.0, 'values': []},\n", + " {'id': '1667', 'score': 0.842124522, 'values': []},\n", + " {'id': '1666', 'score': 0.831768811, 'values': []}],\n", + " 'namespace': ''}" ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# query pinecone index with the query audio embeddings\n", + "results = index.query(vector=xq.tolist(), top_k=3)\n", + "results" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q5zJCjxpITgH" + }, + "source": [ + "Notice that the top result is the audio number 400 from our dataset, which is our query audio (the most similar item should always be the query itself). Let's listen to the top three results. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 193 }, + "id": "mVk2BGiVI1_a", + "outputId": "0460c9b4-0496-4029-f724-0a442b3bbb6b" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "PMwze3wAI4Vg", - "outputId": "60e87993-20c1-45b3-f1c9-b2d78e9b6d5e" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Checkpoint path: /root/panns_data/Cnn14_mAP=0.431.pth\n", - "GPU number: 1\n" - ] - } + "data": { + "text/html": [ + "\n", + " \n", + " " ], - "source": [ - "from panns_inference import AudioTagging\n", - "\n", - "# load the default model into the gpu.\n", - "model = AudioTagging(checkpoint_path=None, device='cuda') # change device to cpu if a gpu is not available" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Initializing the Index\n", - "\n", - "Now we need a place to store these embeddings and enable a efficient vector search through them all. To do that we use Pinecone, we can get a [free API key](https://app.pinecone.io/) and enter it below where we will initialize our connection to Pinecone and create a new index." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from pinecone import Pinecone\n", - "\n", - "# initialize connection to pinecone (get API key at app.pinecone.io)\n", - "api_key = os.environ.get('PINECONE_API_KEY') or 'PINECONE_API_KEY'\n", - "\n", - "# configure client\n", - "pc = Pinecone(api_key=api_key)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we setup our index specification, this allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pinecone import ServerlessSpec\n", - "\n", - "cloud = os.environ.get('PINECONE_CLOUD') or 'aws'\n", - "region = os.environ.get('PINECONE_REGION') or 'us-east-1'\n", - "\n", - "spec = ServerlessSpec(cloud=cloud, region=region)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create the index:" + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "index_name = \"audio-search-demo\"" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "3b_pV4GGNXJb" - }, - "outputs": [], - "source": [ - "import time\n", - "\n", - "# check if index already exists (it shouldn't if this is first time)\n", - "if index_name not in pc.list_indexes().names():\n", - " # if does not exist, create index\n", - " pc.create_index(\n", - " index_name,\n", - " dimension=2048,\n", - " metric='cosine',\n", - " spec=spec\n", - " )\n", - " # wait for index to be initialized\n", - " while not pc.describe_index(index_name).status['ready']:\n", - " time.sleep(1)\n", - "\n", - "# connect to index\n", - "index = pc.Index(index_name)\n", - "# view index stats\n", - "index.describe_index_stats()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4AhWDbI3NiIi" - }, - "source": [ - "# Generate Embeddings and Upsert\n", - "\n", - "Now we generate the embeddings using the audio embedding model. We must do this in batches as processing all items at once will exhaust machine memory limits and API request limits." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 118, - "referenced_widgets": [ - "6a9f8ff091324f1581297fc28c713047", - "e18fc6710ff341d6b80cb93cd6dc5e19", - "0b927de4df4b4f61bbb10721007fb4f8", - "74ea1cff7a2d44d08328a714e2cc384f", - "5890090cbebe417dbac772b18440eaa5", - "27c31780e6c24813a0450aa33605b81f", - "c9c22815392c47b3860786ad891445ad", - "0f3d7cc7013b4b3aa9ed394876004f7e", - "a0dc1d554f2c43869d3f399501d9c689", - "713536b27c57438c8e4d193fbc5c4f4c", - "8fcd7e3260f2445cbe6d082ed56f0a9d" - ] - }, - "id": "l_l0qa-qPCqC", - "outputId": "26c363e0-e480-4fbc-a258-e99ed73b1eab" - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "6a9f8ff091324f1581297fc28c713047", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/32 [00:00\n", + " \n", + " Your browser does not support the audio element.\n", + " \n", + " " ], - "source": [ - "from tqdm.auto import tqdm\n", - "\n", - "# we will use batches of 64\n", - "batch_size = 64\n", - "\n", - "for i in tqdm(range(0, len(audios), batch_size)):\n", - " # find end of batch\n", - " i_end = min(i+batch_size, len(audios))\n", - " # extract batch\n", - " batch = audios[i:i_end]\n", - " # generate embeddings for all the audios in the batch\n", - " _, emb = model.inference(batch)\n", - " # create unique IDs\n", - " ids = [f\"{idx}\" for idx in range(i, i_end)]\n", - " # add all to upsert list\n", - " to_upsert = list(zip(ids, emb.tolist()))\n", - " # upsert/insert these records to pinecone\n", - " _ = index.upsert(vectors=to_upsert)\n", - "\n", - "# check that we have all vectors in index\n", - "index.describe_index_stats()" + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - 
"cell_type": "markdown", - "metadata": { - "id": "LG-Wg970DKKh" - }, - "source": [ - "We now have *2000* audio records indexed in Pinecone, we're ready to begin querying." + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# play the top 3 similar audios\n", + "for r in results[\"matches\"]:\n", + " # select the audio data from the databse using the id as an index\n", + " a = data[int(r[\"id\"])][\"audio\"][\"array\"]\n", + " display(Audio(a, rate=44100))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tKo2HJp1JwGW" + }, + "source": [ + "We have great results, everything aligns with what seems to be a busy city street with car horns.\n", + "\n", + "Let's write a helper function to run the queries using audio from our dataset easily. We do not need to embed these audio samples again as we have already, they are just stored in Pinecone. So, we specify the `id` of the query audio to search with and tell Pinecone to search with that." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "Gshw-pX8Wbrn" + }, + "outputs": [], + "source": [ + "def find_similar_audios(id):\n", + " print(\"Query Audio:\")\n", + " # select the audio data from the databse using the id as an index\n", + " query_audio = data[id][\"audio\"][\"array\"]\n", + " # play the query audio\n", + " display(Audio(query_audio, rate=44100))\n", + " # query pinecone index with the query audio id\n", + " result = index.query(id=str(id), top_k=5)\n", + " print(\"Result:\")\n", + " # play the top 5 similar audios\n", + " for r in result[\"matches\"]:\n", + " a = data[int(r[\"id\"])][\"audio\"][\"array\"]\n", + " display(Audio(a, rate=44100))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 403 }, + "id": "1Pxkj0JnQgt7", + "outputId": "752494e7-9354-4007-d603-22d7119b45d3" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "x3BvGOl6TI_Z" - }, - "source": [ - "# Querying" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Query Audio:\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "e3UOXzJ-DZvo" - }, - "source": [ - "Let's first listen to an audio from our dataset. We will generate embeddings for the audio and use it to find similar audios from the Pinecone index." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 93 - }, - "id": "bwEETtYuDYEZ", - "outputId": "f556e117-f471-4c4d-bbf7-64428b16bf30" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Query Audio: car_horn\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "\n", + " \n", + " " ], - "source": [ - "from IPython.display import Audio, display\n", - "\n", - "# we set an audio number to select from the dataset\n", - "audio_num = 400\n", - "# get the audio data of the audio number\n", - "query_audio = data[audio_num][\"audio\"][\"array\"]\n", - "# get the category of the audio number\n", - "category = data[audio_num][\"category\"]\n", - "# print the category and play the audio\n", - "print(\"Query Audio:\", category)\n", - "Audio(query_audio, rate=44100)" + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "HPxbFSqwFE9R" - }, - "source": [ - "We have got the sound of a car horn. Let's generate an embedding for this sound." - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Result:\n" + ] }, { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "jD1qPLRjFzG0", - "outputId": "948ac450-28ea-4d8c-c99e-4bbaa5d8ef50" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(1, 2048)" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "\n", + " \n", + " " ], - "source": [ - "# reshape query audio\n", - "query_audio = query_audio[None, :]\n", - "# get the embeddings for the audio from the model\n", - "_, xq = model.inference(query_audio)\n", - "xq.shape" + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "42V_53GcGaKB" - }, - "source": [ - "We have now converted the audio into a 2048-dimension vector the same way we did for all the other audio we indexed. Let's use this to query our Pinecone index." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "65-In6BEGUw6", - "outputId": "b2550582-38a7-4067-f1a7-fa9e956b0cab" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'matches': [{'id': '400', 'score': 1.0, 'values': []},\n", - " {'id': '1667', 'score': 0.842124522, 'values': []},\n", - " {'id': '1666', 'score': 0.831768811, 'values': []}],\n", - " 'namespace': ''}" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "\n", + " \n", + " " ], - "source": [ - "# query pinecone index with the query audio embeddings\n", - "results = index.query(vector=xq.tolist(), top_k=3)\n", - "results" + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "q5zJCjxpITgH" - }, - "source": [ - "Notice that the top result is the audio number 400 from our dataset, which is our query audio (the most similar item should always be the query itself). Let's listen to the top three results. 
" + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 193 - }, - "id": "mVk2BGiVI1_a", - "outputId": "0460c9b4-0496-4029-f724-0a442b3bbb6b" - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } + "data": { + "text/html": [ + "\n", + " \n", + " " ], - "source": [ - "# play the top 3 similar audios\n", - "for r in results[\"matches\"]:\n", - " # select the audio data from the databse using the id as an index\n", - " a = data[int(r[\"id\"])][\"audio\"][\"array\"]\n", - " display(Audio(a, rate=44100))" + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "tKo2HJp1JwGW" - }, - "source": [ - "We have great results, everything aligns with what seems to be a busy city street with car horns.\n", - "\n", - "Let's write a helper function to run the queries using audio from our dataset easily. We do not need to embed these audio samples again as we have already, they are just stored in Pinecone. So, we specify the `id` of the query audio to search with and tell Pinecone to search with that." + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "find_similar_audios(1642)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we return a set of revving motors (they seem to either be vehicles or lawnmowers)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 403 }, + "id": "jVTpCS-nbCEw", + "outputId": "39d6a32a-ee6e-4d74-c038-b97af0e3829d" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "id": "Gshw-pX8Wbrn" - }, - "outputs": [], - "source": [ - "def find_similar_audios(id):\n", - " print(\"Query Audio:\")\n", - " # select the audio data from the databse using the id as an index\n", - " query_audio = data[id][\"audio\"][\"array\"]\n", - " # play the query audio\n", - " display(Audio(query_audio, rate=44100))\n", - " # query pinecone index with the query audio id\n", - " result = index.query(id=str(id), top_k=5)\n", - " print(\"Result:\")\n", - " # play the top 5 similar audios\n", - " for r in result[\"matches\"]:\n", - " a = data[int(r[\"id\"])][\"audio\"][\"array\"]\n", - " display(Audio(a, rate=44100))" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Query Audio:\n" + ] }, { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 403 - }, - "id": "1Pxkj0JnQgt7", - "outputId": "752494e7-9354-4007-d603-22d7119b45d3" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Query Audio:\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Result:\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } + "data": { + "text/html": [ + "\n", + " \n", + " " ], - "source": [ - "find_similar_audios(1642)" + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here we return a set of revving motors (they seem to either be vehicles or lawnmowers)." 
- ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Result:\n" + ] }, { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 403 - }, - "id": "jVTpCS-nbCEw", - "outputId": "39d6a32a-ee6e-4d74-c038-b97af0e3829d" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Query Audio:\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Result:\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } + "data": { + "text/html": [ + "\n", + " \n", + " " ], - "source": [ - "find_similar_audios(452)" + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "WiL-cw_LRvXy" - }, - "source": [ - "And now a more relaxing set of birds chirping in nature.\n", - "\n", - "Let's use another audio sample from elsewhere (eg not this dataset) and see how the search performs with this." + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "btsH4EOkbCHe", - "outputId": "8d40a7cf-171f-45a6-9953-2ecdbfaf37af" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2022-09-25 20:47:00-- https://storage.googleapis.com/audioset/miaow_16k.wav\n", - "Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.20.128, 108.177.98.128, 74.125.197.128, ...\n", - "Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.20.128|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 215546 (210K) [audio/x-wav]\n", - "Saving to: \u2018miaow_16k.wav.1\u2019\n", - "\n", - "\rmiaow_16k.wav.1 0%[ ] 0 --.-KB/s \rmiaow_16k.wav.1 100%[===================>] 210.49K --.-KB/s in 0.004s \n", - "\n", - "2022-09-25 20:47:00 (54.1 MB/s) - \u2018miaow_16k.wav.1\u2019 saved [215546/215546]\n", - "\n" - ] - } + "data": { + "text/html": [ + "\n", + " \n", + " " ], - "source": [ - "!wget https://storage.googleapis.com/audioset/miaow_16k.wav" + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "JJRZUWCDR73v" - }, - "source": [ - "We can load the audio into a Numpy array as follows:" + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 76 - }, - "id": "JZHilxkISLJb", - "outputId": "80aad9ff-b33d-4522-ed98-23b1bf19559f" - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "\n", + " \n", + " " ], - "source": [ - "import librosa\n", - "\n", - "a, _ = librosa.load(\"miaow_16k.wav\", sr=44100)\n", - "Audio(a, rate=44100)" + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "find_similar_audios(452)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WiL-cw_LRvXy" + }, + "source": [ + "And now a more relaxing set of birds chirping in nature.\n", + "\n", + "Let's use another audio sample from elsewhere (eg not this dataset) and see how the search performs with this." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "btsH4EOkbCHe", + "outputId": "8d40a7cf-171f-45a6-9953-2ecdbfaf37af" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "mKLknBWQV3Jm" - }, - "source": [ - "Now we generate the embeddings for this audio and query the Pinecone index." - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "--2022-09-25 20:47:00-- https://storage.googleapis.com/audioset/miaow_16k.wav\n", + "Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.20.128, 108.177.98.128, 74.125.197.128, ...\n", + "Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.20.128|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 215546 (210K) [audio/x-wav]\n", + "Saving to: ‘miaow_16k.wav.1’\n", + "\n", + "\rmiaow_16k.wav.1 0%[ ] 0 --.-KB/s \rmiaow_16k.wav.1 100%[===================>] 210.49K --.-KB/s in 0.004s \n", + "\n", + "2022-09-25 20:47:00 (54.1 MB/s) - ‘miaow_16k.wav.1’ saved [215546/215546]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://storage.googleapis.com/audioset/miaow_16k.wav" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JJRZUWCDR73v" + }, + "source": [ + "We can load the audio into a Numpy array as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 76 }, + "id": "JZHilxkISLJb", + "outputId": "80aad9ff-b33d-4522-ed98-23b1bf19559f" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 193 - }, - "id": "vWKRAB5rSy9m", - "outputId": "dcc856fb-4e90-46ee-f980-d46766556726" - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } + "data": { + "text/html": [ + "\n", + " \n", + " " ], - "source": [ - "# reshape query audio\n", - "query_audio = a[None, :]\n", - "# get the embeddings for the audio from the model\n", - "_, xq = model.inference(query_audio)\n", - "\n", - "# query pinecone index with the query audio embeddings\n", - "results = index.query(vector=xq.tolist(), top_k=3)\n", - "\n", - "# play the top 3 similar audios\n", - "for r in results[\"matches\"]:\n", - " a = data[int(r[\"id\"])][\"audio\"][\"array\"]\n", - " display(Audio(a, rate=44100))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "g2Ex_gk8Vmee" - }, - "source": [ - "Our audio search application has identified a set of similar cat sounds, which is excellent." + "text/plain": [ + "" ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import librosa\n", + "\n", + "a, _ = librosa.load(\"miaow_16k.wav\", sr=44100)\n", + "Audio(a, rate=44100)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mKLknBWQV3Jm" + }, + "source": [ + "Now we generate the embeddings for this audio and query the Pinecone index." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 193 }, + "id": "vWKRAB5rSy9m", + "outputId": "dcc856fb-4e90-46ee-f980-d46766556726" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "_uNGu-zUTtW6" - }, - "source": [ - "# Delete the Index" + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "ltMgxnuDTvp6" - }, - "source": [ - "Delete the index once you are sure that you do not want to use it anymore. Once the index is deleted, you cannot use it again." 
+ "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "id": "voohGLfrUxHf" - }, - "outputs": [], - "source": [ - "pc.delete_index(index_name)" + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3.9.13 64-bit", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.10.7 (main, Sep 14 2022, 22:38:23) [Clang 14.0.0 (clang-1400.0.29.102)]" - }, - "vscode": { - "interpreter": { - "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e" - } - } + ], + "source": [ + "# reshape query audio\n", + "query_audio = a[None, :]\n", + "# get the embeddings for the audio from the model\n", + "_, xq = model.inference(query_audio)\n", + "\n", + "# query pinecone index with the query audio embeddings\n", + "results = index.query(vector=xq.tolist(), top_k=3)\n", + "\n", + "# play the top 3 similar audios\n", + "for r in results[\"matches\"]:\n", + " a = data[int(r[\"id\"])][\"audio\"][\"array\"]\n", + " display(Audio(a, rate=44100))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "g2Ex_gk8Vmee" + }, + "source": [ + "Our audio search application has identified a set of similar cat sounds, which is excellent." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_uNGu-zUTtW6" + }, + "source": [ + "# Delete the Index" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ltMgxnuDTvp6" + }, + "source": [ + "Delete the index once you are sure that you do not want to use it anymore. Once the index is deleted, you cannot use it again." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "id": "voohGLfrUxHf" + }, + "outputs": [], + "source": [ + "pc.delete_index(index_name)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3.9.13 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.7 (main, Sep 14 2022, 22:38:23) [Clang 14.0.0 (clang-1400.0.29.102)]" }, - "nbformat": 4, - "nbformat_minor": 0 + "vscode": { + "interpreter": { + "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e" + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 } \ No newline at end of file From 06970e1fe9cde5859a113a47ceb76e96dc016387 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Sat, 31 Jan 2026 20:24:53 +0000 Subject: [PATCH 2/6] fix(learn): fix linting issues in audio-search.ipynb - Move all imports to first code cell following notebook standards - Pin dependency versions (pinecone==8.0.0, panns-inference==0.1.1, datasets==3.2.0, librosa==0.10.2) - Remove scattered import statements from individual cells Co-authored-by: Claude --- .../audio/audio-search/audio-search.ipynb | 246 +++--------------- 1 file changed, 29 insertions(+), 217 deletions(-) diff --git a/learn/search/audio/audio-search/audio-search.ipynb b/learn/search/audio/audio-search/audio-search.ipynb index 3e97c0bf..f7661fe0 100644 --- a/learn/search/audio/audio-search/audio-search.ipynb +++ b/learn/search/audio/audio-search/audio-search.ipynb @@ -44,9 +44,14 @@ "id": "lNPlTs8HGWOh" }, "outputs": [], - "source": [ - "!pip install -qU pinecone panns-inference datasets librosa" - ] + "source": "!pip install -qU pinecone==8.0.0 panns-inference==0.1.1 datasets==3.2.0 librosa==0.10.2" + }, + { + "cell_type": "code", + "source": "try:\n import os\n import time\n from getpass import getpass\n\n import librosa\n import numpy as np\n from datasets import load_dataset\n from IPython.display import Audio, display\n from panns_inference import AudioTagging\n from pinecone import Pinecone, ServerlessSpec\n from tqdm.auto import tqdm\nexcept ImportError as e:\n raise ImportError(f\"Missing required package. 
Run the dependency cell first: {e}\")", + "metadata": {}, + "execution_count": null, + "outputs": [] }, { "cell_type": "markdown", @@ -68,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -76,36 +81,8 @@ "id": "-XE4XUVnJnXE", "outputId": "db23db43-e7c1-424b-f218-13438419943a" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:datasets.builder:Using custom data configuration ashraq--esc50-60cc11cf57cf8497\n", - "WARNING:datasets.builder:Found cached dataset parquet (/root/.cache/huggingface/datasets/ashraq___parquet/ashraq--esc50-60cc11cf57cf8497/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n" - ] - }, - { - "data": { - "text/plain": [ - "Dataset({\n", - " features: ['filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take', 'audio'],\n", - " num_rows: 2000\n", - "})" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datasets import load_dataset\n", - "\n", - "# load the dataset from huggingface model hub\n", - "data = load_dataset(\"ashraq/esc50\", split=\"train\")\n", - "data" - ] + "outputs": [], + "source": "# load the dataset from huggingface model hub\ndata = load_dataset(\"ashraq/esc50\", split=\"train\")\ndata" }, { "cell_type": "markdown", @@ -165,17 +142,12 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "id": "jDrQhpJ5Az0G" }, "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "# select only the audio data from the dataset and store in a numpy array\n", - "audios = np.array([a[\"array\"] for a in data[\"audio\"]])" - ] + "source": "# select only the audio data from the dataset and store in a numpy array\naudios = np.array([a[\"array\"] for a in data[\"audio\"]])" }, { "cell_type": "markdown", @@ -197,7 +169,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -205,24 +177,8 @@ "id": "PMwze3wAI4Vg", "outputId": "60e87993-20c1-45b3-f1c9-b2d78e9b6d5e" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Checkpoint path: /root/panns_data/Cnn14_mAP=0.431.pth\n", - "GPU number: 1\n" - ] - } - ], - "source": [ - "from panns_inference import AudioTagging\n", - "\n", - "# load the default model into the gpu.\n", - "model = AudioTagging(\n", - " checkpoint_path=None, device=\"cuda\"\n", - ") # change device to cpu if a gpu is not available" - ] + "outputs": [], + "source": "# load the default model into the gpu.\nmodel = AudioTagging(\n checkpoint_path=None, device=\"cuda\"\n) # change device to cpu if a gpu is not available" }, { "cell_type": "markdown", @@ -238,17 +194,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "import os\n", - "from getpass import getpass\n", - "\n", - "from pinecone import Pinecone\n", - "\n", - "# initialize connection to pinecone (get API key at app.pinecone.io)\n", - "api_key = os.environ.get(\"PINECONE_API_KEY\") or getpass(\"Enter your Pinecone API key: \")\n", - "\n", - "pc = Pinecone(api_key=api_key)" - ] + "source": "# initialize connection to pinecone (get API key at app.pinecone.io)\napi_key = os.environ.get(\"PINECONE_API_KEY\") or getpass(\"Enter your Pinecone API key: \")\n\npc = Pinecone(api_key=api_key)" }, { "cell_type": "markdown", @@ -262,14 +208,7 @@ "execution_count": null, "metadata": 
{}, "outputs": [], - "source": [ - "from pinecone import ServerlessSpec\n", - "\n", - "cloud = os.environ.get(\"PINECONE_CLOUD\") or \"aws\"\n", - "region = os.environ.get(\"PINECONE_REGION\") or \"us-east-1\"\n", - "\n", - "spec = ServerlessSpec(cloud=cloud, region=region)" - ] + "source": "cloud = os.environ.get(\"PINECONE_CLOUD\") or \"aws\"\nregion = os.environ.get(\"PINECONE_REGION\") or \"us-east-1\"\n\nspec = ServerlessSpec(cloud=cloud, region=region)" }, { "cell_type": "markdown", @@ -289,27 +228,12 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "id": "3b_pV4GGNXJb" }, "outputs": [], - "source": [ - "import time\n", - "\n", - "# check if index already exists (it shouldn't if this is first time)\n", - "if index_name not in pc.list_indexes().names():\n", - " # if does not exist, create index\n", - " pc.create_index(index_name, dimension=2048, metric=\"cosine\", spec=spec)\n", - " # wait for index to be initialized\n", - " while not pc.describe_index(index_name).status[\"ready\"]:\n", - " time.sleep(1)\n", - "\n", - "# connect to index\n", - "index = pc.Index(index_name)\n", - "# view index stats\n", - "index.describe_index_stats()" - ] + "source": "# check if index already exists (it shouldn't if this is first time)\nif index_name not in pc.list_indexes().names():\n # if does not exist, create index\n pc.create_index(index_name, dimension=2048, metric=\"cosine\", spec=spec)\n # wait for index to be initialized\n while not pc.describe_index(index_name).status[\"ready\"]:\n time.sleep(1)\n\n# connect to index\nindex = pc.Index(index_name)\n# view index stats\nindex.describe_index_stats()" }, { "cell_type": "markdown", @@ -324,7 +248,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -346,58 +270,8 @@ "id": "l_l0qa-qPCqC", "outputId": "26c363e0-e480-4fbc-a258-e99ed73b1eab" }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "6a9f8ff091324f1581297fc28c713047", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/32 [00:00\n", - " \n", - " Your browser does not support the audio element.\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from IPython.display import Audio, display\n", - "\n", - "# we set an audio number to select from the dataset\n", - "audio_num = 400\n", - "# get the audio data of the audio number\n", - "query_audio = data[audio_num][\"audio\"][\"array\"]\n", - "# get the category of the audio number\n", - "category = data[audio_num][\"category\"]\n", - "# print the category and play the audio\n", - "print(\"Query Audio:\", category)\n", - "Audio(query_audio, rate=44100)" - ] + "outputs": [], + "source": "# we set an audio number to select from the dataset\naudio_num = 400\n# get the audio data of the audio number\nquery_audio = data[audio_num][\"audio\"][\"array\"]\n# get the category of the audio number\ncategory = data[audio_num][\"category\"]\n# print the category and play the audio\nprint(\"Query Audio:\", category)\nAudio(query_audio, rate=44100)" }, { "cell_type": "markdown", @@ -1000,7 +836,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1009,32 +845,8 @@ "id": "JZHilxkISLJb", "outputId": "80aad9ff-b33d-4522-ed98-23b1bf19559f" }, - "outputs": [ - 
{ - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import librosa\n", - "\n", - "a, _ = librosa.load(\"miaow_16k.wav\", sr=44100)\n", - "Audio(a, rate=44100)" - ] + "outputs": [], + "source": "a, _ = librosa.load(\"miaow_16k.wav\", sr=44100)\nAudio(a, rate=44100)" }, { "cell_type": "markdown", From 50cfd3f83306d259dbfc23a17448398700f7c23e Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Sat, 31 Jan 2026 21:19:34 +0000 Subject: [PATCH 3/6] fix(learn): add torch dependency and update docs link in audio-search.ipynb - Add torch to pip install dependencies (required by panns-inference) - Update Pinecone docs URL from /docs/projects to /troubleshooting/available-cloud-regions Co-authored-by: Claude --- learn/search/audio/audio-search/audio-search.ipynb | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/learn/search/audio/audio-search/audio-search.ipynb b/learn/search/audio/audio-search/audio-search.ipynb index f7661fe0..3c570a16 100644 --- a/learn/search/audio/audio-search/audio-search.ipynb +++ b/learn/search/audio/audio-search/audio-search.ipynb @@ -44,7 +44,7 @@ "id": "lNPlTs8HGWOh" }, "outputs": [], - "source": "!pip install -qU pinecone==8.0.0 panns-inference==0.1.1 datasets==3.2.0 librosa==0.10.2" + "source": "!pip install -qU pinecone==8.0.0 panns-inference==0.1.1 datasets==3.2.0 librosa==0.10.2 torch" }, { "cell_type": "code", @@ -199,9 +199,7 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "Now we setup our index specification, this allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects)." - ] + "source": "Now we setup our index specification, this allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/troubleshooting/available-cloud-regions)." }, { "cell_type": "code", From 0eb6a8fd82b3008b8d4c0d1640233ebf695479a6 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Sat, 31 Jan 2026 21:26:12 +0000 Subject: [PATCH 4/6] fix(learn): pin torch version and update docs link in audio-search.ipynb - Pin torch to version 2.6.0 (required by lint checks) - Update Pinecone docs URL to working link (guides/projects/understanding-projects) - Previous URL was redirecting with 307 status, causing link checker to fail Co-authored-by: Claude --- learn/search/audio/audio-search/audio-search.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/learn/search/audio/audio-search/audio-search.ipynb b/learn/search/audio/audio-search/audio-search.ipynb index 3c570a16..9861485c 100644 --- a/learn/search/audio/audio-search/audio-search.ipynb +++ b/learn/search/audio/audio-search/audio-search.ipynb @@ -44,7 +44,7 @@ "id": "lNPlTs8HGWOh" }, "outputs": [], - "source": "!pip install -qU pinecone==8.0.0 panns-inference==0.1.1 datasets==3.2.0 librosa==0.10.2 torch" + "source": "!pip install -qU pinecone==8.0.0 panns-inference==0.1.1 datasets==3.2.0 librosa==0.10.2 torch==2.6.0" }, { "cell_type": "code", @@ -199,7 +199,7 @@ { "cell_type": "markdown", "metadata": {}, - "source": "Now we setup our index specification, this allows us to define the cloud provider and region where we want to deploy our index. 
You can find the available cloud providers and regions in the [Pinecone documentation](https://docs.pinecone.io/guides/projects/understanding-projects)."
Fixed by: - Replacing !wget command with urllib.request.urlretrieve() - Adding urllib.request import to the main imports cell - Maintaining the same functionality with pure Python code Co-authored-by: Claude --- .../audio/audio-search/audio-search.ipynb | 28 +++---------------- 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/learn/search/audio/audio-search/audio-search.ipynb b/learn/search/audio/audio-search/audio-search.ipynb index 4116caaf..f42310b6 100644 --- a/learn/search/audio/audio-search/audio-search.ipynb +++ b/learn/search/audio/audio-search/audio-search.ipynb @@ -48,7 +48,7 @@ }, { "cell_type": "code", - "source": "try:\n import os\n import time\n from getpass import getpass\n\n import librosa\n import numpy as np\n import torch\n from datasets import load_dataset\n from IPython.display import Audio, display\n from panns_inference import AudioTagging\n from pinecone import Pinecone, ServerlessSpec\n from tqdm.auto import tqdm\nexcept ImportError as e:\n raise ImportError(f\"Missing required package. Run the dependency cell first: {e}\")", + "source": "try:\n import os\n import time\n import urllib.request\n from getpass import getpass\n\n import librosa\n import numpy as np\n import torch\n from datasets import load_dataset\n from IPython.display import Audio, display\n from panns_inference import AudioTagging\n from pinecone import Pinecone, ServerlessSpec\n from tqdm.auto import tqdm\nexcept ImportError as e:\n raise ImportError(f\"Missing required package. Run the dependency cell first: {e}\")", "metadata": {}, "execution_count": null, "outputs": [] @@ -792,7 +792,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -800,28 +800,8 @@ "id": "btsH4EOkbCHe", "outputId": "8d40a7cf-171f-45a6-9953-2ecdbfaf37af" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2022-09-25 20:47:00-- https://storage.googleapis.com/audioset/miaow_16k.wav\n", - "Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.20.128, 108.177.98.128, 74.125.197.128, ...\n", - "Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.20.128|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 215546 (210K) [audio/x-wav]\n", - "Saving to: ‘miaow_16k.wav.1’\n", - "\n", - "\rmiaow_16k.wav.1 0%[ ] 0 --.-KB/s \rmiaow_16k.wav.1 100%[===================>] 210.49K --.-KB/s in 0.004s \n", - "\n", - "2022-09-25 20:47:00 (54.1 MB/s) - ‘miaow_16k.wav.1’ saved [215546/215546]\n", - "\n" - ] - } - ], - "source": [ - "!wget https://storage.googleapis.com/audioset/miaow_16k.wav" - ] + "outputs": [], + "source": "# Download the audio file\nurl = \"https://storage.googleapis.com/audioset/miaow_16k.wav\"\nfilename = \"miaow_16k.wav\"\nurllib.request.urlretrieve(url, filename)\nprint(f\"Downloaded {filename}\")" }, { "cell_type": "markdown",