From 9f5b627f31accb97e294b582454f5bb0632073ae Mon Sep 17 00:00:00 2001
From: Jen Hamon <jhamon@pinecone.io>
Date: Thu, 29 Jan 2026 02:49:16 -0500
Subject: [PATCH 1/5] fix(docs): update it-threat-detection.ipynb to SDK v8

- Update Pinecone SDK install from pinecone-client==3.1.0 to pinecone
- Update API key handling to use getpass fallback pattern
- Use pc.has_index() instead of pc.list_indexes().names()
- Add named keyword argument for create_index name parameter
- Remove outdated TensorFlow/Keras version pins
- Update stale documentation link
- Remove time-sensitive language ("recent academic work")

Resolves SDK-134
---
 learn/security/it-threat-detection.ipynb | 3517 +++++++++++-----------
 1 file changed, 1725 insertions(+), 1792 deletions(-)

diff --git a/learn/security/it-threat-detection.ipynb b/learn/security/it-threat-detection.ipynb
index 7a06c0a7..0e98a4df 100644
--- a/learn/security/it-threat-detection.ipynb
+++ b/learn/security/it-threat-detection.ipynb
@@ -1,1822 +1,1755 @@
 {
-  "cells": [
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/security/it-threat-detection.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/security/it-threat-detection.ipynb)"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "b4Yv1jeGywpL"
-      },
-      "source": [
-        "## IT Threat Detection With Similarity Search\n",
-        "\n",
-        "This notebook shows how to use Pinecone's similarity search as a service to build an application for detecting rare events. Such application is common in cyber-security and fraud detection domains wherein only a tiny fraction of the events are malicious. \n",
-        "\n",
-        "Here we will build a network intrusion detector. Network intrusion detection systems monitor incoming and outgoing network traffic flow, raising alarms whenever a threat is detected. Here we use a deep-learning model and similarity search in detecting and classifying network intrusion traffic.\n",
-        "\n",
-        "We will start by indexing a set of labeled traffic events in the form of vector embeddings. Each event is either benign or malicious. The vector embeddings are rich, mathematical representations of the network traffic events. It is making it possible to determine how similar the network events are to one another using similarity-search algorithms built into Pinecone. Here we will transform network traffic events into vectors using a deep learning model from recent academic work.\n",
-        "\n",
-        "\n",
-        "We will then take some new (unseen) network events and search through the index to find the most similar matches, along with their labels. In such a way, we will propagate the matched labels to classify the unseen events as benign or malicious. Mind that the intrusion detection task is a challenging classification task because malicious events are sporadic. The similarity search service helps us sift the most relevant historical labeled events. That way, we identify these rare events while keeping a low rate of false alarms. \n"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "1OUSClcPhU4j"
-      },
-      "source": [
-        "## Setting up Pinecone\n",
-        "\n",
-        "We will first install and initialize Pinecone. You can get your [API Key here](https://app.pinecone.io)."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "251n1avKzCrm"
-      },
-      "outputs": [],
-      "source": [
-        "!pip install -qU pinecone-client==3.1.0"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "_cGTuY8dywpV",
-        "outputId": "da0b57bf-bde2-401f-e502-6ad80f7fc6cc"
-      },
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "[]"
-            ]
-          },
-          "execution_count": 1,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "import os\n",
-        "from pinecone import Pinecone\n",
-        "\n",
-        "# initialize connection to pinecone (get API key at app.pinecone.io)\n",
-        "api_key = os.environ.get('PINECONE_API_KEY') or 'PINECONE_API_KEY'\n",
-        "\n",
-        "# configure client\n",
-        "pc = Pinecone(api_key=api_key)"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "a7ysNAlrjD_k"
-      },
-      "source": [
-        "## Installing other dependencies"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "r3g-b61IywpQ"
-      },
-      "outputs": [],
-      "source": [
-        "!pip install -qU pip python-dateutil tensorflow==2.5 keras==2.4.0 scikit-learn matplotlib==3.1.0 seaborn"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "0uuHcP-WywpQ"
-      },
-      "outputs": [],
-      "source": [
-        "from collections import Counter\n",
-        "import matplotlib.pyplot as plt\n",
-        "import pandas as pd\n",
-        "import seaborn as sns\n",
-        "from tensorflow import keras\n",
-        "from tensorflow.keras.models import Model\n",
-        "import tensorflow.keras.backend as K\n",
-        "from sklearn.metrics import accuracy_score, precision_score, recall_score\n",
-        "from sklearn.metrics import confusion_matrix"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "lEwqF1osp83o"
-      },
-      "source": [
-        "We will use some of the code from a recent [academic work](https://github.com/rambasnet/DeepLearning-IDS). Let's clone the repository that we will use to prepare data."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "FUDeovNiywpT"
-      },
-      "outputs": [],
-      "source": [
-        "!git clone -q https://github.com/rambasnet/DeepLearning-IDS.git "
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "mc4ERmwniO1H"
-      },
-      "source": [
-        "## Define a New Pinecone Index"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Now we setup our index specification, this allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects)."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "from pinecone import ServerlessSpec\n",
-        "\n",
-        "cloud = os.environ.get('PINECONE_CLOUD') or 'aws'\n",
-        "region = os.environ.get('PINECONE_REGION') or 'us-east-1'\n",
-        "\n",
-        "spec = ServerlessSpec(cloud=cloud, region=region)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Create the index:"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "9_TIgYxBywpV"
-      },
-      "outputs": [],
-      "source": [
-        "# Pick a name for the new service\n",
-        "index_name = 'it-threats'"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "import time\n",
-        "\n",
-        "# check if index already exists (it shouldn't if this is first time)\n",
-        "if index_name not in pc.list_indexes().names():\n",
-        "    # if does not exist, create index\n",
-        "    pc.create_index(\n",
-        "        index_name,\n",
-        "        dimension=128,\n",
-        "        metric='euclidean',\n",
-        "        spec=spec\n",
-        "    )\n",
-        "    # wait for index to be initialized\n",
-        "    while not pc.describe_index(index_name).status['ready']:\n",
-        "        time.sleep(1)\n",
-        "\n",
-        "# connect to index\n",
-        "index = pc.Index(index_name)\n",
-        "# view index stats\n",
-        "index.describe_index_stats()"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "IOP9jCo5ywpX"
-      },
-      "source": [
-        "## Upload\n",
-        "Here we transform network events into vector embeddings, then upload them into Pinecone's vector index. "
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "PzkJJd8ZYTNM"
-      },
-      "source": [
-        "### Prepare Data"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "-N0bh6dUYTNN"
-      },
-      "source": [
-        "The datasets we use consist of benign (normal) network traffic and malicious traffic\n",
-        "generated from several different network attacks. We will focus on web attacks only. \n",
-        "\n",
-        "The web attack category consists of three common attacks: \n",
-        "- Cross-site scripting (BruteForce-XSS), \n",
-        "- SQL-Injection (SQL-Injection), \n",
-        "- Brute force administrative and user passwords (BruteForce-Web)\n",
-        "\n",
-        "The original data was recorded over two days."
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "vuOLNAehYTNN"
-      },
-      "source": [
-        "**Download data for 22-02-2018 and 23-02-2018**\n",
-        "\n",
-        "\n"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "pCrwPkDJYTNO"
-      },
-      "source": [
-        "Files should be downloaded to the current directory. We will be using one date for training and generating vectors, and another one for testing."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "HshKyF0KywpR",
-        "outputId": "d5b3ceee-b584-47e2-a428-43e5c835968f"
-      },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Thursday-22-02-2018 100%[===================>] 364.91M  3.07MB/s    in 2m 6s   \n",
-            "Friday-23-02-2018_T 100%[===================>] 365.10M  3.07MB/s    in 1m 53s  \n"
-          ]
-        }
-      ],
-      "source": [
-        "!wget \"https://cse-cic-ids2018.s3.ca-central-1.amazonaws.com/Processed%20Traffic%20Data%20for%20ML%20Algorithms/Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\" -q --show-progress\n",
-        "!wget \"https://cse-cic-ids2018.s3.ca-central-1.amazonaws.com/Processed%20Traffic%20Data%20for%20ML%20Algorithms/Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\" -q --show-progress"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "gr-hay4rfk0d"
-      },
-      "source": [
-        "Let's look at the data events first."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "bSSZ4YCZywpT",
-        "outputId": "27d111b0-c0c1-4694-9e07-65965d2014f1"
-      },
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "Benign              1048009\n",
-              "Brute Force -Web        362\n",
-              "Brute Force -XSS        151\n",
-              "SQL Injection            53\n",
-              "Name: Label, dtype: int64"
-            ]
-          },
-          "execution_count": 60,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "data = pd.read_csv('Friday-23-02-2018_TrafficForML_CICFlowMeter.csv')\n",
-        "data.Label.value_counts()"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "vYl82iXeywpT"
-      },
-      "source": [
-        "**Clean the data** using a python script from the cloned repository."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "y-trCZOcywpT",
-        "outputId": "c51b5e77-4e66-47a3-c4f6-d492a7dfaef8"
-      },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "cleaning Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\n",
-            "total rows read = 1048576\n",
-            "all done writing 1042868 rows; dropped 5708 rows\n"
-          ]
-        }
-      ],
-      "source": [
-        "!python DeepLearning-IDS/data_cleanup.py \"Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\" \"result23022018\""
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "oKHL8HCjYTNQ"
-      },
-      "source": [
-        "Load the file that you got from the previous step."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 278
-        },
-        "id": "uzH6_tBpywpU",
-        "outputId": "017263dd-8e2a-45a4-9693-5b5b763197ce"
-      },
-      "outputs": [
-        {
-          "data": {
-            "text/html": [
-              "<div>\n",
-              "<style scoped>\n",
-              "    .dataframe tbody tr th:only-of-type {\n",
-              "        vertical-align: middle;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe tbody tr th {\n",
-              "        vertical-align: top;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe thead th {\n",
-              "        text-align: right;\n",
-              "    }\n",
-              "</style>\n",
-              "<table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              "    <tr style=\"text-align: right;\">\n",
-              "      <th></th>\n",
-              "      <th>Dst Port</th>\n",
-              "      <th>Protocol</th>\n",
-              "      <th>Timestamp</th>\n",
-              "      <th>Flow Duration</th>\n",
-              "      <th>Tot Fwd Pkts</th>\n",
-              "      <th>Tot Bwd Pkts</th>\n",
-              "      <th>TotLen Fwd Pkts</th>\n",
-              "      <th>TotLen Bwd Pkts</th>\n",
-              "      <th>Fwd Pkt Len Max</th>\n",
-              "      <th>Fwd Pkt Len Min</th>\n",
-              "      <th>...</th>\n",
-              "      <th>Fwd Seg Size Min</th>\n",
-              "      <th>Active Mean</th>\n",
-              "      <th>Active Std</th>\n",
-              "      <th>Active Max</th>\n",
-              "      <th>Active Min</th>\n",
-              "      <th>Idle Mean</th>\n",
-              "      <th>Idle Std</th>\n",
-              "      <th>Idle Max</th>\n",
-              "      <th>Idle Min</th>\n",
-              "      <th>Label</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <th>0</th>\n",
-              "      <td>22</td>\n",
-              "      <td>6</td>\n",
-              "      <td>1.519374e+09</td>\n",
-              "      <td>1532698</td>\n",
-              "      <td>11</td>\n",
-              "      <td>11</td>\n",
-              "      <td>1179</td>\n",
-              "      <td>1969</td>\n",
-              "      <td>648</td>\n",
-              "      <td>0</td>\n",
-              "      <td>...</td>\n",
-              "      <td>32</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.000000e+00</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>1</th>\n",
-              "      <td>500</td>\n",
-              "      <td>17</td>\n",
-              "      <td>1.519374e+09</td>\n",
-              "      <td>117573855</td>\n",
-              "      <td>3</td>\n",
-              "      <td>0</td>\n",
-              "      <td>1500</td>\n",
-              "      <td>0</td>\n",
-              "      <td>500</td>\n",
-              "      <td>500</td>\n",
-              "      <td>...</td>\n",
-              "      <td>8</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>58786927.5</td>\n",
-              "      <td>2.375324e+07</td>\n",
-              "      <td>75583006</td>\n",
-              "      <td>41990849</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>2</th>\n",
-              "      <td>500</td>\n",
-              "      <td>17</td>\n",
-              "      <td>1.519374e+09</td>\n",
-              "      <td>117573848</td>\n",
-              "      <td>3</td>\n",
-              "      <td>0</td>\n",
-              "      <td>1500</td>\n",
-              "      <td>0</td>\n",
-              "      <td>500</td>\n",
-              "      <td>500</td>\n",
-              "      <td>...</td>\n",
-              "      <td>8</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>58786924.0</td>\n",
-              "      <td>2.375325e+07</td>\n",
-              "      <td>75583007</td>\n",
-              "      <td>41990841</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>3</th>\n",
-              "      <td>22</td>\n",
-              "      <td>6</td>\n",
-              "      <td>1.519374e+09</td>\n",
-              "      <td>1745392</td>\n",
-              "      <td>11</td>\n",
-              "      <td>11</td>\n",
-              "      <td>1179</td>\n",
-              "      <td>1969</td>\n",
-              "      <td>648</td>\n",
-              "      <td>0</td>\n",
-              "      <td>...</td>\n",
-              "      <td>32</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.000000e+00</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>4</th>\n",
-              "      <td>500</td>\n",
-              "      <td>17</td>\n",
-              "      <td>1.519374e+09</td>\n",
-              "      <td>89483474</td>\n",
-              "      <td>6</td>\n",
-              "      <td>0</td>\n",
-              "      <td>3000</td>\n",
-              "      <td>0</td>\n",
-              "      <td>500</td>\n",
-              "      <td>500</td>\n",
-              "      <td>...</td>\n",
-              "      <td>8</td>\n",
-              "      <td>4000364.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>4000364</td>\n",
-              "      <td>4000364</td>\n",
-              "      <td>21370777.5</td>\n",
-              "      <td>1.528092e+07</td>\n",
-              "      <td>41989576</td>\n",
-              "      <td>7200485</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table>\n",
-              "<p>5 rows \u00d7 80 columns</p>\n",
-              "</div>"
-            ],
-            "text/plain": [
-              "   Dst Port  Protocol     Timestamp  Flow Duration  Tot Fwd Pkts  \\\n",
-              "0        22         6  1.519374e+09        1532698            11   \n",
-              "1       500        17  1.519374e+09      117573855             3   \n",
-              "2       500        17  1.519374e+09      117573848             3   \n",
-              "3        22         6  1.519374e+09        1745392            11   \n",
-              "4       500        17  1.519374e+09       89483474             6   \n",
-              "\n",
-              "   Tot Bwd Pkts  TotLen Fwd Pkts  TotLen Bwd Pkts  Fwd Pkt Len Max  \\\n",
-              "0            11             1179             1969              648   \n",
-              "1             0             1500                0              500   \n",
-              "2             0             1500                0              500   \n",
-              "3            11             1179             1969              648   \n",
-              "4             0             3000                0              500   \n",
-              "\n",
-              "   Fwd Pkt Len Min  ...  Fwd Seg Size Min  Active Mean  Active Std  \\\n",
-              "0                0  ...                32          0.0         0.0   \n",
-              "1              500  ...                 8          0.0         0.0   \n",
-              "2              500  ...                 8          0.0         0.0   \n",
-              "3                0  ...                32          0.0         0.0   \n",
-              "4              500  ...                 8    4000364.0         0.0   \n",
-              "\n",
-              "   Active Max  Active Min   Idle Mean      Idle Std  Idle Max  Idle Min  \\\n",
-              "0           0           0         0.0  0.000000e+00         0         0   \n",
-              "1           0           0  58786927.5  2.375324e+07  75583006  41990849   \n",
-              "2           0           0  58786924.0  2.375325e+07  75583007  41990841   \n",
-              "3           0           0         0.0  0.000000e+00         0         0   \n",
-              "4     4000364     4000364  21370777.5  1.528092e+07  41989576   7200485   \n",
-              "\n",
-              "    Label  \n",
-              "0  Benign  \n",
-              "1  Benign  \n",
-              "2  Benign  \n",
-              "3  Benign  \n",
-              "4  Benign  \n",
-              "\n",
-              "[5 rows x 80 columns]"
-            ]
-          },
-          "execution_count": 62,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "data_23_cleaned = pd.read_csv('result23022018.csv')\n",
-        "data_23_cleaned.head()"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "cGXf6PmhYTNR",
-        "outputId": "3371cd8e-e8e3-4382-ad17-bfffd8e3d8ac"
-      },
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "Benign              1042301\n",
-              "Brute Force -Web        362\n",
-              "Brute Force -XSS        151\n",
-              "SQL Injection            53\n",
-              "Name: Label, dtype: int64"
-            ]
-          },
-          "execution_count": 63,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "data_23_cleaned.Label.value_counts()"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "bWUf-dk1ywpU"
-      },
-      "source": [
-        "### Load the Model"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "97lIL236YTNR"
-      },
-      "source": [
-        "Here we load the pretrained model. The model is trained using the data from the same date."
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Bq8_hM-RfEeR"
-      },
-      "source": [
-        "We have modified [the original model](https://github.com/rambasnet/DeepLearning-IDS/blob/master/keras_tensorflow_models/02-23-2018.csv_adam_10_10_multiclass_baseline_model_1561316601.model) slightly and changed the number of classes from four (Benign, BruteForce-Web, BruteForce-XSS, SQL-Injection) to two (Benign and Attack). In the step below we will download and unzip our modified model."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "eRTvBzhoqPkR"
-      },
-      "outputs": [],
-      "source": [
-        "!wget -q -O it_threat_model.model.zip \"https://drive.google.com/uc?export=download&id=1VYMHOk_XMAc-QFJ_8CAPvWFfHnLpS2J_\" \n",
-        "!unzip -q it_threat_model.model.zip\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "WP44njScywpU",
-        "outputId": "2a9d2001-0328-4602-c595-372c5bf67aa4"
-      },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:SavedModel saved prior to TF 2.5 detected when loading Keras model. Please ensure that you are saving the model with model.save() or tf.keras.models.save_model(), *NOT* tf.saved_model.save(). To confirm, there should be a file named \"keras_metadata.pb\" in the SavedModel directory.\n",
-            "Model: \"sequential\"\n",
-            "_________________________________________________________________\n",
-            "Layer (type)                 Output Shape              Param #   \n",
-            "=================================================================\n",
-            "dense (Dense)                (None, 128)               10240     \n",
-            "_________________________________________________________________\n",
-            "dense_1 (Dense)              (None, 64)                8256      \n",
-            "_________________________________________________________________\n",
-            "dense_2 (Dense)              (None, 1)                 65        \n",
-            "=================================================================\n",
-            "Total params: 18,561\n",
-            "Trainable params: 18,561\n",
-            "Non-trainable params: 0\n",
-            "_________________________________________________________________\n"
-          ]
-        }
-      ],
-      "source": [
-        "model = keras.models.load_model('it_threat_model.model')\n",
-        "model.summary()"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "2rCdzSrfywpV"
-      },
-      "outputs": [],
-      "source": [
-        "# Select the first layer\n",
-        "layer_name = 'dense' \n",
-        "intermediate_layer_model = Model(inputs=model.input,\n",
-        "                                 outputs=model.get_layer(layer_name).output)"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "V3dx9XkPYTNV"
-      },
-      "source": [
-        "### Upload Data"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "yO1eySIdjSOG"
-      },
-      "source": [
-        "\n",
-        "Let's define the item's ids in a way that will reflect the event's label.  Then, we index the events in Pinecone's vector index."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "nk4ZjGg-ywpX",
-        "outputId": "bce8f1fd-98e3-4313-d625-c404a4eca184"
-      },
-      "outputs": [
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1042867/1042867 [01:43<00:00, 10067.22it/s]\n"
-          ]
-        }
-      ],
-      "source": [
-        "from tqdm import tqdm\n",
-        "items_to_upload = []\n",
-        "\n",
-        "model_res = intermediate_layer_model.predict(K.constant(data_23_cleaned.iloc[:,:-1]))\n",
-        "\n",
-        "for i, res in tqdm(zip(data_23_cleaned.iterrows(), model_res), total=len(model_res)):\n",
-        "    benign_or_attack = i[1]['Label'][:3]\n",
-        "    items_to_upload.append((benign_or_attack + '_' + str(i[0]), res.tolist()))"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "cMzD8s3ps3k0"
-      },
-      "outputs": [],
-      "source": [
-        "import itertools\n",
-        "\n",
-        "def chunks(iterable, batch_size=100):\n",
-        "    it = iter(iterable)\n",
-        "    chunk = tuple(itertools.islice(it, batch_size))\n",
-        "    while chunk:\n",
-        "        yield chunk\n",
-        "        chunk = tuple(itertools.islice(it, batch_size))"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "6C7er-8Gl2Rg"
-      },
-      "source": [
-        "You can lower the NUMBER_OF_ITEMS and, by doing so, limit the number of uploaded items. "
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "_Ti9p0P-ywpX",
-        "scrolled": true
-      },
-      "outputs": [],
-      "source": [
-        "NUMBER_OF_ITEMS = len(items_to_upload)\n",
-        "\n",
-        "for batch in chunks(items_to_upload[:NUMBER_OF_ITEMS], 50):\n",
-        "    index.upsert(vectors=batch)"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "XgF8vW8PtaRX"
-      },
-      "outputs": [],
-      "source": [
-        "items_to_upload.clear()"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "fglWJfAq_kw3"
-      },
-      "source": [
-        "Let's verify all items were inserted. "
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "xU172A4EywpY",
-        "outputId": "80008942-0000-40e5-92a1-66b767a489c6"
-      },
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "{'dimension': 128, 'namespaces': {'': {'vector_count': 1042867}}}"
-            ]
-          },
-          "execution_count": 69,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "index.describe_index_stats()"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "3CGzW3mVywpY"
-      },
-      "source": [
-        "## Query"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Ywuld4BylAIu"
-      },
-      "source": [
-        "First, we will randomly select a Benign/Attack event and query the vector index using the event embedding. Then, we will use data from different day, that contains same set of attacks to query on a bigger sample."
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "m5H4rMyOYTNX"
-      },
-      "source": [
-        "\n",
-        "### Evaluate the Rare Event Classification Model"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "velMK_XlYTNX"
-      },
-      "source": [
-        "We will use network intrusion dataset for 22-02-2018 for querying and testing the Pinecone."
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "VC2AVfWsj7em"
-      },
-      "source": [
-        "First, let's clean the data."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "bW9mhYvOYTNX",
-        "outputId": "84a9d548-de90-40fe-c183-fc2ca48de2ec",
-        "scrolled": true
-      },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "cleaning Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\n",
-            "total rows read = 1048576\n",
-            "all done writing 1042966 rows; dropped 5610 rows\n"
-          ]
-        }
-      ],
-      "source": [
-        "!python DeepLearning-IDS/data_cleanup.py \"Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\" \"result22022018\""
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 278
-        },
-        "id": "xqMuz0jKYTNX",
-        "outputId": "d9f8c333-5e3f-4509-b84a-901bacf42487"
-      },
-      "outputs": [
-        {
-          "data": {
-            "text/html": [
-              "<div>\n",
-              "<style scoped>\n",
-              "    .dataframe tbody tr th:only-of-type {\n",
-              "        vertical-align: middle;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe tbody tr th {\n",
-              "        vertical-align: top;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe thead th {\n",
-              "        text-align: right;\n",
-              "    }\n",
-              "</style>\n",
-              "<table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              "    <tr style=\"text-align: right;\">\n",
-              "      <th></th>\n",
-              "      <th>Dst Port</th>\n",
-              "      <th>Protocol</th>\n",
-              "      <th>Timestamp</th>\n",
-              "      <th>Flow Duration</th>\n",
-              "      <th>Tot Fwd Pkts</th>\n",
-              "      <th>Tot Bwd Pkts</th>\n",
-              "      <th>TotLen Fwd Pkts</th>\n",
-              "      <th>TotLen Bwd Pkts</th>\n",
-              "      <th>Fwd Pkt Len Max</th>\n",
-              "      <th>Fwd Pkt Len Min</th>\n",
-              "      <th>...</th>\n",
-              "      <th>Fwd Seg Size Min</th>\n",
-              "      <th>Active Mean</th>\n",
-              "      <th>Active Std</th>\n",
-              "      <th>Active Max</th>\n",
-              "      <th>Active Min</th>\n",
-              "      <th>Idle Mean</th>\n",
-              "      <th>Idle Std</th>\n",
-              "      <th>Idle Max</th>\n",
-              "      <th>Idle Min</th>\n",
-              "      <th>Label</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <th>0</th>\n",
-              "      <td>22</td>\n",
-              "      <td>6</td>\n",
-              "      <td>1.519288e+09</td>\n",
-              "      <td>20553406</td>\n",
-              "      <td>10</td>\n",
-              "      <td>7</td>\n",
-              "      <td>1063</td>\n",
-              "      <td>1297</td>\n",
-              "      <td>744</td>\n",
-              "      <td>0</td>\n",
-              "      <td>...</td>\n",
-              "      <td>20</td>\n",
-              "      <td>1027304.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>1027304</td>\n",
-              "      <td>1027304</td>\n",
-              "      <td>1.952608e+07</td>\n",
-              "      <td>0.000000e+00</td>\n",
-              "      <td>19526080</td>\n",
-              "      <td>19526080</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>1</th>\n",
-              "      <td>34989</td>\n",
-              "      <td>6</td>\n",
-              "      <td>1.519288e+09</td>\n",
-              "      <td>790</td>\n",
-              "      <td>2</td>\n",
-              "      <td>0</td>\n",
-              "      <td>848</td>\n",
-              "      <td>0</td>\n",
-              "      <td>848</td>\n",
-              "      <td>0</td>\n",
-              "      <td>...</td>\n",
-              "      <td>20</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0.000000e+00</td>\n",
-              "      <td>0.000000e+00</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>2</th>\n",
-              "      <td>500</td>\n",
-              "      <td>17</td>\n",
-              "      <td>1.519288e+09</td>\n",
-              "      <td>99745913</td>\n",
-              "      <td>5</td>\n",
-              "      <td>0</td>\n",
-              "      <td>2500</td>\n",
-              "      <td>0</td>\n",
-              "      <td>500</td>\n",
-              "      <td>500</td>\n",
-              "      <td>...</td>\n",
-              "      <td>8</td>\n",
-              "      <td>4000203.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>4000203</td>\n",
-              "      <td>4000203</td>\n",
-              "      <td>3.191524e+07</td>\n",
-              "      <td>3.792787e+07</td>\n",
-              "      <td>75584115</td>\n",
-              "      <td>7200679</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>3</th>\n",
-              "      <td>500</td>\n",
-              "      <td>17</td>\n",
-              "      <td>1.519288e+09</td>\n",
-              "      <td>99745913</td>\n",
-              "      <td>5</td>\n",
-              "      <td>0</td>\n",
-              "      <td>2500</td>\n",
-              "      <td>0</td>\n",
-              "      <td>500</td>\n",
-              "      <td>500</td>\n",
-              "      <td>...</td>\n",
-              "      <td>8</td>\n",
-              "      <td>4000189.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>4000189</td>\n",
-              "      <td>4000189</td>\n",
-              "      <td>3.191524e+07</td>\n",
-              "      <td>3.792788e+07</td>\n",
-              "      <td>75584130</td>\n",
-              "      <td>7200693</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>4</th>\n",
-              "      <td>500</td>\n",
-              "      <td>17</td>\n",
-              "      <td>1.519288e+09</td>\n",
-              "      <td>89481361</td>\n",
-              "      <td>6</td>\n",
-              "      <td>0</td>\n",
-              "      <td>3000</td>\n",
-              "      <td>0</td>\n",
-              "      <td>500</td>\n",
-              "      <td>500</td>\n",
-              "      <td>...</td>\n",
-              "      <td>8</td>\n",
-              "      <td>4000554.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>4000554</td>\n",
-              "      <td>4000554</td>\n",
-              "      <td>2.137020e+07</td>\n",
-              "      <td>1.528109e+07</td>\n",
-              "      <td>41990741</td>\n",
-              "      <td>7200848</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table>\n",
-              "<p>5 rows \u00d7 80 columns</p>\n",
-              "</div>"
-            ],
-            "text/plain": [
-              "   Dst Port  Protocol     Timestamp  Flow Duration  Tot Fwd Pkts  \\\n",
-              "0        22         6  1.519288e+09       20553406            10   \n",
-              "1     34989         6  1.519288e+09            790             2   \n",
-              "2       500        17  1.519288e+09       99745913             5   \n",
-              "3       500        17  1.519288e+09       99745913             5   \n",
-              "4       500        17  1.519288e+09       89481361             6   \n",
-              "\n",
-              "   Tot Bwd Pkts  TotLen Fwd Pkts  TotLen Bwd Pkts  Fwd Pkt Len Max  \\\n",
-              "0             7             1063             1297              744   \n",
-              "1             0              848                0              848   \n",
-              "2             0             2500                0              500   \n",
-              "3             0             2500                0              500   \n",
-              "4             0             3000                0              500   \n",
-              "\n",
-              "   Fwd Pkt Len Min  ...  Fwd Seg Size Min  Active Mean  Active Std  \\\n",
-              "0                0  ...                20    1027304.0         0.0   \n",
-              "1                0  ...                20          0.0         0.0   \n",
-              "2              500  ...                 8    4000203.0         0.0   \n",
-              "3              500  ...                 8    4000189.0         0.0   \n",
-              "4              500  ...                 8    4000554.0         0.0   \n",
-              "\n",
-              "   Active Max  Active Min     Idle Mean      Idle Std  Idle Max  Idle Min  \\\n",
-              "0     1027304     1027304  1.952608e+07  0.000000e+00  19526080  19526080   \n",
-              "1           0           0  0.000000e+00  0.000000e+00         0         0   \n",
-              "2     4000203     4000203  3.191524e+07  3.792787e+07  75584115   7200679   \n",
-              "3     4000189     4000189  3.191524e+07  3.792788e+07  75584130   7200693   \n",
-              "4     4000554     4000554  2.137020e+07  1.528109e+07  41990741   7200848   \n",
-              "\n",
-              "    Label  \n",
-              "0  Benign  \n",
-              "1  Benign  \n",
-              "2  Benign  \n",
-              "3  Benign  \n",
-              "4  Benign  \n",
-              "\n",
-              "[5 rows x 80 columns]"
-            ]
-          },
-          "execution_count": 71,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "data_22_cleaned = pd.read_csv('result22022018.csv')\n",
-        "data_22_cleaned.head()"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "262mxbQDYTNY",
-        "outputId": "7540316e-2c26-49ad-a487-85a4fe0bd84c"
-      },
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "Benign              1042603\n",
-              "Brute Force -Web        249\n",
-              "Brute Force -XSS         79\n",
-              "SQL Injection            34\n",
-              "Name: Label, dtype: int64"
-            ]
-          },
-          "execution_count": 72,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "data_22_cleaned.Label.value_counts()"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "P6nJdtveYTNY"
-      },
-      "source": [
-        "Let's define a sample that will include all different types of web attacks for this specific date."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "N7vwZk6HYTNY",
-        "outputId": "96c03fc3-c086-4398-b58a-2c5c57eb4ded"
-      },
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "Benign              1638\n",
-              "Brute Force -Web     249\n",
-              "Brute Force -XSS      79\n",
-              "SQL Injection         34\n",
-              "Name: Label, dtype: int64"
-            ]
-          },
-          "execution_count": 73,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "data_sample = data_22_cleaned[-2000:]\n",
-        "data_sample.Label.value_counts()"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "neSxNwYckGMK"
-      },
-      "source": [
-        "Now, we will query the test dataset and save predicted and expected results to create a confusion matrix."
-      ]
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/security/it-threat-detection.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/security/it-threat-detection.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "b4Yv1jeGywpL"
+   },
+   "source": [
+    "## IT Threat Detection With Similarity Search\n",
+    "\n",
+    "This notebook shows how to use Pinecone's similarity search as a service to build an application for detecting rare events. Such applications are common in cyber-security and fraud detection domains, wherein only a tiny fraction of the events are malicious. \n",
+    "\n",
+    "Here we will build a network intrusion detector. Network intrusion detection systems monitor incoming and outgoing network traffic flow, raising alarms whenever a threat is detected. Here we use a deep-learning model and similarity search to detect and classify network intrusion traffic.\n",
+    "\n",
+    "We will start by indexing a set of labeled traffic events in the form of vector embeddings. Each event is either benign or malicious. The vector embeddings are rich, mathematical representations of the network traffic events. They make it possible to determine how similar the network events are to one another using similarity-search algorithms built into Pinecone. Here we will transform network traffic events into vectors using a deep learning model from an academic work on intrusion detection.\n",
+    "\n",
+    "\n",
+    "We will then take some new (unseen) network events and search through the index to find the most similar matches, along with their labels. In this way, we will propagate the matched labels to classify the unseen events as benign or malicious. Keep in mind that intrusion detection is a challenging classification task because malicious events are sporadic. The similarity search service helps us sift out the most relevant historical labeled events. That way, we identify these rare events while keeping a low rate of false alarms. \n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "1OUSClcPhU4j"
+   },
+   "source": [
+    "## Setting up Pinecone\n",
+    "\n",
+    "We will first install and initialize Pinecone. You can get your [API Key here](https://app.pinecone.io)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "251n1avKzCrm"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install -qU pinecone"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "_cGTuY8dywpV",
+    "outputId": "da0b57bf-bde2-401f-e502-6ad80f7fc6cc"
+   },
+   "outputs": [
     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 136,
-          "referenced_widgets": [
-            "567d04116b1b4d21bac2348535b750a0",
-            "8f92a228cbc54b30bcb22d0598e9577f",
-            "8fe529938a4f40ac905e145e423d856e",
-            "a4b449113a734e90825bc1ef87ca3d3c",
-            "cda25dc5f86344b3850c92e59085c06c",
-            "3586c81ff82048ed80d69b7a4b5bd6b3",
-            "c427de521f054c4997d69586251bed4f",
-            "90d8d1d9da814d90b0f0bf331102d4df",
-            "7f2cabefd9eb4674a63ef3d56a5be122",
-            "dda94f4a1ea946b7996a928374dda4a5",
-            "ea6f763369cd4b478998ea4d3e8f20e6",
-            "e752273786584dd4baa77ac3f4528849",
-            "aef6058200fa454c90f51760685e25db",
-            "87b4d4b12452401cb82285364dae3576",
-            "8a60896e0288471a91089a03a75b210b",
-            "fe75c2a93c6e4619a2731a4c01a71619",
-            "5c64d617d0d94a6a9797d20f0d1e80f3",
-            "25ac309f2f5d43169ed0bda88300c8d2",
-            "9ebe5f94bbb743058602c9af26cd4eaf",
-            "3db0b4717ac140b78bf7d75e7ebeaf39",
-            "fcc29e1c6b304044a53522c550c4b49d",
-            "cd9e8a060d05491f8c4d74871c9560fa"
-          ]
-        },
-        "id": "8u6cg_1tYTNY",
-        "outputId": "dbe81cb3-88fb-4cd9-91fe-773c960ca108",
-        "scrolled": true
-      },
-      "outputs": [
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [10:48<00:00, 32.44s/it]\n"
-          ]
-        }
-      ],
-      "source": [
-        "y_true = []\n",
-        "y_pred = []\n",
-        "\n",
-        "BATCH_SIZE = 100\n",
-        "\n",
-        "for i in tqdm(range(0, len(data_sample), BATCH_SIZE)):\n",
-        "    test_data = data_sample.iloc[i:i+BATCH_SIZE, :]\n",
-        "    \n",
-        "    # Create vector embedding using the model\n",
-        "    test_vector = intermediate_layer_model.predict(K.constant(test_data.iloc[:, :-1]))\n",
-        "    # Query using the vector embedding\n",
-        "    query_results = []\n",
-        "\n",
-        "    for xq in test_vector.tolist():\n",
-        "        query_res = index.query(vector=xq, top_k=50)\n",
-        "        query_results.append(query_res)\n",
-        "    \n",
-        "    ids = [res.id for result in query_results for res in result.matches]\n",
-        "    \n",
-        "    for label, res in zip(test_data.Label.values, query_results):\n",
-        "        # Add to the true list\n",
-        "        if label == 'Benign':\n",
-        "            y_true.append(0)\n",
-        "        else:\n",
-        "            y_true.append(1)\n",
-        "        \n",
-        "        counter = Counter(match.id.split('_')[0] for match in res.matches)\n",
-        "\n",
-        "        # Add to the predicted list\n",
-        "        if counter['Bru'] or counter['SQL']:\n",
-        "            y_pred.append(1)\n",
-        "        else:\n",
-        "            y_pred.append(0)\n"
+     "data": {
+      "text/plain": [
+       "[]"
       ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 313
-        },
-        "id": "HV3-gkdWYTNZ",
-        "outputId": "20103ea7-713b-4e2b-9590-ecc70ef9b76e"
-      },
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "[Text(0, 0.5, 'Benign'), Text(0, 1.5, 'Attack')]"
-            ]
-          },
-          "execution_count": 83,
-          "metadata": {},
-          "output_type": "execute_result"
-        },
-        {
-          "data": {
-            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEWCAYAAABliCz2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAbKUlEQVR4nO3debxVdb3/8debc5RBQEZxKBAcILUUtX5eS1IzE6+aM6JlGE455HXWMBwoLbNBrzl3zTmnLEwihyupZDeQxCEpUqIUQTgCKjId+Pz+WOvQ5sg5Z4t7nQ3n+34+HufBXsNe38+Gzfus/V3f/V2KCMzMrO1rV+0CzMysdTjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cC3NkNSR0kPS1oo6f6PcJxjJD1aydqqQdJvJX2t2nXYusOBb61O0tGSJkt6T9KbeTB9rgKHPhzoA/SMiCPW9iARcVdE7FuBelYjaU9JIemXjdbvmK+fUOZxLpF0Z0v7RcTQiLhtLcu1NsiBb61K0lnAT4DLycK5L3Ad8OUKHL4f8LeIqK/AsYoyF9hdUs+SdV8D/lapBpTx/237AL8prNVI2hi4DDg1In4ZEYsiYnlEPBwR5+b7tJf0E0mz8p+fSGqfb9tT0uuSzpb0Vv7p4Lh826XAaGBY/slhZOMzYUlb5mfStfnyCEmvSXpX0gxJx5Ssf6bkebtLmpR3FU2StHvJtgmSxkiamB/nUUm9mvlrWAb8Cjgqf34NcCRwV6O/q6sl/UvSO5Kek7RHvn4/4Fslr3NqSR3flTQReB8YkK87Pt9+vaQHSo7/fUlPSFLZ/4C23nPgW2v6D6AD8FAz+4wCdgN2AnYEPgNcVLJ9U2BjYAtgJPBTSd0j4mKyTw33RkTniPhZc4VI2gi4BhgaEV2A3YHn17BfD+CRfN+ewI+ARxqdoR8NHAdsAmwInNNc28DtwLH54y8BLwOzGu0ziezvoAdwN3C/pA4RMb7R69yx5DlfBU4EugAzGx3vbOBT+S+zPcj+7r4WnlslKQ58a009gXktdLkcA1wWEW9FxFzgUrIga7A83748IsYB7wED17KelcAOkjpGxJsR8fIa9vlPYHpE3BER9RFxDzANOLBkn1sj4m8RsRi4jyyomxQRfwB6SBpIFvy3r2GfOyOiLm/zh0B7Wn6dP4+Il/PnLG90vPeBr5D9wroTOD0iXm/heNbGOPCtNdUBvRq6VJqwOaufnc7M1606RqNfGO8DnT9sIRGxCBgGnAy8KekRSYPKqKehpi1KlmevRT13AKcBe7GGTzx5t9UreTfSArJPNc11FQH8q7mNEfEn4DVAZL+YLDEOfGtNzwJLgIOb2WcW2cXXBn35YHdHuRYBnUqWNy3dGBG/i4gvApuRnbXfXEY9DTW9sZY1NbgDOAUYl599r5J3uZxP1rffPSK6AQvJghqgqW6YZrtnJJ1K9klhFnDe2pdu6ysHvrWaiFhIdmH1p5IOltRJ0gaShkq6Mt/tHuAiSb3zi5+jybog1sbzwBBJffMLxhc2bJDUR9JBeV/+UrKuoRVrOMY4YNt8KGmtpGHAdsBv1rImACJiBvB5smsWjXUB6slG9NRKGg10Ldk+B9jyw4zEkbQt8B2ybp2vAudJarbrydoeB761qoj4EXAW2YXYuWTdEKeRjVyBLJQmAy8ALwJT8nVr09ZjwL35sZ5j9ZBuR3YhcxbwNln4nrKGY9QBB+T71pGdGR8QEfPWpqZGx34mItb06eV3wG/JhmrOJPtUVNpd0/ClsjpJU1pqJ+9CuxP4fkRMjYjpZCN97mgYAWVpkC/Sm5mlwWf4ZmaJcOCbmSXCgW9mlggHvplZIpr7AkxVdRx8mq8m2zpp/qRrq12CWZM61NLk/Eg+wzczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD3
8wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NE1BbdgKQaoE9pWxHxz6LbNTOz1RUa+JJOBy4G5gAr89UBfKrIds3M7IOKPsM/AxgYEXUFt2NmZi0oug//X8DCgtswM7MyFH2G/xowQdIjwNKGlRHxo4LbNTOzRooO/H/mPxvmP2ZmViWFBn5EXFrk8c3MrHxFj9J5mGxUTqmFwGTgxohYUmT7Zmb2b0VftH0NeA+4Of95h2yI5rb5spmZtZKi+/AHR8SQkuWHJT0VEUMkvVxw22ZmVqLoM/zekvo2LOSPe+WLywpu28zMShR9hn828IykVwEB/YFTJG0E3FZw223aDRcfw9AhOzD37XfZ9YjLV63/xlGf5+RhQ6hfsZLxT7/EqKt/za7b9+Pabw8HQILv3jCOsU++QOdO7Xn8f85c9dwtNunGL8ZN4tyrHmz112PpGX3RhTz1+wn06NGTX/76N9UuJwmKaHxNtcINSO2BQWSBP63cC7UdB59WbGHruc/uvBWL3l/KLWOOXRX4Q3bdhvOP/xKHnH4Dy5bX07t7Z+bOf4+OHTZg2fIVrFixkk17deX/7r2QAfuOYsWKlasdc+Jd53HeDx9k4pRXq/GS1hvzJ11b7RLahOcmT6JTp06MuvB8B34FdahFTW0r5Axf0t4R8b+SDm20aYAkIuKXRbSbkolTXqXvZj1WW3fiEXtw1a2PsWx5PQBz578HwOIly1ft037DDVjTL/mt+vZmkx5dHPbWanbZ9dO88cbr1S4jKUV16Xwe+F/gwDVsC8CBX4Ct+23CZwdvxaWnHsiSZcu58EcP8dxfsolJP71DP2645Cv03awHIy+67QNn90futwsPPDqlGmWbWSspJPAj4uL8z+M+zPMknQicCFD7sT2p7bV9AdW1XbU17ejetRNDjr2KXbfvx51Xfp1PHHAJAJNemskuh3+Xgf37cMtlX+V3E//C0mX1q557xJd2YeRFt1epcjNrDUV/8ao9cBiwJavPh3/ZmvaPiJuAm8B9+GvjjTkL+NUTUwGY/PJMVq4MenXvzLy8awfgrzPmsGjxMrbfenOm5Gf/n9x2C2pravjzK/+qSt1m1jqKHpb5a+DLQD2wqOTHCvDwhBfY8zPbArB1303YcINa5s1/j36b96SmJvun7rtZd7bdsg8zZ/17xuoj99uF+8ZPrkrNZtZ6ih6W+bGI2K/gNpJ02xUj2GOXbejVrTN/Hz+GMTeM47ZfPcuNlxzD5Pu/xbLlKzh+9B0A7D54AOccty/L61ewcmVwxuX3Urfg3793D/vizhx8+vXVeimWqPPPOYvJk/7EggXz+eLeQ/jGqadz6GFHVLusNq3QYZmSbgL+OyJe/LDPdZeOras8LNPWZa0+LLPE54ARkmaQzYcvICLCtzg0M2tlRQf+0IKPb2ZmZSr0om1EzAQ+DuydP36/6DbNzGzNCg1fSRcD5wMX5qs2AO4ssk0zM1uzos+2DwEOIh+KGRGzgC4Ft2lmZmtQdOAvi2wYUADks2SamVkVFB3490m6Eegm6QTgcXynKzOzqij6JuZXSfoi2a0NBwKjI+KxIts0M7M1K3pYJnnAPyapF1DX0v5mZlaMQrp0JO0maYKkX0oaLOkl4CVgjiRPtWBmVgVFneFfC3wL2JhsXvyhEfFHSYOAe4DxBbVrZmZNKOqibW1EPBoR9wOzI+KPABExraD2zMysBUUFfuntlBY32uZJ0czMqqCoLp0dJb1DNllax/wx+XKHgto0M7NmFHWLw5oijmtmZmvPE5mZmSXCgW9mlggHvplZIhz4ZmaJcOCbmSXCgW9mlggHvplZIhz4Z
maJcOCbmSXCgW9mlggHvplZIhz4ZmaJcOCbmSXCgW9mlggHvplZIhz4ZmaJcOCbmSXCgW9mlogmb3EoaefmnhgRUypfjpmZFaW5e9r+sJltAexd4VrMzKxATQZ+ROzVmoWYmVmxmjvDX0XSDsB2QIeGdRFxe1FFmZlZ5bUY+JIuBvYkC/xxwFDgGcCBb2a2HilnlM7hwBeA2RFxHLAj0L7QqszMrOLKCfzFEbESqJfUFXgLGFBsWWZmVmnl9OFPltQNuBl4DngP+FOhVZmZWcW1GPgRcUr+8AZJ44GuEfFCsWWZmVmllXPRdsia1kXEU8WUZGZmRSinS+fckscdgM+Qde34i1dmZuuRcrp0DixdlvRx4MrCKjIzs0KszeRprwM7VLoQMzMrVjl9+P9NNncOZL8gdgKmFlmUmZlVXlnDMkse1wP3RMTEguoxM7OClBP43SLi6tIVks5ovM7MzNZtiojmd5CmRMTOjdb9OSIGF1nYrAXLmi/MrEqW1q+sdglmTerfq4Oa2tbcDVCGA0cD/SWNLdnUBairXHlmZtYamuvS+QPwJtCL1W+G8i7gb9qama1nmrsBykxgpqRjgFkRsQRAUkfgY8A/WqVCMzOriHLG4d8HlHZargDuL6YcMzMrSjmBXxsRyxoW8scbFleSmZkVoZzAnyvpoIYFSV8G5hVXkpmZFaGccfgnA3dJujZffh04triSzMysCOVMnvYqsJukzmTj9t8tviwzM6u0Frt0JF0uqVtEvBcR70rqLuk7rVGcmZlVTjl9+EMjYkHDQkTMB/YvriQzMytCOYFfI6l9w0I+Dr99M/ubmdk6qJyLtncCT0i6NV8+DrituJLMzKwI5Vy0vVLSC8A+gIDxQL+iCzMzs8oq945Xs8m+bXsY8AXglcIqMjOzQjQ3W+a2wFHAcLLZMe8lG5a5VyvVZmZmFdRcl8404GngwIj4O4CkM1ulKjMzq7jmunQOI+vKeVLSzZK+QNaHb2Zm66EmAz8iHoqIYcAgYAJwJtBH0vWS9m2l+szMrEJavGgbEYsi4q6IOIBsHvzngQsKr8zMzCqqxXvaVovvaWvrKt/T1tZlzd3TttxhmWZmtp5z4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiCgt8SSPXsO57RbVnZmbNqy3w2IdLWhIRdwFIug5oX2B7ZmbWjCID/1BgrKSVwFDg7Yg4pcD2zMysGRUPfEk9ShaPB34FTAQuk9QjIt6udJtmZtYyRURlDyjNAAJQyZ8NIiIGlHOcWQuWVbYwswpZWr+y2iWYNal/rw5qalvFz/Ajon+lj2lmZh9dkaN0TpXUrWS5uyT34ZuZVUmR4/BPiIgFDQsRMR84ocD2zMysGUUGfjtJq/qSJNUAGxbYnpmZNaPIYZm/A+6TdAPZxduTgfEFtpes74/5Nn+c+BTduvfg1nseAuDnN1/HI79+kI27dQfg+G98k90+O4RXXn6RH15xKQARwYgTTmGPPb9QtdqtbZs7ZzY/GDOK+W/XIYn9v3w4Bx95DO++s5DLv30ec2bPos+mm/OtMT+gS9euLF++nGuuvIzp0/6C2rXj5DPOY8edP13tl9FmVHyUzqoDS+2Ak4AvkI3UeRS4JSJWlPN8j9Ip39Q/T6Zjx05ccemo1QK/Y8dODPvKiNX2XbJkMRvUbkBNbS118+Zy/FcO54HfPEFNbZG/+9sWj9IpX928ubxdN49tBn6C9xct4vSRRzH6ip/w2LixdOnalWFfHcm9d/yM9959h5GnnMnYB3/B9Gkvc/aoMSyYX8dFZ5/KNbfcTbt2ngWmXM2N0insbzEiVkbE9RFxeEQcFhE3lhv29uHsOHhXunbduKx9O3TouCrcly1bSpPvDLMK6NmrN9sM/AQAnTbaiI/3G
0Dd3Ld49ukn2WfoQQDsM/Qg/vDUkwD88x+vsdOu/w+Abt170rlzF6ZPe7k6xbdBRY7S2UbSA5L+Ium1hp+i2rMPeuiBexh5zKF8f8y3efedhavW/+WlFxhx1MF8/ehDOfOC0T67t1Yx+803eHX6NAZu/0kWzH+bnr16A9kvhYULsu9jDth6W559egIr6uuZPet1pv/1FebOmVPNstuUIj8n3QpcD9QDewG3A3c09wRJJ0qaLGnynT+/pcDS2r6DDj2Sux4cx813PEDPXr257uqrVm3bbodP8fNf/Iobbv0Fd992C8uWLq1ipZaCxe+/z3dGnc1J3zyXjTbq3OR+X/rPg+nduw+njzyaG67+AdvtsCM1tTWtWGnbVuSpXceIeEKSImImcImkp4GLm3pCRNwE3ATuw/+oevTsterxAV8+jAvPPu0D+/TrP4AOHToy47W/M/AT27dmeZaQ+vrljBl1Fnvtuz+f23MfALp170HdvLn07NWbunlz2bhbNiNLTW0tJ51x7qrnnnnSsWz+sb5VqbstKvIMf0l+4Xa6pNMkHQJsUmB7VqJu3txVj5/+/RP0H7A1AG/Oep0V9fUAzH5zFv/65z/YdLPNq1KjtX0RwY+vuIS+/QZw2FHHrlq/2+f25PHfjgXg8d+O5T/22AvIBhUsWfw+AFP+9Cw1NTX0679V6xfeRhU5SufTwCtAN2AM0BW4MiL+r5zn+wy/fGMuOo/np0xi4YIFdO/RgxEnnsrU5ybx9+nTkMSmm23BWReMpmev3jw67mHuvv1n1NbW0q5dO44deRKf+7yHZX4YHqVTvpemTuGcU45jy622oZ2y88sRJ53OoO0/yeXfPpe35sxmkz6bMuo7V9Gl68bMfvMNRp35Ddq1a0fP3ptw5oWX0GdTn5B8GM2N0iky8I+IiPtbWtcUB76tqxz4ti6ryrBM4MIy15mZWSsoYj78ocD+wBaSrinZ1JVsxI6ZmVVBEaN0ZgGTgYOA50rWvwucWUB7ZmZWhiLmw58KTJXUJyJuK90m6Qzg6kq3aWZmLSuyD/+oNawbUWB7ZmbWjCL68IcDRwP9JY0t2dQVmFfp9szMrDxF9OH/AXgT6AX8sGR9AMMKaM/MzMpQRB/+TGAm8B+SdiI72z8SmAE8WOn2zMysPEV06WxL1n8/HKgD7iX7gtdelW7LzMzKV0SXzjTgaeDAiPg7gCQPxzQzq7IiRukcBswGnpR0s6SGO16ZmVkVVTzwI+KhiBgGDAImkH3Zqo+k6yXtW+n2zMysPEXe4nBRRNwVEQcAHwOeBy4oqj0zM2teYbNlflSeLdPWVZ4t09Zl1Zot08zM1iEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEIqLaNVgrkHRiRNxU7TrMGvN7s/X4DD8dJ1a7ALMm+L3ZShz4ZmaJcOCbmSXCgZ8O95HausrvzVbii7ZmZonwGb6ZWSIc+GZmiXDgrwckrZD0vKSpkqZI2v0jHOsySftUsj4zAEmHSApJg/LlLSUdXbJ9J0n7f4Tj/0NSr0rUmioH/vphcUTsFBE7AhcCV6ztgSJidEQ8XrnSzFYZDjwDHJUvbwkcXbJ9J2CtA98+Ogf++qcrML9hQdK5kiZJekHSpfm6LSW9IulmSS9LelRSx3zbzyUdnj/eX9I0Sc9IukbSb/L1l0j6H0kTJL0m6ZtVeJ22HpHUGfgsMJJ/B/73gD3yT6fnA5cBw/LlYZI+I+kPkv6c/zkwP1aNpKskvZi/r09v1FZHSeMlndCKL7FNqK12AVaWjpKeB
zoAmwF7A0jaF9gG+AwgYKykIcA/8/XDI+IESfcBhwF3NhxQUgfgRmBIRMyQdE+jNgcBewFdgL9Kuj4ilhf5Im29djAwPiL+JultSTsDFwDnRMQBAJLmALtGxGn5cley91993s14Odn79ESgPzA439ajpJ3OwC+A2yPi9lZ7dW2Ez/DXDw1dOoOA/YDbJQnYN//5MzCFLKS3yZ8zIyKezx8/R/bxutQg4LWImJEvNw78RyJiaUTMA94C+lTyBVmbM5wsiMn/HF7GczYG7pf0EvBjYPt8/T7ADRFRDxARb5c859fArQ77teMz/PVMRDybX7jqTXZWf0VE3Fi6j6QtgaUlq1YAHRsdSi001fj5fq/YGknqSfapcwdJAdQAAYxr4aljgCcj4pD8PTuh4ZD589dkIjBU0t3hLxF9aD7DX8/kIyBqgDrgd8DX8/5TJG0haZMyDzUNGJD/RwMYVuFSLR2Hk3Wx9IuILSPi48AMYCVZl2CDdxstbwy8kT8eUbL+UeBkSbUAjbp0RpO996+r6CtIhAN//dAxv9D1PHAv8LWIWBERjwJ3A89KehF4gNX/QzUpIhYDpwDjJT0DzAEWFlO+tXHDgYcarXuQ7OJtfT6c+EzgSWC7hou2wJXAFZImkp3ENLiF7DrUC5KmsvpIH4D/AjpIurKA19KmeWqFhEnqHBHv5dcDfgpMj4gfV7suMyuGz/DTdkL+qeFlso/XN7awv5mtx3yGb2aWCJ/hm5klwoFvZpYIB76ZWSIc+NZmlcwy+pKk+yV1+gjH2rNkrqGDJF3QzL7dJJ2yFm1cIumcta3RrCUOfGvLGqak2AFYBpxculGZD/1/ICLGRsT3mtmlG9l3HMzWKQ58S8XTwNYlM4leRzb/0Mcl7Svp2fxeA/eXfHN5v4bZRIFDGw4kaYSka/PHfSQ9lH+5aKqyexV8D9gq/3Txg3y/D8xqmq8fJemvkh4HBrba34YlyYFvbV7+Ff2hwIv5qoFkUwEMBhYBFwH7RMTOwGTgrHw20ZuBA4E9gE2bOPw1wO/zexXsTPadhguAV/NPF+c2mtV0J2AXSUMk7UL2bdTBZL9QPl3hl262Gk+IZW1Zw7TSkJ3h/wzYHJgZEX/M1+8GbAdMzL5wzIbAs2Szic6IiOkAku4km7a3sb2BYwEiYgWwUFL3RvuUzmoK2RS/25BNg/FQRLyftzH2I71asxY48K0tWxwRO5WuyEN9Uekq4LGIGN5ov51oesbGD6upWU3/q4JtmLXIXTqWuj8Cn5W0NYCkTpK2JZtNtL+krfL9mprf/QngG/lza/KbejSeFbKpWU2fAg7J7+DUhaz7yKwwDnxLWkTMJZua9x5JL5D9AhgUEUvIunAeyS/azmziEGcAe+WzlT4HbB8RdWRdRC9J+kFTs5pGxBSy2U+fJ5td8unCXqgZnkvHzCwZPsM3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRPx/QQOreDywqpcAAAAASUVORK5CYII=",
-            "text/plain": [
-              "<Figure size 432x288 with 1 Axes>"
-            ]
-          },
-          "metadata": {
-            "needs_background": "light"
-          },
-          "output_type": "display_data"
-        }
+     },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import os\n",
+    "from getpass import getpass\n",
+    "from pinecone import Pinecone\n",
+    "\n",
+    "# initialize connection to pinecone (get API key at app.pinecone.io)\n",
+    "api_key = os.environ.get(\"PINECONE_API_KEY\") or getpass(\"Enter your Pinecone API key: \")\n",
+    "\n",
+    "# configure client\n",
+    "pc = Pinecone(api_key=api_key)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "a7ysNAlrjD_k"
+   },
+   "source": [
+    "## Installing other dependencies"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "r3g-b61IywpQ"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install -qU pip python-dateutil tensorflow scikit-learn matplotlib seaborn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "0uuHcP-WywpQ"
+   },
+   "outputs": [],
+   "source": [
+    "from collections import Counter\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pandas as pd\n",
+    "import seaborn as sns\n",
+    "from tensorflow import keras\n",
+    "from tensorflow.keras.models import Model\n",
+    "import tensorflow.keras.backend as K\n",
+    "from sklearn.metrics import accuracy_score, precision_score, recall_score\n",
+    "from sklearn.metrics import confusion_matrix"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "lEwqF1osp83o"
+   },
+   "source": [
+    "We will use some of the code from an [academic work on deep learning for intrusion detection](https://github.com/rambasnet/DeepLearning-IDS). Let's clone the repository that we will use to prepare data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "FUDeovNiywpT"
+   },
+   "outputs": [],
+   "source": [
+    "!git clone -q https://github.com/rambasnet/DeepLearning-IDS.git "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "mc4ERmwniO1H"
+   },
+   "source": [
+    "## Define a New Pinecone Index"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we set up our index specification, which allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/guides/get-started/understanding-organizations#regions)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pinecone import ServerlessSpec\n",
+    "\n",
+    "cloud = os.environ.get(\"PINECONE_CLOUD\") or \"aws\"\n",
+    "region = os.environ.get(\"PINECONE_REGION\") or \"us-east-1\"\n",
+    "\n",
+    "spec = ServerlessSpec(cloud=cloud, region=region)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create the index:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "9_TIgYxBywpV"
+   },
+   "outputs": [],
+   "source": [
+    "# Pick a name for the new index\n",
+    "index_name = \"it-threats\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "\n",
+    "# check if index already exists (it shouldn't if this is the first time)\n",
+    "if not pc.has_index(index_name):\n",
+    "    # if does not exist, create index\n",
+    "    pc.create_index(name=index_name, dimension=128, metric=\"euclidean\", spec=spec)\n",
+    "    # wait for index to be initialized\n",
+    "    while not pc.describe_index(index_name).status[\"ready\"]:\n",
+    "        time.sleep(1)\n",
+    "\n",
+    "# connect to index\n",
+    "index = pc.Index(index_name)\n",
+    "# view index stats\n",
+    "index.describe_index_stats()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "IOP9jCo5ywpX"
+   },
+   "source": [
+    "## Upload\n",
+    "Here we transform network events into vector embeddings, then upload them into Pinecone's vector index. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "PzkJJd8ZYTNM"
+   },
+   "source": [
+    "### Prepare Data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "-N0bh6dUYTNN"
+   },
+   "source": [
+    "The datasets we use consist of benign (normal) network traffic and malicious traffic\n",
+    "generated from several different network attacks. We will focus on web attacks only. \n",
+    "\n",
+    "The web attack category consists of three common attacks: \n",
+    "- Cross-site scripting (BruteForce-XSS), \n",
+    "- SQL-Injection (SQL-Injection), \n",
+    "- Brute force administrative and user passwords (BruteForce-Web)\n",
+    "\n",
+    "The original data was recorded over two days."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "vuOLNAehYTNN"
+   },
+   "source": [
+    "**Download data for 22-02-2018 and 23-02-2018**\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "pCrwPkDJYTNO"
+   },
+   "source": [
+    "Files should be downloaded to the current directory. We will be using one date for training and generating vectors, and another one for testing."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "HshKyF0KywpR",
+    "outputId": "d5b3ceee-b584-47e2-a428-43e5c835968f"
+   },
+   "outputs": [
+    {
+     "output_type": "stream",
+     "text": [
+      "Thursday-22-02-2018 100%[===================>] 364.91M  3.07MB/s    in 2m 6s   \n",
+      "Friday-23-02-2018_T 100%[===================>] 365.10M  3.07MB/s    in 1m 53s  \n"
+     ]
+    }
+   ],
+   "source": [
+    "!wget \"https://cse-cic-ids2018.s3.ca-central-1.amazonaws.com/Processed%20Traffic%20Data%20for%20ML%20Algorithms/Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\" -q --show-progress\n",
+    "!wget \"https://cse-cic-ids2018.s3.ca-central-1.amazonaws.com/Processed%20Traffic%20Data%20for%20ML%20Algorithms/Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\" -q --show-progress"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "gr-hay4rfk0d"
+   },
+   "source": [
+    "Let's look at the data events first."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "bSSZ4YCZywpT",
+    "outputId": "27d111b0-c0c1-4694-9e07-65965d2014f1"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Benign              1048009\n",
+       "Brute Force -Web        362\n",
+       "Brute Force -XSS        151\n",
+       "SQL Injection            53\n",
+       "Name: Label, dtype: int64"
+      ]
+     },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data = pd.read_csv(\"Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\")\n",
+    "data.Label.value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "vYl82iXeywpT"
+   },
+   "source": [
+    "**Clean the data** using a python script from the cloned repository."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "y-trCZOcywpT",
+    "outputId": "c51b5e77-4e66-47a3-c4f6-d492a7dfaef8"
+   },
+   "outputs": [
+    {
+     "output_type": "stream",
+     "text": [
+      "cleaning Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\n",
+      "total rows read = 1048576\n",
+      "all done writing 1042868 rows; dropped 5708 rows\n"
+     ]
+    }
+   ],
+   "source": [
+    "!python DeepLearning-IDS/data_cleanup.py \"Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\" \"result23022018\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "oKHL8HCjYTNQ"
+   },
+   "source": [
+    "Load the file that you got from the previous step."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 278
+    },
+    "id": "uzH6_tBpywpU",
+    "outputId": "017263dd-8e2a-45a4-9693-5b5b763197ce"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Dst Port</th>\n",
+       "      <th>Protocol</th>\n",
+       "      <th>Timestamp</th>\n",
+       "      <th>Flow Duration</th>\n",
+       "      <th>Tot Fwd Pkts</th>\n",
+       "      <th>Tot Bwd Pkts</th>\n",
+       "      <th>TotLen Fwd Pkts</th>\n",
+       "      <th>TotLen Bwd Pkts</th>\n",
+       "      <th>Fwd Pkt Len Max</th>\n",
+       "      <th>Fwd Pkt Len Min</th>\n",
+       "      <th>...</th>\n",
+       "      <th>Fwd Seg Size Min</th>\n",
+       "      <th>Active Mean</th>\n",
+       "      <th>Active Std</th>\n",
+       "      <th>Active Max</th>\n",
+       "      <th>Active Min</th>\n",
+       "      <th>Idle Mean</th>\n",
+       "      <th>Idle Std</th>\n",
+       "      <th>Idle Max</th>\n",
+       "      <th>Idle Min</th>\n",
+       "      <th>Label</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>22</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1.519374e+09</td>\n",
+       "      <td>1532698</td>\n",
+       "      <td>11</td>\n",
+       "      <td>11</td>\n",
+       "      <td>1179</td>\n",
+       "      <td>1969</td>\n",
+       "      <td>648</td>\n",
+       "      <td>0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>32</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>500</td>\n",
+       "      <td>17</td>\n",
+       "      <td>1.519374e+09</td>\n",
+       "      <td>117573855</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1500</td>\n",
+       "      <td>0</td>\n",
+       "      <td>500</td>\n",
+       "      <td>500</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>58786927.5</td>\n",
+       "      <td>2.375324e+07</td>\n",
+       "      <td>75583006</td>\n",
+       "      <td>41990849</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>500</td>\n",
+       "      <td>17</td>\n",
+       "      <td>1.519374e+09</td>\n",
+       "      <td>117573848</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1500</td>\n",
+       "      <td>0</td>\n",
+       "      <td>500</td>\n",
+       "      <td>500</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>58786924.0</td>\n",
+       "      <td>2.375325e+07</td>\n",
+       "      <td>75583007</td>\n",
+       "      <td>41990841</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>22</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1.519374e+09</td>\n",
+       "      <td>1745392</td>\n",
+       "      <td>11</td>\n",
+       "      <td>11</td>\n",
+       "      <td>1179</td>\n",
+       "      <td>1969</td>\n",
+       "      <td>648</td>\n",
+       "      <td>0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>32</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>500</td>\n",
+       "      <td>17</td>\n",
+       "      <td>1.519374e+09</td>\n",
+       "      <td>89483474</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3000</td>\n",
+       "      <td>0</td>\n",
+       "      <td>500</td>\n",
+       "      <td>500</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>4000364.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>4000364</td>\n",
+       "      <td>4000364</td>\n",
+       "      <td>21370777.5</td>\n",
+       "      <td>1.528092e+07</td>\n",
+       "      <td>41989576</td>\n",
+       "      <td>7200485</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 80 columns</p>\n",
+       "</div>"
       ],
-      "source": [
-        "# Create confusion matrix\n",
-        "conf_matrix = confusion_matrix(y_true, y_pred)\n",
-        "\n",
-        "# Show confusion matrix\n",
-        "ax = plt.subplot()\n",
-        "sns.heatmap(conf_matrix, annot=True, ax = ax, cmap='Blues', fmt='g', cbar=False)\n",
-        "\n",
-        "# Add labels, title and ticks\n",
-        "ax.set_xlabel('Predicted')\n",
-        "ax.set_ylabel('Acctual')\n",
-        "ax.set_title('Confusion Matrix')\n",
-        "ax.xaxis.set_ticklabels(['Benign', 'Attack'])\n",
-        "ax.yaxis.set_ticklabels(['Benign', 'Attack'])"
-      ]
+      "text/plain": [
+       "   Dst Port  Protocol     Timestamp  Flow Duration  Tot Fwd Pkts  \\\n",
+       "0        22         6  1.519374e+09        1532698            11   \n",
+       "1       500        17  1.519374e+09      117573855             3   \n",
+       "2       500        17  1.519374e+09      117573848             3   \n",
+       "3        22         6  1.519374e+09        1745392            11   \n",
+       "4       500        17  1.519374e+09       89483474             6   \n",
+       "\n",
+       "   Tot Bwd Pkts  TotLen Fwd Pkts  TotLen Bwd Pkts  Fwd Pkt Len Max  \\\n",
+       "0            11             1179             1969              648   \n",
+       "1             0             1500                0              500   \n",
+       "2             0             1500                0              500   \n",
+       "3            11             1179             1969              648   \n",
+       "4             0             3000                0              500   \n",
+       "\n",
+       "   Fwd Pkt Len Min  ...  Fwd Seg Size Min  Active Mean  Active Std  \\\n",
+       "0                0  ...                32          0.0         0.0   \n",
+       "1              500  ...                 8          0.0         0.0   \n",
+       "2              500  ...                 8          0.0         0.0   \n",
+       "3                0  ...                32          0.0         0.0   \n",
+       "4              500  ...                 8    4000364.0         0.0   \n",
+       "\n",
+       "   Active Max  Active Min   Idle Mean      Idle Std  Idle Max  Idle Min  \\\n",
+       "0           0           0         0.0  0.000000e+00         0         0   \n",
+       "1           0           0  58786927.5  2.375324e+07  75583006  41990849   \n",
+       "2           0           0  58786924.0  2.375325e+07  75583007  41990841   \n",
+       "3           0           0         0.0  0.000000e+00         0         0   \n",
+       "4     4000364     4000364  21370777.5  1.528092e+07  41989576   7200485   \n",
+       "\n",
+       "    Label  \n",
+       "0  Benign  \n",
+       "1  Benign  \n",
+       "2  Benign  \n",
+       "3  Benign  \n",
+       "4  Benign  \n",
+       "\n",
+       "[5 rows x 80 columns]"
+      ]
+     },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data_23_cleaned = pd.read_csv(\"result23022018.csv\")\n",
+    "data_23_cleaned.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "cGXf6PmhYTNR",
+    "outputId": "3371cd8e-e8e3-4382-ad17-bfffd8e3d8ac"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Benign              1042301\n",
+       "Brute Force -Web        362\n",
+       "Brute Force -XSS        151\n",
+       "SQL Injection            53\n",
+       "Name: Label, dtype: int64"
+      ]
+     },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data_23_cleaned.Label.value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "bWUf-dk1ywpU"
+   },
+   "source": [
+    "### Load the Model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "97lIL236YTNR"
+   },
+   "source": [
+    "Here we load the pretrained model. The model was trained on data from the same date (23-02-2018)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Bq8_hM-RfEeR"
+   },
+   "source": [
+    "We have modified [the original model](https://github.com/rambasnet/DeepLearning-IDS/blob/master/keras_tensorflow_models/02-23-2018.csv_adam_10_10_multiclass_baseline_model_1561316601.model) slightly and changed the number of classes from four (Benign, BruteForce-Web, BruteForce-XSS, SQL-Injection) to two (Benign and Attack). In the step below we will download and unzip our modified model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "eRTvBzhoqPkR"
+   },
+   "outputs": [],
+   "source": [
+    "!wget -q -O it_threat_model.model.zip \"https://drive.google.com/uc?export=download&id=1VYMHOk_XMAc-QFJ_8CAPvWFfHnLpS2J_\" \n",
+    "!unzip -q it_threat_model.model.zip"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "WP44njScywpU",
+    "outputId": "2a9d2001-0328-4602-c595-372c5bf67aa4"
+   },
+   "outputs": [
+    {
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:SavedModel saved prior to TF 2.5 detected when loading Keras model. Please ensure that you are saving the model with model.save() or tf.keras.models.save_model(), *NOT* tf.saved_model.save(). To confirm, there should be a file named \"keras_metadata.pb\" in the SavedModel directory.\n",
+      "Model: \"sequential\"\n",
+      "_________________________________________________________________\n",
+      "Layer (type)                 Output Shape              Param #   \n",
+      "=================================================================\n",
+      "dense (Dense)                (None, 128)               10240     \n",
+      "_________________________________________________________________\n",
+      "dense_1 (Dense)              (None, 64)                8256      \n",
+      "_________________________________________________________________\n",
+      "dense_2 (Dense)              (None, 1)                 65        \n",
+      "=================================================================\n",
+      "Total params: 18,561\n",
+      "Trainable params: 18,561\n",
+      "Non-trainable params: 0\n",
+      "_________________________________________________________________\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = keras.models.load_model(\"it_threat_model.model\")\n",
+    "model.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "2rCdzSrfywpV"
+   },
+   "outputs": [],
+   "source": [
+    "# Select the first layer\n",
+    "layer_name = \"dense\"\n",
+    "intermediate_layer_model = Model(\n",
+    "    inputs=model.input, outputs=model.get_layer(layer_name).output\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "V3dx9XkPYTNV"
+   },
+   "source": [
+    "### Upload Data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "yO1eySIdjSOG"
+   },
+   "source": [
+    "\n",
+    "Let's define the item IDs in a way that reflects each event's label. Then, we index the events in Pinecone's vector index."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "nk4ZjGg-ywpX",
+    "outputId": "bce8f1fd-98e3-4313-d625-c404a4eca184"
+   },
+   "outputs": [
     {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "-H7rWFguYTNZ"
-      },
-      "source": [
-        "Now we can calculate overall accuracy and per class accuracy."
-      ]
+     "output_type": "stream",
+     "text": [
+      "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1042867/1042867 [01:43<00:00, 10067.22it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from tqdm import tqdm\n",
+    "\n",
+    "items_to_upload = []\n",
+    "\n",
+    "model_res = intermediate_layer_model.predict(K.constant(data_23_cleaned.iloc[:, :-1]))\n",
+    "\n",
+    "for i, res in tqdm(zip(data_23_cleaned.iterrows(), model_res), total=len(model_res)):\n",
+    "    benign_or_attack = i[1][\"Label\"][:3]\n",
+    "    items_to_upload.append((benign_or_attack + \"_\" + str(i[0]), res.tolist()))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "cMzD8s3ps3k0"
+   },
+   "outputs": [],
+   "source": [
+    "import itertools\n",
+    "\n",
+    "\n",
+    "def chunks(iterable, batch_size=100):\n",
+    "    it = iter(iterable)\n",
+    "    chunk = tuple(itertools.islice(it, batch_size))\n",
+    "    while chunk:\n",
+    "        yield chunk\n",
+    "        chunk = tuple(itertools.islice(it, batch_size))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "6C7er-8Gl2Rg"
+   },
+   "source": [
+    "You can lower the NUMBER_OF_ITEMS and, by doing so, limit the number of uploaded items. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "_Ti9p0P-ywpX",
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "NUMBER_OF_ITEMS = len(items_to_upload)\n",
+    "\n",
+    "for batch in chunks(items_to_upload[:NUMBER_OF_ITEMS], 50):\n",
+    "    index.upsert(vectors=batch)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "XgF8vW8PtaRX"
+   },
+   "outputs": [],
+   "source": [
+    "items_to_upload.clear()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "fglWJfAq_kw3"
+   },
+   "source": [
+    "Let's verify all items were inserted. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "xU172A4EywpY",
+    "outputId": "80008942-0000-40e5-92a1-66b767a489c6"
+   },
+   "outputs": [
     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "uHTInLt2YTNZ",
-        "outputId": "e48f2726-5eba-4d17-ffe7-7b17c62d5ee8"
-      },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Accuracy: 0.923\n",
-            "Precision: 0.995\n",
-            "Recall: 0.577\n"
-          ]
-        }
-      ],
-      "source": [
-        "# Calculate accuracy\n",
-        "acc = accuracy_score(y_true, y_pred, normalize=True, sample_weight=None)\n",
-        "precision = precision_score(y_true, y_pred)\n",
-        "recall = recall_score(y_true, y_pred)\n",
-        "\n",
-        "print(f\"Accuracy: {acc:.3f}\")\n",
-        "print(f\"Precision: {precision:.3f}\")\n",
-        "print(f\"Recall: {recall:.3f}\")"
+     "data": {
+      "text/plain": [
+       "{'dimension': 128, 'namespaces': {'': {'vector_count': 1042867}}}"
       ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 112
-        },
-        "id": "ZNzyqAH9YTNZ",
-        "outputId": "7d2e35a1-df80-47bc-f5e7-fd425e79a7f4"
-      },
-      "outputs": [
-        {
-          "data": {
-            "text/html": [
-              "<div>\n",
-              "<style scoped>\n",
-              "    .dataframe tbody tr th:only-of-type {\n",
-              "        vertical-align: middle;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe tbody tr th {\n",
-              "        vertical-align: top;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe thead th {\n",
-              "        text-align: right;\n",
-              "    }\n",
-              "</style>\n",
-              "<table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              "    <tr style=\"text-align: right;\">\n",
-              "      <th></th>\n",
-              "      <th>type</th>\n",
-              "      <th>accuracy</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <th>0</th>\n",
-              "      <td>Benign</td>\n",
-              "      <td>1.00</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>1</th>\n",
-              "      <td>Attack</td>\n",
-              "      <td>0.58</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table>\n",
-              "</div>"
-            ],
-            "text/plain": [
-              "     type  accuracy\n",
-              "0  Benign      1.00\n",
-              "1  Attack      0.58"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        }
+     },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "index.describe_index_stats()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "3CGzW3mVywpY"
+   },
+   "source": [
+    "## Query"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Ywuld4BylAIu"
+   },
+   "source": [
+    "First, we will randomly select a Benign/Attack event and query the vector index using the event embedding. Then, we will use data from a different day that contains the same set of attacks to query on a bigger sample."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "m5H4rMyOYTNX"
+   },
+   "source": [
+    "\n",
+    "### Evaluate the Rare Event Classification Model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "velMK_XlYTNX"
+   },
+   "source": [
+    "We will use the network intrusion dataset for 22-02-2018 for querying and testing the Pinecone index."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "VC2AVfWsj7em"
+   },
+   "source": [
+    "First, let's clean the data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "bW9mhYvOYTNX",
+    "outputId": "84a9d548-de90-40fe-c183-fc2ca48de2ec",
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "output_type": "stream",
+     "text": [
+      "cleaning Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\n",
+      "total rows read = 1048576\n",
+      "all done writing 1042966 rows; dropped 5610 rows\n"
+     ]
+    }
+   ],
+   "source": [
+    "!python DeepLearning-IDS/data_cleanup.py \"Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\" \"result22022018\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 278
+    },
+    "id": "xqMuz0jKYTNX",
+    "outputId": "d9f8c333-5e3f-4509-b84a-901bacf42487"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Dst Port</th>\n",
+       "      <th>Protocol</th>\n",
+       "      <th>Timestamp</th>\n",
+       "      <th>Flow Duration</th>\n",
+       "      <th>Tot Fwd Pkts</th>\n",
+       "      <th>Tot Bwd Pkts</th>\n",
+       "      <th>TotLen Fwd Pkts</th>\n",
+       "      <th>TotLen Bwd Pkts</th>\n",
+       "      <th>Fwd Pkt Len Max</th>\n",
+       "      <th>Fwd Pkt Len Min</th>\n",
+       "      <th>...</th>\n",
+       "      <th>Fwd Seg Size Min</th>\n",
+       "      <th>Active Mean</th>\n",
+       "      <th>Active Std</th>\n",
+       "      <th>Active Max</th>\n",
+       "      <th>Active Min</th>\n",
+       "      <th>Idle Mean</th>\n",
+       "      <th>Idle Std</th>\n",
+       "      <th>Idle Max</th>\n",
+       "      <th>Idle Min</th>\n",
+       "      <th>Label</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>22</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1.519288e+09</td>\n",
+       "      <td>20553406</td>\n",
+       "      <td>10</td>\n",
+       "      <td>7</td>\n",
+       "      <td>1063</td>\n",
+       "      <td>1297</td>\n",
+       "      <td>744</td>\n",
+       "      <td>0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>20</td>\n",
+       "      <td>1027304.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1027304</td>\n",
+       "      <td>1027304</td>\n",
+       "      <td>1.952608e+07</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>19526080</td>\n",
+       "      <td>19526080</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>34989</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1.519288e+09</td>\n",
+       "      <td>790</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>848</td>\n",
+       "      <td>0</td>\n",
+       "      <td>848</td>\n",
+       "      <td>0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>20</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>500</td>\n",
+       "      <td>17</td>\n",
+       "      <td>1.519288e+09</td>\n",
+       "      <td>99745913</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2500</td>\n",
+       "      <td>0</td>\n",
+       "      <td>500</td>\n",
+       "      <td>500</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>4000203.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>4000203</td>\n",
+       "      <td>4000203</td>\n",
+       "      <td>3.191524e+07</td>\n",
+       "      <td>3.792787e+07</td>\n",
+       "      <td>75584115</td>\n",
+       "      <td>7200679</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>500</td>\n",
+       "      <td>17</td>\n",
+       "      <td>1.519288e+09</td>\n",
+       "      <td>99745913</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2500</td>\n",
+       "      <td>0</td>\n",
+       "      <td>500</td>\n",
+       "      <td>500</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>4000189.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>4000189</td>\n",
+       "      <td>4000189</td>\n",
+       "      <td>3.191524e+07</td>\n",
+       "      <td>3.792788e+07</td>\n",
+       "      <td>75584130</td>\n",
+       "      <td>7200693</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>500</td>\n",
+       "      <td>17</td>\n",
+       "      <td>1.519288e+09</td>\n",
+       "      <td>89481361</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3000</td>\n",
+       "      <td>0</td>\n",
+       "      <td>500</td>\n",
+       "      <td>500</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>4000554.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>4000554</td>\n",
+       "      <td>4000554</td>\n",
+       "      <td>2.137020e+07</td>\n",
+       "      <td>1.528109e+07</td>\n",
+       "      <td>41990741</td>\n",
+       "      <td>7200848</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 80 columns</p>\n",
+       "</div>"
       ],
-      "source": [
-        "# Calculate per class accuracy\n",
-        "cmd = confusion_matrix(y_true, y_pred, normalize=\"true\").diagonal()\n",
-        "per_class_accuracy_df = pd.DataFrame([(index, round(value,4)) for index, value in zip(['Benign', 'Attack'], cmd)], columns = ['type', 'accuracy'])\n",
-        "per_class_accuracy_df = per_class_accuracy_df.round(2)\n",
-        "display(per_class_accuracy_df)"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Gfy_LW5zXIj6"
-      },
-      "source": [
-        "We got great results using Pinecone! Let's see what happens if we skip the similarity search step and predict values from the model directly. In other words, let's use the model that created the embeddings as a classifier. It would be interesting to compare its and the similarity search approach accuracy. "
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "Oxya9-mMYh5A"
-      },
-      "outputs": [],
-      "source": [
-        "from keras.utils.np_utils import normalize\n",
-        "import numpy as np\n",
-        "\n",
-        "data_sample = normalize(data_22_cleaned.iloc[:, :-1])[-2000:]\n",
-        "y_pred_model = model.predict(normalize(data_sample)).flatten()\n",
-        "y_pred_model = np.round(y_pred_model)"
-      ]
+      "text/plain": [
+       "   Dst Port  Protocol     Timestamp  Flow Duration  Tot Fwd Pkts  \\\n",
+       "0        22         6  1.519288e+09       20553406            10   \n",
+       "1     34989         6  1.519288e+09            790             2   \n",
+       "2       500        17  1.519288e+09       99745913             5   \n",
+       "3       500        17  1.519288e+09       99745913             5   \n",
+       "4       500        17  1.519288e+09       89481361             6   \n",
+       "\n",
+       "   Tot Bwd Pkts  TotLen Fwd Pkts  TotLen Bwd Pkts  Fwd Pkt Len Max  \\\n",
+       "0             7             1063             1297              744   \n",
+       "1             0              848                0              848   \n",
+       "2             0             2500                0              500   \n",
+       "3             0             2500                0              500   \n",
+       "4             0             3000                0              500   \n",
+       "\n",
+       "   Fwd Pkt Len Min  ...  Fwd Seg Size Min  Active Mean  Active Std  \\\n",
+       "0                0  ...                20    1027304.0         0.0   \n",
+       "1                0  ...                20          0.0         0.0   \n",
+       "2              500  ...                 8    4000203.0         0.0   \n",
+       "3              500  ...                 8    4000189.0         0.0   \n",
+       "4              500  ...                 8    4000554.0         0.0   \n",
+       "\n",
+       "   Active Max  Active Min     Idle Mean      Idle Std  Idle Max  Idle Min  \\\n",
+       "0     1027304     1027304  1.952608e+07  0.000000e+00  19526080  19526080   \n",
+       "1           0           0  0.000000e+00  0.000000e+00         0         0   \n",
+       "2     4000203     4000203  3.191524e+07  3.792787e+07  75584115   7200679   \n",
+       "3     4000189     4000189  3.191524e+07  3.792788e+07  75584130   7200693   \n",
+       "4     4000554     4000554  2.137020e+07  1.528109e+07  41990741   7200848   \n",
+       "\n",
+       "    Label  \n",
+       "0  Benign  \n",
+       "1  Benign  \n",
+       "2  Benign  \n",
+       "3  Benign  \n",
+       "4  Benign  \n",
+       "\n",
+       "[5 rows x 80 columns]"
+      ]
+     },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data_22_cleaned = pd.read_csv(\"result22022018.csv\")\n",
+    "data_22_cleaned.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "262mxbQDYTNY",
+    "outputId": "7540316e-2c26-49ad-a487-85a4fe0bd84c"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Benign              1042603\n",
+       "Brute Force -Web        249\n",
+       "Brute Force -XSS         79\n",
+       "SQL Injection            34\n",
+       "Name: Label, dtype: int64"
+      ]
+     },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data_22_cleaned.Label.value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "P6nJdtveYTNY"
+   },
+   "source": [
+    "Let's define a sample that includes all the different types of web attacks for this specific date."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "N7vwZk6HYTNY",
+    "outputId": "96c03fc3-c086-4398-b58a-2c5c57eb4ded"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Benign              1638\n",
+       "Brute Force -Web     249\n",
+       "Brute Force -XSS      79\n",
+       "SQL Injection         34\n",
+       "Name: Label, dtype: int64"
+      ]
+     },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data_sample = data_22_cleaned[-2000:]\n",
+    "data_sample.Label.value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "neSxNwYckGMK"
+   },
+   "source": [
+    "Now, we will query the index with each item in the test sample and save the predicted and expected labels so that we can create a confusion matrix."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 136,
+     "referenced_widgets": [
+      "567d04116b1b4d21bac2348535b750a0",
+      "8f92a228cbc54b30bcb22d0598e9577f",
+      "8fe529938a4f40ac905e145e423d856e",
+      "a4b449113a734e90825bc1ef87ca3d3c",
+      "cda25dc5f86344b3850c92e59085c06c",
+      "3586c81ff82048ed80d69b7a4b5bd6b3",
+      "c427de521f054c4997d69586251bed4f",
+      "90d8d1d9da814d90b0f0bf331102d4df",
+      "7f2cabefd9eb4674a63ef3d56a5be122",
+      "dda94f4a1ea946b7996a928374dda4a5",
+      "ea6f763369cd4b478998ea4d3e8f20e6",
+      "e752273786584dd4baa77ac3f4528849",
+      "aef6058200fa454c90f51760685e25db",
+      "87b4d4b12452401cb82285364dae3576",
+      "8a60896e0288471a91089a03a75b210b",
+      "fe75c2a93c6e4619a2731a4c01a71619",
+      "5c64d617d0d94a6a9797d20f0d1e80f3",
+      "25ac309f2f5d43169ed0bda88300c8d2",
+      "9ebe5f94bbb743058602c9af26cd4eaf",
+      "3db0b4717ac140b78bf7d75e7ebeaf39",
+      "fcc29e1c6b304044a53522c550c4b49d",
+      "cd9e8a060d05491f8c4d74871c9560fa"
+     ]
+    },
+    "id": "8u6cg_1tYTNY",
+    "outputId": "dbe81cb3-88fb-4cd9-91fe-773c960ca108",
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "output_type": "stream",
+     "text": [
+      "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [10:48<00:00, 32.44s/it]\n"
+     ]
+    }
+   ],
+   "source": [
+    "y_true = []\n",
+    "y_pred = []\n",
+    "\n",
+    "BATCH_SIZE = 100\n",
+    "\n",
+    "for i in tqdm(range(0, len(data_sample), BATCH_SIZE)):\n",
+    "    test_data = data_sample.iloc[i : i + BATCH_SIZE, :]\n",
+    "\n",
+    "    # Create vector embedding using the model\n",
+    "    test_vector = intermediate_layer_model.predict(K.constant(test_data.iloc[:, :-1]))\n",
+    "    # Query using the vector embedding\n",
+    "    query_results = []\n",
+    "\n",
+    "    for xq in test_vector.tolist():\n",
+    "        query_res = index.query(vector=xq, top_k=50)\n",
+    "        query_results.append(query_res)\n",
+    "\n",
+    "    ids = [res.id for result in query_results for res in result.matches]\n",
+    "\n",
+    "    for label, res in zip(test_data.Label.values, query_results):\n",
+    "        # Add to the true list\n",
+    "        if label == \"Benign\":\n",
+    "            y_true.append(0)\n",
+    "        else:\n",
+    "            y_true.append(1)\n",
+    "\n",
+    "        counter = Counter(match.id.split(\"_\")[0] for match in res.matches)\n",
+    "\n",
+    "        # Add to the predicted list\n",
+    "        if counter[\"Bru\"] or counter[\"SQL\"]:\n",
+    "            y_pred.append(1)\n",
+    "        else:\n",
+    "            y_pred.append(0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 313
     },
+    "id": "HV3-gkdWYTNZ",
+    "outputId": "20103ea7-713b-4e2b-9590-ecc70ef9b76e"
+   },
+   "outputs": [
     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 313
-        },
-        "id": "GWssFePDXEks",
-        "outputId": "9eee2c60-f1c1-4a34-c682-665389fe3aee"
-      },
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "[Text(0, 0.5, 'Benign'), Text(0, 1.5, 'Attack')]"
-            ]
-          },
-          "execution_count": 87,
-          "metadata": {},
-          "output_type": "execute_result"
-        },
-        {
-          "data": {
-            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEWCAYAAABliCz2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAbqElEQVR4nO3dd5xV9Z3G8c8DI02kI1ixy0Z3xRLjakBRg2JXVAQrKqyxxMTgWhcs0aBJTIwaxbI2jFGz9oKokYglu6JR7A2ChSJNlCbM8N0/zhlyGZmZK94zl5nzvF+vec099fe9w/DMub9zzu8oIjAzs6avWbkLMDOzhuHANzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgW5MhqbWkRyTNl3Tfd9jP0ZLGlbK2cpD0hKTjy12HrTkc+NbgJA2WNFHSAknT02D6YQl2fTjQDegcEUes7k4i4q6I6FeCelYiaQ9JIen+GvO3S+ePL3I/F0kaU996EdE/Im5fzXKtCXLgW4OSdBbwO+ByknDeGPgDcHAJdt8DeD8iKkuwr6zMAnaV1Llg3vHA+6VqQAn/37Zv8C+FNRhJ7YFLgNMi4v6IWBgRyyLikYg4O12npaTfSZqWfv1OUst02R6SPpX0c0mfp58OhqTLLgZGAAPTTw4n1TwSlrRJeiRdkU6fIGmypK8kTZF0dMH85wu221XSy2lX0cuSdi1YNl7SpZJeSPczTlKXOn4MS4EHgaPS7ZsDRwJ31fhZXS3pE0lfSnpFUu90/r7A+QXv8/WCOi6T9AKwCNgsnXdyuvx6SX8u2P8Vkp6RpKL/Aa3Rc+BbQ/p3oBXwQB3rXADsAvQCtgN2Bi4sWN4daA9sAJwEXCepY0SMJPnUcE9EtI2IW+oqRNLawO+B/hGxDrAr8Noq1usEPJau2xm4CnisxhH6YGAIsC7QAhheV9vAHcBx6et9gLeAaTXWeZnkZ9AJ+CNwn6RWETG2xvvcrmCbY4FhwDrA1Br7+znwb+kfs94kP7vjw2Or5IoD3xpSZ2B2PV0uRwOXRMTnETELuJgkyKotS5cvi4jHgQXA1qtZz3JgW0mtI2J6RLy1inX2Bz6IiDsjojIi7gbeBQ4sWOfWiHg/IhYD95IEda0i4kWgk6StSYL/jlWsMyYi5qRt/gZoSf3v87aIeCvdZlmN/S0CjiH5gzUGOCMiPq1nf9bEOPCtIc0BulR3qdRifVY+Op2azluxjxp/MBYBbb9tIRGxEBgInAJMl/SYpJ5F1FNd0wYF0zNWo547gdOBvqziE0/abfVO2o30Bcmnmrq6igA+qWthRPwfMBkQyR8myxkHvjWkl4AlwCF1rDON5ORrtY35ZndHsRYCbQqmuxcujIgnI+JHwHokR+03FVFPdU2frWZN1e4ETgUeT4++V0i7XM4h6dvvGBEdgPkkQQ1QWzdMnd0zkk4j+aQwDfjP1S/dGisHvjWYiJhPcmL1OkmHSGojaS1J/SVdma52N3ChpK7pyc8RJF0Qq+M1oI+kjdMTxudVL5DUTdJBaV/+1yRdQ1Wr2MfjwFbppaQVkgYC3wMeXc2aAIiIKcDuJOcsaloHqCS5oqdC0gigXcHymcAm3+ZKHElbAb8g6dY5FvhPSXV2PVnT48C3BhURVwFnkZyInUXSDXE6yZUrkITSRGAS8Abwajpvddp6Crgn3dcrrBzSzUhOZE4D5pKE76mr2Mcc4IB03TkkR8YHRMTs1ampxr6fj4hVfXp5EniC5FLNqSSfigq7a6pvKpsj6dX62km70MYAV0TE6xHxAcmVPndWXwFl+SCfpDczywcf4ZuZ5YQD38wsJxz4ZmY54cA3M8uJum6AKavW25/us8m2Rpr38rXlLsGsVq0qqHV8JB/hm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4Z
mY54cA3M8sJB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU448M3McsKBb2aWExVZNyCpOdCtsK2I+Djrds3MbGWZBr6kM4CRwExgeTo7gH/Lsl0zM/umrI/wzwS2jog5GbdjZmb1yLoP/xNgfsZtmJlZEbI+wp8MjJf0GPB19cyIuCrjds3MrIasA//j9KtF+mVmZmWSaeBHxMVZ7t/MzIqX9VU6j5BclVNoPjARGB0RS7Js38zM/inrk7aTgQXATenXlySXaG6VTpuZWQPJug9/+4joUzD9iKTnIqKPpLcybtvMzApkfYTfVdLG1RPp6y7p5NKM2zYzswJZH+H/HHhe0keAgE2BUyWtDdyecdtN2g0jj6Z/n22ZNfcrdjri8hXzf3zU7pwysA+VVcsZO+FNLrj6IXbapgfX/tcgACS47IbHefjZSQAcue+OnH3iPkQE02fN58QLb2fOFwvL8p4sf16Y8BxXjLqM5VXLOXTAEZw0dFi5S2rSFFHznGqJG5BaAj1JAv/dYk/Utt7+9GwLa+R222FzFi76mpsvPW5F4PfZaUvOOXkfDj3jBpYuq6Rrx7bMmreA1q3WYumyKqqqltO9Szv+957z2KzfBQBMHncZOwz4BXO+WMhlZx7MoiXLuGz04+V8a2u8eS9fW+4SmoSqqioO2n8fRt90K926dWPwwMMZ9aur2HyLLcpdWqPWqgLVtiyTLh1Je6bfDwP2BzYHNgP2S+fZd/TCqx8xd/6ileYNO6I3v771KZYuqwRg1rwFACxesoyqqmQoo5Yt1qL6j7yUfK3dOrlFYp22rZk+yzdGW8N4841JbLRRDzbcaCPWatGCfffbn/HPPlPuspq0rLp0dgf+Ahy4imUB3J9Ru7m2RY912W37zbn4tANZsnQZ5131AK+8nQxM+v1te3DDRcew8XqdOOnC21f8ATjz8nt4+d7zWbh4KR99Mouf/vKecr4Fy5HPZ86k+3rdV0yv260bb0yaVMaKmr5MjvAjYmT6fcgqvk6sbTtJwyRNlDSxcrYv4vm2Kpo3o2O7NvQ57tec/9sHGXPlP3/UL785lR0Pv4wfHnMlZ5/Yj5YtKqioaMbQw3uzy6Ar2KzfBbz5/mecfWK/Mr4Dy5P4xi06INXaG2ElkPWNVy2BAcAmrDwe/iWrWj8ibgRuBPfhr47PZn7Bg8+8DsDEt6ayfHnQpWNbZqddOwDvTZnJwsVL2WaL9Vd09E35dDYAf37qVYYPceBbw+jWrTszps9YMf35zJmsu+66Zayo6cv6ssyHgIOBSmBhwZdl4JHxk9hj560A2GLjdWmxVgWz5y2gx/qdad48+afeeL2ObLVJN6ZOm8O0WfPpuVl3unRsC8Beu/TkvSkzat2/WSlts+2/8vHH/+DTTz9h2dKljH38MXbvu2e5y2rSsr4sc8OI2DfjNnLp9l+eQO8dt6RLh7Z8OPZSLr3hcW5/8CVGX3Q0E+87n6XLqjh5xJ0A7Lr9Zgwf0o9llVUsXx6cefk9Ky69vPzGJ3jq5p+yrLKKj6fPZdjIMeV8W5YjFRUVnHfBCH487GSWL6/ikEMHsMUWW5a7rCYt08syJd0IXBMRb3zbbd2lY2sqX5Zpa7K6LsvM+gj/h8AJkqaQjIcvICLCjzg0M2tgWQd+/4z3b2ZmRcr0pG1ETAU2AvZMXy/Kuk0zM1u1TMNX0kjgHOC8dNZagM8KmpmVQdZH24cCB5FeihkR04B1Mm7TzMxWIevAXxrJZUABkI6SaWZmZZB14N8raTTQQdJQ4Gn8pCszs7LI+iHmv5b0I5JHG24NjIiIp7Js08zMVi3ryzJJA/4pSV2AOVm3Z2Zmq5bVePi7SBov6X5J2
0t6E3gTmCnJQy2YmZVBVkf41wLnA+1JxsXvHxF/k9QTuBsYm1G7ZmZWi6xO2lZExLiIuA+YERF/A4iIdzNqz8zM6pFV4C8veL24xjIPimZmVgZZdelsJ+lLksHSWqevSadbZdSmmZnVIZPAj4jmWezXzMxWnwcyMzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU7U+ohDSTvUtWFEvFr6cszMLCt1PdP2N3UsC2DPEtdiZmYZqjXwI6JvQxZiZmbZqusIfwVJ2wLfA1pVz4uIO7IqyszMSq/ewJc0EtiDJPAfB/oDzwMOfDOzRqSYq3QOB/YCZkTEEGA7oGWmVZmZWckVE/iLI2I5UCmpHfA5sFm2ZZmZWakV04c/UVIH4CbgFWAB8H+ZVmVmZiVXb+BHxKnpyxskjQXaRcSkbMsyM7NSK+akbZ9VzYuI57IpyczMslBMl87ZBa9bATuTdO34xiszs0akmC6dAwunJW0EXJlZRWZmlonVGTztU2DbUhdiZmbZKqYP/xqSsXMg+QPRC3g9y6LMzKz0iross+B1JXB3RLyQUT1mZpaRYgK/Q0RcXThD0pk155mZ2ZpNEVH3CtKrEbFDjXl/j4jtsyzsvRmL6i7MrEw6tW1R7hLMatW1bYVqW1bXA1AGAYOBTSU9XLBoHWBO6cozM7OGUFeXzovAdKALKz8M5SvAd9qamTUydT0AZSowVdLRwLSIWAIgqTWwIfCPBqnQzMxKopjr8O8FlhdMVwH3ZVOOmZllpZjAr4iIpdUT6WuftTIza2SKCfxZkg6qnpB0MDA7u5LMzCwLxVyHfwpwl6Rr0+lPgeOyK8nMzLJQzOBpHwG7SGpLct3+V9mXZWZmpVZvl46kyyV1iIgFEfGVpI6SftEQxZmZWekU04ffPyK+qJ6IiHnAftmVZGZmWSgm8JtLalk9kV6H37KO9c3MbA1UzEnbMcAzkm5Np4cAt2dXkpmZZaGYk7ZXSpoE7A0IGAv0yLowMzMrrWKfeDWD5G7bAcBewDuZVWRmZpmoa7TMrYCjgEEko2PeQ3JZZt8Gqs3MzEqori6dd4EJwIER8SGApJ81SFVmZlZydXXpDCDpynlW0k2S9iLpwzczs0ao1sCPiAciYiDQExgP/AzoJul6Sf0aqD4zMyuRek/aRsTCiLgrIg4gGQf/NeDczCszM7OSqveZtuXiZ9ramsrPtLU1WV3PtC32skwzM2vkHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOZBb4kk5axbxRWbVnZmZ1q8hw34dLWhIRdwFI+gPQMsP2zMysDlkG/mHAw5KWA/2BuRFxaobtmZlZHUoe+JI6FUyeDDwIvABcIqlTRMwtdZtmZlY/RURpdyhNAQJQwfdqERGbFbOf92YsKm1hZiXSqW2LcpdgVquubStU27KSH+FHxKal3qeZmX13WV6lc5qkDgXTHSW5D9/MrEyyvA5/aER8UT0REfOAoRm2Z2Zmdcgy8JtJWtGXJKk54M5PM7MyyfKyzCeBeyXdQHLy9hRgbIbt5dasz2fwu8v+i3lz56BmYp8DB3DQ4YP54603MO7R+2nfoSMAxw49nZ126U1l5TKuufISJr//LlVVVfTdZ3+OOOYb98mZlcTlF1/IixP+SsdOnbjz3ocA+HL+F4w4bzgzpn1G9/U34JJRv6Fdu/YrtpkxfRrHHnEQQ4adxuDjhpSr9CYny8A/B/gP4MckV+qMA27OsL3cat68OSeedhabb/UvLFq0kLOGDqbXTj8A4OAjjuHQo45baf0Xnn2aymVLuea2+/h6yWJOO
34AffbqT7f11i9H+dbE7XfgIQw4cjC/GHneinljbruZHb//A44dMpQ7b72JMbfdzKk/+fmK5ddcdQU/2LV3Ocpt0jLr0omI5RFxfUQcHhEDImJ0RFRl1V6ederclc23+hcA2rRZmw17bMqcWbNq30CwZPESqior+frrr6moWIs2a6/dQNVa3vTaYSfatW+/0rwJf32W/gccAkD/Aw5hwvi/rFj23LPPsP4GG7Hp5ls0aJ15kOVVOltK+rOktyVNrv7Kqj1LzJw+jckfvMfW39sWgMce+BNnDDmSq0ddxIKvvgRgtz32plXrVhx/2I846cj+HDLwONZp176u3ZqV1Lw5c+jStSsAXbp2Zd7c5H7MxYsXcdfttzBk2I/LWV6TleVJ21uB64FKoC9wB3BnXRtIGiZpoqSJ99z53xmW1jQtXrSIUSOGc/IZw2mzdlv6H3wEo//4CFff8ic6de7CLdddBcD777xFs2bNue3+cdz0p8d46N47mTHt0zJXbwa33HAdRw4+jjZt/IkzC1n24beOiGckKSKmAhdJmgCMrG2DiLgRuBF8p+23VVm5jFEjhrP73v3Ztc9eAHTs1HnF8n4HHMal5/0EgOeefoIddt6Vioq16NCxEz237cWH775N9/U3LEvtlj8dO3dm9qxZdOnaldmzZtGxUzIiy9tvTmL8M+O4/ve/YcFXX6FmomXLFgwYeHSZK24asgz8JZKaAR9IOh34DFg3w/ZyKyK45oqL2bDHphwy8NgV8+fOmUWnzsnH5r9N+As9Nt0cgK7dujPp1ZfZo9/+fL1kCe+/PYmDjhhcltotn37Ypy9PPPogxw4ZyhOPPkjv3fsC8Idb/tkJcMvo62jduo3DvoRKPpbOih1L3wfeAToAlwLtgCsj4n+L2d5H+MV7e9LfOfeME+mx2ZY0a5bc+nDs0NN57uknmfLheyDRrft6nDr8Qjp17sriRYu4etRIPpk6GSLYq//BHDbo+DK/i8bDY+l8OyPPH85rE1/miy++oFPnzpz0H6fRe4+9GHHuWcycMZ1u3dfj0iuuol37DittVx34vizz26lrLJ0sA/+IiLivvnm1ceDbmsqBb2uyugI/y5O25xU5z8zMGkAW4+H3B/YDNpD0+4JF7Uiu2DEzszLI4qTtNGAicBDwSsH8r4CfZdCemZkVIYvx8F8HXpfULSJuL1wm6Uzg6lK3aWZm9cuyD/+oVcw7IcP2zMysDln04Q8CBgObSnq4YFE7YHap2zMzs+Jk0Yf/IjAd6AL8pmB+AAMzaM/MzIqQRR/+VGAq8O+SepEc7R8JTAH+p9TtmZlZcbLo0tmKpP9+EDAHuIfkBq++pW7LzMyKl0WXzrvABODAiPgQQJIvxzQzK7MsrtIZAMwAnpV0k6S9SJ54ZWZmZVTywI+IByJiINATGE9ys1U3SddL6lfq9szMrDhZPuJwYUTcFREHABsCrwHnZtWemZnVLbPRMr8rj5ZpayqPlmlrsnKNlmlmZmsQB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOOPDNzHLCgW9mlhOKiHLXYA1A0rCIuLHcdZjV5N/NhuMj/PwYVu4CzGrh380G4sA3M8sJB76ZWU448PPDfaS2pvLvZgPxSVszs5zwEb6ZWU448M3McsKB3whIqpL0mqTXJb0qadfvsK9LJO1dyvrMACQdKikk9UynN5E0uGB5L0n7fYf9/0NSl1LUmlcO/MZhcUT0iojtgPOAX67ujiJiREQ8XbrSzFYYBDwPHJVObwIMLljeC1jtwLfvzoHf+LQD5lVPS
Dpb0suSJkm6OJ23iaR3JN0k6S1J4yS1TpfdJunw9PV+kt6V9Lyk30t6NJ1/kaT/ljRe0mRJPynD+7RGRFJbYDfgJP4Z+KOA3umn03OAS4CB6fRASTtLelHS39PvW6f7ai7p15LeSH+vz6jRVmtJYyUNbcC32CRUlLsAK0prSa8BrYD1gD0BJPUDtgR2BgQ8LKkP8HE6f1BEDJV0LzAAGFO9Q0mtgNFAn4iYIunuGm32BPoC6wDvSbo+IpZl+SatUTsEGBsR70uaK2kH4FxgeEQcACBpJrBTRJyeTrcj+f2rTLsZLyf5PR0GbApsny7rVNBOW+BPwB0RcUeDvbsmwkf4jUN1l05PYF/gDkkC+qVffwdeJQnpLdNtpkTEa+nrV0g+XhfqCUyOiCnpdM3Afywivo6I2cDnQLdSviFrcgaRBDHp90FFbNMeuE/Sm8BvgW3S+XsDN0REJUBEzC3Y5iHgVof96vERfiMTES+lJ666khzV/zIiRheuI2kT4OuCWVVA6xq7Uj1N1dzevyu2SpI6k3zq3FZSAM2BAB6vZ9NLgWcj4tD0d3Z89S7T7VflBaC/pD+GbyL61nyE38ikV0A0B+YATwInpv2nSNpA0rpF7updYLP0PxrAwBKXavlxOEkXS4+I2CQiNgKmAMtJugSrfVVjuj3wWfr6hIL544BTJFUA1OjSGUHyu/+Hkr6DnHDgNw6t0xNdrwH3AMdHRFVEjAP+CLwk6Q3gz6z8H6pWEbEYOBUYK+l5YCYwP5vyrYkbBDxQY97/kJy8rUwvJ/4Z8CzwveqTtsCVwC8lvUByEFPtZpLzUJMkvc7KV/oA/BRoJenKDN5Lk+ahFXJMUtuIWJCeD7gO+CAiflvuuswsGz7Cz7eh6aeGt0g+Xo+uZ30za8R8hG9mlhM+wjczywkHvplZTjjwzcxywoFvTVbBKKNvSrpPUpvvsK89CsYaOkjSuXWs20HSqavRxkWShq9ujWb1ceBbU1Y9JMW2wFLglMKFSnzr/wMR8XBEjKpjlQ4k9ziYrVEc+JYXE4AtCkYS/QPJ+EMbSeon6aX0WQP3Fdy5vG/1aKLAYdU7knSCpGvT190kPZDeXPS6kmcVjAI2Tz9d/Cpd7xujmqbzL5D0nqSnga0b7KdhueTAtyYvvUW/P/BGOmtrkqEAtgcWAhcCe0fEDsBE4Kx0NNGbgAOB3kD3Wnb/e+Cv6bMKdiC5p+Fc4KP008XZNUY17QXsKKmPpB1J7kbdnuQPyvdL/NbNVuIBsawpqx5WGpIj/FuA9YGpEfG3dP4uwPeAF5IbjmkBvEQymuiUiPgAQNIYkmF7a9oTOA4gIqqA+ZI61lincFRTSIb43ZJkGIwHImJR2sbD3+ndmtXDgW9N2eKI6FU4Iw31hYWzgKciYlCN9XpR+4iN31Zto5r+tIRtmNXLXTqWd38DdpO0BYCkNpK2IhlNdFNJm6fr1Ta++zPAj9Ntm6cP9ag5KmRto5o+BxyaPsFpHZLuI7PMOPAt1yJiFsnQvHdLmkTyB6BnRCwh6cJ5LD1pO7WWXZwJ9E1HK30F2CYi5pB0Eb0p6Ve1jWoaEa+SjH76GsnokhMye6NmeCwdM7Pc8BG+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnx/zT+z1Wh63oMAAAAAElFTkSuQmCC",
-            "text/plain": [
-              "<Figure size 432x288 with 1 Axes>"
-            ]
-          },
-          "metadata": {
-            "needs_background": "light"
-          },
-          "output_type": "display_data"
-        }
-      ],
-      "source": [
-        "# Create confusion matrix\n",
-        "conf_matrix = confusion_matrix(y_true, y_pred_model)\n",
-        "\n",
-        "# Show confusion matrix\n",
-        "ax = plt.subplot()\n",
-        "sns.heatmap(conf_matrix, annot=True, ax = ax, cmap='Blues', fmt='g', cbar=False)\n",
-        "\n",
-        "# Add labels, title and ticks\n",
-        "ax.set_xlabel('Predicted')\n",
-        "ax.set_ylabel('Acctual')\n",
-        "ax.set_title('Confusion Matrix')\n",
-        "ax.xaxis.set_ticklabels(['Benign', 'Attack'])\n",
-        "ax.yaxis.set_ticklabels(['Benign', 'Attack'])"
+     "data": {
+      "text/plain": [
+       "[Text(0, 0.5, 'Benign'), Text(0, 1.5, 'Attack')]"
       ]
+     },
+     "output_type": "execute_result"
     },
     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "VJRwvXOvYtBL",
-        "outputId": "9d45426b-6bd8-4374-dcf8-9ffecedd7176"
-      },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Accuracy: 0.871\n",
-            "Precision: 1.000\n",
-            "Recall: 0.287\n"
-          ]
-        }
-      ],
-      "source": [
-        "# Calculate accuracy\n",
-        "acc = accuracy_score(y_true, y_pred_model, normalize=True, sample_weight=None)\n",
-        "precision = precision_score(y_true, y_pred_model)\n",
-        "recall = recall_score(y_true, y_pred_model)\n",
-        "\n",
-        "print(f\"Accuracy: {acc:.3f}\")\n",
-        "print(f\"Precision: {precision:.3f}\")\n",
-        "print(f\"Recall: {recall:.3f}\")"
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEWCAYAAABliCz2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAbKUlEQVR4nO3debxVdb3/8debc5RBQEZxKBAcILUUtX5eS1IzE6+aM6JlGE455HXWMBwoLbNBrzl3zTmnLEwihyupZDeQxCEpUqIUQTgCKjId+Pz+WOvQ5sg5Z4t7nQ3n+34+HufBXsNe38+Gzfus/V3f/V2KCMzMrO1rV+0CzMysdTjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cC3NkNSR0kPS1oo6f6PcJxjJD1aydqqQdJvJX2t2nXYusOBb61O0tGSJkt6T9KbeTB9rgKHPhzoA/SMiCPW9iARcVdE7FuBelYjaU9JIemXjdbvmK+fUOZxLpF0Z0v7RcTQiLhtLcu1NsiBb61K0lnAT4DLycK5L3Ad8OUKHL4f8LeIqK/AsYoyF9hdUs+SdV8D/lapBpTx/237AL8prNVI2hi4DDg1In4ZEYsiYnlEPBwR5+b7tJf0E0mz8p+fSGqfb9tT0uuSzpb0Vv7p4Lh826XAaGBY/slhZOMzYUlb5mfStfnyCEmvSXpX0gxJx5Ssf6bkebtLmpR3FU2StHvJtgmSxkiamB/nUUm9mvlrWAb8Cjgqf34NcCRwV6O/q6sl/UvSO5Kek7RHvn4/4Fslr3NqSR3flTQReB8YkK87Pt9+vaQHSo7/fUlPSFLZ/4C23nPgW2v6D6AD8FAz+4wCdgN2AnYEPgNcVLJ9U2BjYAtgJPBTSd0j4mKyTw33RkTniPhZc4VI2gi4BhgaEV2A3YHn17BfD+CRfN+ewI+ARxqdoR8NHAdsAmwInNNc28DtwLH54y8BLwOzGu0ziezvoAdwN3C/pA4RMb7R69yx5DlfBU4EugAzGx3vbOBT+S+zPcj+7r4WnlslKQ58a009gXktdLkcA1wWEW9FxFzgUrIga7A83748IsYB7wED17KelcAOkjpGxJsR8fIa9vlPYHpE3BER9RFxDzANOLBkn1sj4m8RsRi4jyyomxQRfwB6SBpIFvy3r2GfOyOiLm/zh0B7Wn6dP4+Il/PnLG90vPeBr5D9wroTOD0iXm/heNbGOPCtNdUBvRq6VJqwOaufnc7M1606RqNfGO8DnT9sIRGxCBgGnAy8KekRSYPKqKehpi1KlmevRT13AKcBe7GGTzx5t9UreTfSArJPNc11FQH8q7mNEfEn4DVAZL+YLDEOfGtNzwJLgIOb2WcW2cXXBn35YHdHuRYBnUqWNy3dGBG/i4gvApuRnbXfXEY9DTW9sZY1NbgDOAUYl599r5J3uZxP1rffPSK6AQvJghqgqW6YZrtnJJ1K9klhFnDe2pdu6ysHvrWaiFhIdmH1p5IOltRJ0gaShkq6Mt/tHuAiSb3zi5+jybog1sbzwBBJffMLxhc2bJDUR9JBeV/+UrKuoRVrOMY4YNt8KGmtpGHAdsBv1rImACJiBvB5smsWjXUB6slG9NRKGg10Ldk+B9jyw4zEkbQt8B2ybp2vAudJarbrydoeB761qoj4EXAW2YXYuWTdEKeRjVyBLJQmAy8ALwJT8nVr09ZjwL35sZ5j9ZBuR3YhcxbwNln4nrKGY9QBB+T71pGdGR8QEfPWpqZGx34mItb06eV3wG/JhmrOJPtUVNpd0/ClsjpJU1pqJ+9CuxP4fkRMjYjpZCN97mgYAWVpkC/Sm5mlwWf4ZmaJcOCbmSXCgW9mlggHvplZIpr7AkxVdRx8mq8m2zpp/qRrq12CWZM61NLk/Eg+wzczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ5
8M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NE1BbdgKQaoE9pWxHxz6LbNTOz1RUa+JJOBy4G5gAr89UBfKrIds3M7IOKPsM/AxgYEXUFt2NmZi0oug//X8DCgtswM7MyFH2G/xowQdIjwNKGlRHxo4LbNTOzRooO/H/mPxvmP2ZmViWFBn5EXFrk8c3MrHxFj9J5mGxUTqmFwGTgxohYUmT7Zmb2b0VftH0NeA+4Of95h2yI5rb5spmZtZKi+/AHR8SQkuWHJT0VEUMkvVxw22ZmVqLoM/zekvo2LOSPe+WLywpu28zMShR9hn828IykVwEB/YFTJG0E3FZw223aDRcfw9AhOzD37XfZ9YjLV63/xlGf5+RhQ6hfsZLxT7/EqKt/za7b9+Pabw8HQILv3jCOsU++QOdO7Xn8f85c9dwtNunGL8ZN4tyrHmz112PpGX3RhTz1+wn06NGTX/76N9UuJwmKaHxNtcINSO2BQWSBP63cC7UdB59WbGHruc/uvBWL3l/KLWOOXRX4Q3bdhvOP/xKHnH4Dy5bX07t7Z+bOf4+OHTZg2fIVrFixkk17deX/7r2QAfuOYsWKlasdc+Jd53HeDx9k4pRXq/GS1hvzJ11b7RLahOcmT6JTp06MuvB8B34FdahFTW0r5Axf0t4R8b+SDm20aYAkIuKXRbSbkolTXqXvZj1WW3fiEXtw1a2PsWx5PQBz578HwOIly1ft037DDVjTL/mt+vZmkx5dHPbWanbZ9dO88cbr1S4jKUV16Xwe+F/gwDVsC8CBX4Ct+23CZwdvxaWnHsiSZcu58EcP8dxfsolJP71DP2645Cv03awHIy+67QNn90futwsPPDqlGmWbWSspJPAj4uL8z+M+zPMknQicCFD7sT2p7bV9AdW1XbU17ejetRNDjr2KXbfvx51Xfp1PHHAJAJNemskuh3+Xgf37cMtlX+V3E//C0mX1q557xJd2YeRFt1epcjNrDUV/8ao9cBiwJavPh3/ZmvaPiJuAm8B9+GvjjTkL+NUTUwGY/PJMVq4MenXvzLy8awfgrzPmsGjxMrbfenOm5Gf/n9x2C2pravjzK/+qSt1m1jqKHpb5a+DLQD2wqOTHCvDwhBfY8zPbArB1303YcINa5s1/j36b96SmJvun7rtZd7bdsg8zZ/17xuoj99uF+8ZPrkrNZtZ6ih6W+bGI2K/gNpJ02xUj2GOXbejVrTN/Hz+GMTeM47ZfPcuNlxzD5Pu/xbLlKzh+9B0A7D54AOccty/L61ewcmVwxuX3Urfg3793D/vizhx8+vXVeimWqPPPOYvJk/7EggXz+eLeQ/jGqadz6GFHVLusNq3QYZmSbgL+OyJe/LDPdZeOras8LNPWZa0+LLPE54ARkmaQzYcvICLCtzg0M2tlRQf+0IKPb2ZmZSr0om1EzAQ+DuydP36/6DbNzGzNCg1fSRcD5wMX5qs2AO4ssk0zM1uzos+2DwEOIh+KGRGzgC4Ft2lmZmtQdOAvi2wYUADks2SamVkVFB3490m6Eegm6QTgcXynKzOzqij6JuZXSfoi2a0NBwKjI+KxIts0M7M1K3pYJnnAPyapF1DX0v5mZlaMQrp0JO0maYKkX0oaLOkl4CVgjiRPtWBmVgVFneFfC3wL2JhsXvyhEfFHSYOAe4DxBbVrZmZNKOqibW1EPBoR9wOzI+KPABExraD2zMysBUUFfuntlBY32uZJ0czMqqCoLp0dJb1DNllax/wx+XKHgto0M7NmFHWLw5oijmtmZmvPE5mZmSXCgW9mlggHvplZIhz4ZmaJcOCbmSXCgW9mlggHvplZIhz4ZmaJcOC
bmSXCgW9mlggHvplZIhz4ZmaJcOCbmSXCgW9mlggHvplZIhz4ZmaJcOCbmSXCgW9mlogmb3EoaefmnhgRUypfjpmZFaW5e9r+sJltAexd4VrMzKxATQZ+ROzVmoWYmVmxmjvDX0XSDsB2QIeGdRFxe1FFmZlZ5bUY+JIuBvYkC/xxwFDgGcCBb2a2HilnlM7hwBeA2RFxHLAj0L7QqszMrOLKCfzFEbESqJfUFXgLGFBsWWZmVmnl9OFPltQNuBl4DngP+FOhVZmZWcW1GPgRcUr+8AZJ44GuEfFCsWWZmVmllXPRdsia1kXEU8WUZGZmRSinS+fckscdgM+Qde34i1dmZuuRcrp0DixdlvRx4MrCKjIzs0KszeRprwM7VLoQMzMrVjl9+P9NNncOZL8gdgKmFlmUmZlVXlnDMkse1wP3RMTEguoxM7OClBP43SLi6tIVks5ovM7MzNZtiojmd5CmRMTOjdb9OSIGF1nYrAXLmi/MrEqW1q+sdglmTerfq4Oa2tbcDVCGA0cD/SWNLdnUBairXHlmZtYamuvS+QPwJtCL1W+G8i7gb9qama1nmrsBykxgpqRjgFkRsQRAUkfgY8A/WqVCMzOriHLG4d8HlHZargDuL6YcMzMrSjmBXxsRyxoW8scbFleSmZkVoZzAnyvpoIYFSV8G5hVXkpmZFaGccfgnA3dJujZffh04triSzMysCOVMnvYqsJukzmTj9t8tviwzM6u0Frt0JF0uqVtEvBcR70rqLuk7rVGcmZlVTjl9+EMjYkHDQkTMB/YvriQzMytCOYFfI6l9w0I+Dr99M/ubmdk6qJyLtncCT0i6NV8+DrituJLMzKwI5Vy0vVLSC8A+gIDxQL+iCzMzs8oq945Xs8m+bXsY8AXglcIqMjOzQjQ3W+a2wFHAcLLZMe8lG5a5VyvVZmZmFdRcl8404GngwIj4O4CkM1ulKjMzq7jmunQOI+vKeVLSzZK+QNaHb2Zm66EmAz8iHoqIYcAgYAJwJtBH0vWS9m2l+szMrEJavGgbEYsi4q6IOIBsHvzngQsKr8zMzCqqxXvaVovvaWvrKt/T1tZlzd3TttxhmWZmtp5z4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiCgt8SSPXsO57RbVnZmbNqy3w2IdLWhIRdwFIug5oX2B7ZmbWjCID/1BgrKSVwFDg7Yg4pcD2zMysGRUPfEk9ShaPB34FTAQuk9QjIt6udJtmZtYyRURlDyjNAAJQyZ8NIiIGlHOcWQuWVbYwswpZWr+y2iWYNal/rw5qalvFz/Ajon+lj2lmZh9dkaN0TpXUrWS5uyT34ZuZVUmR4/BPiIgFDQsRMR84ocD2zMysGUUGfjtJq/qSJNUAGxbYnpmZNaPIYZm/A+6TdAPZxduTgfEFtpes74/5Nn+c+BTduvfg1nseAuDnN1/HI79+kI27dQfg+G98k90+O4RXXn6RH15xKQARwYgTTmGPPb9QtdqtbZs7ZzY/GDOK+W/XIYn9v3w4Bx95DO++s5DLv30ec2bPos+mm/OtMT+gS9euLF++nGuuvIzp0/6C2rXj5DPOY8edP13tl9FmVHyUzqoDS+2Ak4AvkI3UeRS4JSJWlPN8j9Ip39Q/T6Zjx05ccemo1QK/Y8dODPvKiNX2XbJkMRvUbkBNbS118+Zy/FcO54HfPEFNbZG/+9sWj9IpX928ubxdN49tBn6C9xct4vSRRzH6ip/w2LixdOnalWFfHcm9d/yM9959h5GnnMnYB3/B9Gkvc/aoMSyYX8dFZ5/KNbfcTbt2ngWmXM2N0insbzEiVkbE9RFxeEQcFhE3lhv29uHsOHhXunbduKx9O3TouCrcly1bSpPvDLMK6NmrN9sM/AQAnTbaiI/3G0Dd3Ld
49ukn2WfoQQDsM/Qg/vDUkwD88x+vsdOu/w+Abt170rlzF6ZPe7k6xbdBRY7S2UbSA5L+Ium1hp+i2rMPeuiBexh5zKF8f8y3efedhavW/+WlFxhx1MF8/ehDOfOC0T67t1Yx+803eHX6NAZu/0kWzH+bnr16A9kvhYULsu9jDth6W559egIr6uuZPet1pv/1FebOmVPNstuUIj8n3QpcD9QDewG3A3c09wRJJ0qaLGnynT+/pcDS2r6DDj2Sux4cx813PEDPXr257uqrVm3bbodP8fNf/Iobbv0Fd992C8uWLq1ipZaCxe+/z3dGnc1J3zyXjTbq3OR+X/rPg+nduw+njzyaG67+AdvtsCM1tTWtWGnbVuSpXceIeEKSImImcImkp4GLm3pCRNwE3ATuw/+oevTsterxAV8+jAvPPu0D+/TrP4AOHToy47W/M/AT27dmeZaQ+vrljBl1Fnvtuz+f23MfALp170HdvLn07NWbunlz2bhbNiNLTW0tJ51x7qrnnnnSsWz+sb5VqbstKvIMf0l+4Xa6pNMkHQJsUmB7VqJu3txVj5/+/RP0H7A1AG/Oep0V9fUAzH5zFv/65z/YdLPNq1KjtX0RwY+vuIS+/QZw2FHHrlq/2+f25PHfjgXg8d+O5T/22AvIBhUsWfw+AFP+9Cw1NTX0679V6xfeRhU5SufTwCtAN2AM0BW4MiL+r5zn+wy/fGMuOo/np0xi4YIFdO/RgxEnnsrU5ybx9+nTkMSmm23BWReMpmev3jw67mHuvv1n1NbW0q5dO44deRKf+7yHZX4YHqVTvpemTuGcU45jy622oZ2y88sRJ53OoO0/yeXfPpe35sxmkz6bMuo7V9Gl68bMfvMNRp35Ddq1a0fP3ptw5oWX0GdTn5B8GM2N0iky8I+IiPtbWtcUB76tqxz4ti6ryrBM4MIy15mZWSsoYj78ocD+wBaSrinZ1JVsxI6ZmVVBEaN0ZgGTgYOA50rWvwucWUB7ZmZWhiLmw58KTJXUJyJuK90m6Qzg6kq3aWZmLSuyD/+oNawbUWB7ZmbWjCL68IcDRwP9JY0t2dQVmFfp9szMrDxF9OH/AXgT6AX8sGR9AMMKaM/MzMpQRB/+TGAm8B+SdiI72z8SmAE8WOn2zMysPEV06WxL1n8/HKgD7iX7gtdelW7LzMzKV0SXzjTgaeDAiPg7gCQPxzQzq7IiRukcBswGnpR0s6SGO16ZmVkVVTzwI+KhiBgGDAImkH3Zqo+k6yXtW+n2zMysPEXe4nBRRNwVEQcAHwOeBy4oqj0zM2teYbNlflSeLdPWVZ4t09Zl1Zot08zM1iEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEIqLaNVgrkHRiRNxU7TrMGvN7s/X4DD8dJ1a7ALMm+L3ZShz4ZmaJcOCbmSXCgZ8O95HausrvzVbii7ZmZonwGb6ZWSIc+GZmiXDgrwckrZD0vKSpkqZI2v0jHOsySftUsj4zAEmHSApJg/LlLSUdXbJ9J0n7f4Tj/0NSr0rUmioH/vphcUTsFBE7AhcCV6ztgSJidEQ8XrnSzFYZDjwDHJUvbwkcXbJ9J2CtA98+Ogf++qcrML9hQdK5kiZJekHSpfm6LSW9IulmSS9LelRSx3zbzyUdnj/eX9I0Sc9IukbSb/L1l0j6H0kTJL0m6ZtVeJ22HpHUGfgsMJJ/B/73gD3yT6fnA5cBw/LlYZI+I+kPkv6c/zkwP1aNpKskvZi/r09v1FZHSeMlndCKL7FNqK12AVaWjpKeBzoAmwF
7A0jaF9gG+AwgYKykIcA/8/XDI+IESfcBhwF3NhxQUgfgRmBIRMyQdE+jNgcBewFdgL9Kuj4ilhf5Im29djAwPiL+JultSTsDFwDnRMQBAJLmALtGxGn5cley91993s14Odn79ESgPzA439ajpJ3OwC+A2yPi9lZ7dW2Ez/DXDw1dOoOA/YDbJQnYN//5MzCFLKS3yZ8zIyKezx8/R/bxutQg4LWImJEvNw78RyJiaUTMA94C+lTyBVmbM5wsiMn/HF7GczYG7pf0EvBjYPt8/T7ADRFRDxARb5c859fArQ77teMz/PVMRDybX7jqTXZWf0VE3Fi6j6QtgaUlq1YAHRsdSi001fj5fq/YGknqSfapcwdJAdQAAYxr4aljgCcj4pD8PTuh4ZD589dkIjBU0t3hLxF9aD7DX8/kIyBqgDrgd8DX8/5TJG0haZMyDzUNGJD/RwMYVuFSLR2Hk3Wx9IuILSPi48AMYCVZl2CDdxstbwy8kT8eUbL+UeBkSbUAjbp0RpO996+r6CtIhAN//dAxv9D1PHAv8LWIWBERjwJ3A89KehF4gNX/QzUpIhYDpwDjJT0DzAEWFlO+tXHDgYcarXuQ7OJtfT6c+EzgSWC7hou2wJXAFZImkp3ENLiF7DrUC5KmsvpIH4D/AjpIurKA19KmeWqFhEnqHBHv5dcDfgpMj4gfV7suMyuGz/DTdkL+qeFlso/XN7awv5mtx3yGb2aWCJ/hm5klwoFvZpYIB76ZWSIc+NZmlcwy+pKk+yV1+gjH2rNkrqGDJF3QzL7dJJ2yFm1cIumcta3RrCUOfGvLGqak2AFYBpxculGZD/1/ICLGRsT3mtmlG9l3HMzWKQ58S8XTwNYlM4leRzb/0Mcl7Svp2fxeA/eXfHN5v4bZRIFDGw4kaYSka/PHfSQ9lH+5aKqyexV8D9gq/3Txg3y/D8xqmq8fJemvkh4HBrba34YlyYFvbV7+Ff2hwIv5qoFkUwEMBhYBFwH7RMTOwGTgrHw20ZuBA4E9gE2bOPw1wO/zexXsTPadhguAV/NPF+c2mtV0J2AXSUMk7UL2bdTBZL9QPl3hl262Gk+IZW1Zw7TSkJ3h/wzYHJgZEX/M1+8GbAdMzL5wzIbAs2Szic6IiOkAku4km7a3sb2BYwEiYgWwUFL3RvuUzmoK2RS/25BNg/FQRLyftzH2I71asxY48K0tWxwRO5WuyEN9Uekq4LGIGN5ov51oesbGD6upWU3/q4JtmLXIXTqWuj8Cn5W0NYCkTpK2JZtNtL+krfL9mprf/QngG/lza/KbejSeFbKpWU2fAg7J7+DUhaz7yKwwDnxLWkTMJZua9x5JL5D9AhgUEUvIunAeyS/azmziEGcAe+WzlT4HbB8RdWRdRC9J+kFTs5pGxBSy2U+fJ5td8unCXqgZnkvHzCwZPsM3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRPx/QQOreDywqpcAAAAASUVORK5CYII=",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
       ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 112
-        },
-        "id": "TFbfVi_-W6GT",
-        "outputId": "68facf43-ac41-44de-d59a-a489627cc893"
-      },
-      "outputs": [
-        {
-          "data": {
-            "text/html": [
-              "<div>\n",
-              "<style scoped>\n",
-              "    .dataframe tbody tr th:only-of-type {\n",
-              "        vertical-align: middle;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe tbody tr th {\n",
-              "        vertical-align: top;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe thead th {\n",
-              "        text-align: right;\n",
-              "    }\n",
-              "</style>\n",
-              "<table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              "    <tr style=\"text-align: right;\">\n",
-              "      <th></th>\n",
-              "      <th>type</th>\n",
-              "      <th>accuracy</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <th>0</th>\n",
-              "      <td>Benign</td>\n",
-              "      <td>1.00</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>1</th>\n",
-              "      <td>Attack</td>\n",
-              "      <td>0.29</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table>\n",
-              "</div>"
-            ],
-            "text/plain": [
-              "     type  accuracy\n",
-              "0  Benign      1.00\n",
-              "1  Attack      0.29"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        }
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Create confusion matrix\n",
+    "conf_matrix = confusion_matrix(y_true, y_pred)\n",
+    "\n",
+    "# Show confusion matrix\n",
+    "ax = plt.subplot()\n",
+    "sns.heatmap(conf_matrix, annot=True, ax=ax, cmap=\"Blues\", fmt=\"g\", cbar=False)\n",
+    "\n",
+    "# Add labels, title and ticks\n",
+    "ax.set_xlabel(\"Predicted\")\n",
+    "ax.set_ylabel(\"Actual\")\n",
+    "ax.set_title(\"Confusion Matrix\")\n",
+    "ax.xaxis.set_ticklabels([\"Benign\", \"Attack\"])\n",
+    "ax.yaxis.set_ticklabels([\"Benign\", \"Attack\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "-H7rWFguYTNZ"
+   },
+   "source": [
+    "Now we can calculate the overall accuracy, precision, and recall, as well as the per-class accuracy."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "uHTInLt2YTNZ",
+    "outputId": "e48f2726-5eba-4d17-ffe7-7b17c62d5ee8"
+   },
+   "outputs": [
+    {
+     "output_type": "stream",
+     "text": [
+      "Accuracy: 0.923\n",
+      "Precision: 0.995\n",
+      "Recall: 0.577\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Calculate accuracy\n",
+    "acc = accuracy_score(y_true, y_pred, normalize=True, sample_weight=None)\n",
+    "precision = precision_score(y_true, y_pred)\n",
+    "recall = recall_score(y_true, y_pred)\n",
+    "\n",
+    "print(f\"Accuracy: {acc:.3f}\")\n",
+    "print(f\"Precision: {precision:.3f}\")\n",
+    "print(f\"Recall: {recall:.3f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 112
+    },
+    "id": "ZNzyqAH9YTNZ",
+    "outputId": "7d2e35a1-df80-47bc-f5e7-fd425e79a7f4"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>type</th>\n",
+       "      <th>accuracy</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Benign</td>\n",
+       "      <td>1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Attack</td>\n",
+       "      <td>0.58</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
       ],
-      "source": [
-        "# Calculate per class accuracy\n",
-        "cmd = confusion_matrix(y_true, y_pred_model, normalize=\"true\").diagonal()\n",
-        "per_class_accuracy_df = pd.DataFrame([(index, round(value,4)) for index, value in zip(['Benign', 'Attack'], cmd)], columns = ['type', 'accuracy'])\n",
-        "per_class_accuracy_df = per_class_accuracy_df.round(2)\n",
-        "display(per_class_accuracy_df)"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "_E5rnFehXSls"
-      },
-      "source": [
-        "As we can see, the direct application of our model produced much worse results. Pinecone's similarity search over the same model's embeddings improved our threat detection (i.e., \"Attack\") accuracy by over 50%!"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "OiT0JgQxktOC"
-      },
-      "source": [
-        "### Result summary"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "UBBv6-dnfEeX"
-      },
-      "source": [
-        "Using standard vector embeddings with Pinecone's similarity search service, we detected 85% of the attacks while keeping a low 3% false-positive rate. We also showed that our similarity search approach outperforms the direct classification approach that utilizes the classifier's embedding model. Similarity search-based detection gained 50% higher accuracy compared to the direct detector.\n",
-        "\n",
-        "[Original published results](https://github.com/rambasnet/DeepLearning-IDS/blob/master/graphics/confusion_matrices/) for 02-22-2018 show that the model was able to correctly detect 208520 benign cases out of 208520 benign cases, and 24 (18+1+5) attacks out of 70 attacks in the test set making this model **34.3% accurate in predicting attacks**. For testing purposes, 20% of the data for 02-22-2018 was used. \n",
-        "\n",
-        "![02-22-2018--6-15%281%29.png](https://raw.githubusercontent.com/rambasnet/DeepLearning-IDS/master/graphics/confusion_matrices/02-22-2018--6-15(1).png)\n",
-        "\n",
-        "As you can see, the model's performance for creating embeddings for Pinecone was much higher. \n",
-        "\n",
-        "The model we have created follows the academic paper ([model for the same date](https://github.com/rambasnet/DeepLearning-IDS/blob/master/keras_tensorflow_models/) (02-23-2018)) and is slightly modified, but still a straightforward, sequential, shallow model. We have changed the number of classes from four (Benign, BruteForce-Web, BruteForce-XSS, SQL-Injection) to two (Benign and Attack), only interested in whether we are detecting an attack or not. We have also changed validation metrics to precision and recall. These changes improved our results. Yet, there is still room for further improvements, for example, by adding more data covering multiple days and different types of attacks."
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "JdZcEl1pfEeX"
-      },
-      "source": [
-        "## Delete the Index"
-      ]
-    },
-    {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "hyxLeOnSfEeX"
-      },
-      "source": [
-        "Delete the index once you are sure that you do not want to use it anymore. Once it is deleted, you cannot reuse it."
+      "text/plain": [
+       "     type  accuracy\n",
+       "0  Benign      1.00\n",
+       "1  Attack      0.58"
       ]
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Calculate per class accuracy\n",
+    "cmd = confusion_matrix(y_true, y_pred, normalize=\"true\").diagonal()\n",
+    "per_class_accuracy_df = pd.DataFrame(\n",
+    "    [(index, round(value, 4)) for index, value in zip([\"Benign\", \"Attack\"], cmd)],\n",
+    "    columns=[\"type\", \"accuracy\"],\n",
+    ")\n",
+    "per_class_accuracy_df = per_class_accuracy_df.round(2)\n",
+    "display(per_class_accuracy_df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Gfy_LW5zXIj6"
+   },
+   "source": [
+    "We got great results using Pinecone! Let's see what happens if we skip the similarity search step and predict values from the model directly. In other words, let's use the model that created the embeddings as a classifier. It will be interesting to compare its accuracy with that of the similarity search approach."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "Oxya9-mMYh5A"
+   },
+   "outputs": [],
+   "source": [
+    "from keras.utils.np_utils import normalize\n",
+    "import numpy as np\n",
+    "\n",
+    "data_sample = normalize(data_22_cleaned.iloc[:, :-1])[-2000:]\n",
+    "y_pred_model = model.predict(normalize(data_sample)).flatten()\n",
+    "y_pred_model = np.round(y_pred_model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 313
     },
+    "id": "GWssFePDXEks",
+    "outputId": "9eee2c60-f1c1-4a34-c682-665389fe3aee"
+   },
+   "outputs": [
     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "ywvaJrVkfEeX"
-      },
-      "outputs": [],
-      "source": [
-        "pc.delete_index(index_name)"
+     "data": {
+      "text/plain": [
+       "[Text(0, 0.5, 'Benign'), Text(0, 1.5, 'Attack')]"
       ]
+     },
+     "output_type": "execute_result"
     },
     {
-      "attachments": {},
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Hu2mIbHms3k7"
-      },
-      "source": [
-        "---"
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEWCAYAAABliCz2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAbqElEQVR4nO3dd5xV9Z3G8c8DI02kI1ixy0Z3xRLjakBRg2JXVAQrKqyxxMTgWhcs0aBJTIwaxbI2jFGz9oKokYglu6JR7A2ChSJNlCbM8N0/zhlyGZmZK94zl5nzvF+vec099fe9w/DMub9zzu8oIjAzs6avWbkLMDOzhuHANzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgW5MhqbWkRyTNl3Tfd9jP0ZLGlbK2cpD0hKTjy12HrTkc+NbgJA2WNFHSAknT02D6YQl2fTjQDegcEUes7k4i4q6I6FeCelYiaQ9JIen+GvO3S+ePL3I/F0kaU996EdE/Im5fzXKtCXLgW4OSdBbwO+ByknDeGPgDcHAJdt8DeD8iKkuwr6zMAnaV1Llg3vHA+6VqQAn/37Zv8C+FNRhJ7YFLgNMi4v6IWBgRyyLikYg4O12npaTfSZqWfv1OUst02R6SPpX0c0mfp58OhqTLLgZGAAPTTw4n1TwSlrRJeiRdkU6fIGmypK8kTZF0dMH85wu221XSy2lX0cuSdi1YNl7SpZJeSPczTlKXOn4MS4EHgaPS7ZsDRwJ31fhZXS3pE0lfSnpFUu90/r7A+QXv8/WCOi6T9AKwCNgsnXdyuvx6SX8u2P8Vkp6RpKL/Aa3Rc+BbQ/p3oBXwQB3rXADsAvQCtgN2Bi4sWN4daA9sAJwEXCepY0SMJPnUcE9EtI2IW+oqRNLawO+B/hGxDrAr8Noq1usEPJau2xm4CnisxhH6YGAIsC7QAhheV9vAHcBx6et9gLeAaTXWeZnkZ9AJ+CNwn6RWETG2xvvcrmCbY4FhwDrA1Br7+znwb+kfs94kP7vjw2Or5IoD3xpSZ2B2PV0uRwOXRMTnETELuJgkyKotS5cvi4jHgQXA1qtZz3JgW0mtI2J6RLy1inX2Bz6IiDsjojIi7gbeBQ4sWOfWiHg/IhYD95IEda0i4kWgk6StSYL/jlWsMyYi5qRt/gZoSf3v87aIeCvdZlmN/S0CjiH5gzUGOCMiPq1nf9bEOPCtIc0BulR3qdRifVY+Op2azluxjxp/MBYBbb9tIRGxEBgInAJMl/SYpJ5F1FNd0wYF0zNWo547gdOBvqziE0/abfVO2o30Bcmnmrq6igA+qWthRPwfMBkQyR8myxkHvjWkl4AlwCF1rDON5ORrtY35ZndHsRYCbQqmuxcujIgnI+JHwHokR+03FVFPdU2frWZN1e4ETgUeT4++V0i7XM4h6dvvGBEdgPkkQQ1QWzdMnd0zkk4j+aQwDfjP1S/dGisHvjWYiJhPcmL1OkmHSGojaS1J/SVdma52N3ChpK7pyc8RJF0Qq+M1oI+kjdMTxudVL5DUTdJBaV/+1yRdQ1Wr2MfjwFbppaQVkgYC3wMeXc2aAIiIKcDuJOcsaloHqCS5oqdC0gigXcHymcAm3+ZKHElbAb8g6dY5FvhPSXV2PVnT48C3BhURVwFnkZyInUXSDXE6yZUrkITSRGAS8Abwajpvddp6Crgn3dcrrBzSzUhOZE4D5pKE76mr2Mcc4IB03TkkR8YHRMTs1ampxr6fj4hVfXp5EniC5FLNqSSfigq7a6pvKpsj6dX62km70MYAV0TE6xHxAcmVPndWXwFl+SCfpDczywcf4ZuZ5YQD38wsJxz4ZmY54cA3M8uJum6AKavW25/us8m2Rpr38rXlLsGsVq0qqHV8JB/hm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA
3M8sJB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU448M3McsKBb2aWExVZNyCpOdCtsK2I+Djrds3MbGWZBr6kM4CRwExgeTo7gH/Lsl0zM/umrI/wzwS2jog5GbdjZmb1yLoP/xNgfsZtmJlZEbI+wp8MjJf0GPB19cyIuCrjds3MrIasA//j9KtF+mVmZmWSaeBHxMVZ7t/MzIqX9VU6j5BclVNoPjARGB0RS7Js38zM/inrk7aTgQXATenXlySXaG6VTpuZWQPJug9/+4joUzD9iKTnIqKPpLcybtvMzApkfYTfVdLG1RPp6y7p5NKM2zYzswJZH+H/HHhe0keAgE2BUyWtDdyecdtN2g0jj6Z/n22ZNfcrdjri8hXzf3zU7pwysA+VVcsZO+FNLrj6IXbapgfX/tcgACS47IbHefjZSQAcue+OnH3iPkQE02fN58QLb2fOFwvL8p4sf16Y8BxXjLqM5VXLOXTAEZw0dFi5S2rSFFHznGqJG5BaAj1JAv/dYk/Utt7+9GwLa+R222FzFi76mpsvPW5F4PfZaUvOOXkfDj3jBpYuq6Rrx7bMmreA1q3WYumyKqqqltO9Szv+957z2KzfBQBMHncZOwz4BXO+WMhlZx7MoiXLuGz04+V8a2u8eS9fW+4SmoSqqioO2n8fRt90K926dWPwwMMZ9aur2HyLLcpdWqPWqgLVtiyTLh1Je6bfDwP2BzYHNgP2S+fZd/TCqx8xd/6ileYNO6I3v771KZYuqwRg1rwFACxesoyqqmQoo5Yt1qL6j7yUfK3dOrlFYp22rZk+yzdGW8N4841JbLRRDzbcaCPWatGCfffbn/HPPlPuspq0rLp0dgf+Ahy4imUB3J9Ru7m2RY912W37zbn4tANZsnQZ5131AK+8nQxM+v1te3DDRcew8XqdOOnC21f8ATjz8nt4+d7zWbh4KR99Mouf/vKecr4Fy5HPZ86k+3rdV0yv260bb0yaVMaKmr5MjvAjYmT6fcgqvk6sbTtJwyRNlDSxcrYv4vm2Kpo3o2O7NvQ57tec/9sHGXPlP3/UL785lR0Pv4wfHnMlZ5/Yj5YtKqioaMbQw3uzy6Ar2KzfBbz5/mecfWK/Mr4Dy5P4xi06INXaG2ElkPWNVy2BAcAmrDwe/iWrWj8ibgRuBPfhr47PZn7Bg8+8DsDEt6ayfHnQpWNbZqddOwDvTZnJwsVL2WaL9Vd09E35dDYAf37qVYYPceBbw+jWrTszps9YMf35zJmsu+66Zayo6cv6ssyHgIOBSmBhwZdl4JHxk9hj560A2GLjdWmxVgWz5y2gx/qdad48+afeeL2ObLVJN6ZOm8O0WfPpuVl3unRsC8Beu/TkvSkzat2/WSlts+2/8vHH/+DTTz9h2dKljH38MXbvu2e5y2rSsr4sc8OI2DfjNnLp9l+eQO8dt6RLh7Z8OPZSLr3hcW5/8CVGX3Q0E+87n6XLqjh5xJ0A7Lr9Zgwf0o9llVUsXx6cefk9Ky69vPzGJ3jq5p+yrLKKj6fPZdjIMeV8W5YjFRUVnHfBCH487GSWL6/ikEMHsMUWW5a7rCYt08syJd0IXBMRb3zbbd2lY2sqX5Zpa7K6LsvM+gj/h8AJkqaQjIcvICLCjzg0M2tgWQd+/4z3b2ZmRcr0pG1ETAU2AvZMXy/Kuk0zM1u1TMNX0kjgHOC8dNZagM8KmpmVQdZH24cCB5FeihkR04B1Mm7TzMxWIevAXxrJZUABkI6SaWZmZZB14N8raTTQQdJQ4Gn8pCszs7LI+iHmv5b0I5JHG24NjIiIp7Js08zMVi3ryzJJA/4pSV2AOVm3Z2Zmq5bVePi7SBov6X5J20t6E3g
TmCnJQy2YmZVBVkf41wLnA+1JxsXvHxF/k9QTuBsYm1G7ZmZWi6xO2lZExLiIuA+YERF/A4iIdzNqz8zM6pFV4C8veL24xjIPimZmVgZZdelsJ+lLksHSWqevSadbZdSmmZnVIZPAj4jmWezXzMxWnwcyMzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU7U+ohDSTvUtWFEvFr6cszMLCt1PdP2N3UsC2DPEtdiZmYZqjXwI6JvQxZiZmbZqusIfwVJ2wLfA1pVz4uIO7IqyszMSq/ewJc0EtiDJPAfB/oDzwMOfDOzRqSYq3QOB/YCZkTEEGA7oGWmVZmZWckVE/iLI2I5UCmpHfA5sFm2ZZmZWakV04c/UVIH4CbgFWAB8H+ZVmVmZiVXb+BHxKnpyxskjQXaRcSkbMsyM7NSK+akbZ9VzYuI57IpyczMslBMl87ZBa9bATuTdO34xiszs0akmC6dAwunJW0EXJlZRWZmlonVGTztU2DbUhdiZmbZKqYP/xqSsXMg+QPRC3g9y6LMzKz0iross+B1JXB3RLyQUT1mZpaRYgK/Q0RcXThD0pk155mZ2ZpNEVH3CtKrEbFDjXl/j4jtsyzsvRmL6i7MrEw6tW1R7hLMatW1bYVqW1bXA1AGAYOBTSU9XLBoHWBO6cozM7OGUFeXzovAdKALKz8M5SvAd9qamTUydT0AZSowVdLRwLSIWAIgqTWwIfCPBqnQzMxKopjr8O8FlhdMVwH3ZVOOmZllpZjAr4iIpdUT6WuftTIza2SKCfxZkg6qnpB0MDA7u5LMzCwLxVyHfwpwl6Rr0+lPgeOyK8nMzLJQzOBpHwG7SGpLct3+V9mXZWZmpVZvl46kyyV1iIgFEfGVpI6SftEQxZmZWekU04ffPyK+qJ6IiHnAftmVZGZmWSgm8JtLalk9kV6H37KO9c3MbA1UzEnbMcAzkm5Np4cAt2dXkpmZZaGYk7ZXSpoE7A0IGAv0yLowMzMrrWKfeDWD5G7bAcBewDuZVWRmZpmoa7TMrYCjgEEko2PeQ3JZZt8Gqs3MzEqori6dd4EJwIER8SGApJ81SFVmZlZydXXpDCDpynlW0k2S9iLpwzczs0ao1sCPiAciYiDQExgP/AzoJul6Sf0aqD4zMyuRek/aRsTCiLgrIg4gGQf/NeDczCszM7OSqveZtuXiZ9ramsrPtLU1WV3PtC32skwzM2vkHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOZBb4kk5axbxRWbVnZmZ1q8hw34dLWhIRdwFI+gPQMsP2zMysDlkG/mHAw5KWA/2BuRFxaobtmZlZHUoe+JI6FUyeDDwIvABcIqlTRMwtdZtmZlY/RURpdyhNAQJQwfdqERGbFbOf92YsKm1hZiXSqW2LcpdgVquubStU27KSH+FHxKal3qeZmX13WV6lc5qkDgXTHSW5D9/MrEyyvA5/aER8UT0REfOAoRm2Z2Zmdcgy8JtJWtGXJKk54M5PM7MyyfKyzCeBeyXdQHLy9hRgbIbt5dasz2fwu8v+i3lz56BmYp8DB3DQ4YP54603MO7R+2nfoSMAxw49nZ126U1l5TKuufISJr//LlVVVfTdZ3+OOOYb98mZlcTlF1/IixP+SsdOnbjz3ocA+HL+F4w4bzgzpn1G9/U34JJRv6Fdu/YrtpkxfRrHHnEQQ4adxuDjhpSr9CYny8A/B/gP4MckV+qMA27OsL3cat68OSeedhabb/UvLFq0kLOGDqbXTj8A4OAjjuHQo45baf0Xnn2aymVLuea2+/h6yWJOO34Affb
qT7f11i9H+dbE7XfgIQw4cjC/GHneinljbruZHb//A44dMpQ7b72JMbfdzKk/+fmK5ddcdQU/2LV3Ocpt0jLr0omI5RFxfUQcHhEDImJ0RFRl1V6ederclc23+hcA2rRZmw17bMqcWbNq30CwZPESqior+frrr6moWIs2a6/dQNVa3vTaYSfatW+/0rwJf32W/gccAkD/Aw5hwvi/rFj23LPPsP4GG7Hp5ls0aJ15kOVVOltK+rOktyVNrv7Kqj1LzJw+jckfvMfW39sWgMce+BNnDDmSq0ddxIKvvgRgtz32plXrVhx/2I846cj+HDLwONZp176u3ZqV1Lw5c+jStSsAXbp2Zd7c5H7MxYsXcdfttzBk2I/LWV6TleVJ21uB64FKoC9wB3BnXRtIGiZpoqSJ99z53xmW1jQtXrSIUSOGc/IZw2mzdlv6H3wEo//4CFff8ic6de7CLdddBcD777xFs2bNue3+cdz0p8d46N47mTHt0zJXbwa33HAdRw4+jjZt/IkzC1n24beOiGckKSKmAhdJmgCMrG2DiLgRuBF8p+23VVm5jFEjhrP73v3Ztc9eAHTs1HnF8n4HHMal5/0EgOeefoIddt6Vioq16NCxEz237cWH775N9/U3LEvtlj8dO3dm9qxZdOnaldmzZtGxUzIiy9tvTmL8M+O4/ve/YcFXX6FmomXLFgwYeHSZK24asgz8JZKaAR9IOh34DFg3w/ZyKyK45oqL2bDHphwy8NgV8+fOmUWnzsnH5r9N+As9Nt0cgK7dujPp1ZfZo9/+fL1kCe+/PYmDjhhcltotn37Ypy9PPPogxw4ZyhOPPkjv3fsC8Idb/tkJcMvo62jduo3DvoRKPpbOih1L3wfeAToAlwLtgCsj4n+L2d5H+MV7e9LfOfeME+mx2ZY0a5bc+nDs0NN57uknmfLheyDRrft6nDr8Qjp17sriRYu4etRIPpk6GSLYq//BHDbo+DK/i8bDY+l8OyPPH85rE1/miy++oFPnzpz0H6fRe4+9GHHuWcycMZ1u3dfj0iuuol37DittVx34vizz26lrLJ0sA/+IiLivvnm1ceDbmsqBb2uyugI/y5O25xU5z8zMGkAW4+H3B/YDNpD0+4JF7Uiu2DEzszLI4qTtNGAicBDwSsH8r4CfZdCemZkVIYvx8F8HXpfULSJuL1wm6Uzg6lK3aWZm9cuyD/+oVcw7IcP2zMysDln04Q8CBgObSnq4YFE7YHap2zMzs+Jk0Yf/IjAd6AL8pmB+AAMzaM/MzIqQRR/+VGAq8O+SepEc7R8JTAH+p9TtmZlZcbLo0tmKpP9+EDAHuIfkBq++pW7LzMyKl0WXzrvABODAiPgQQJIvxzQzK7MsrtIZAMwAnpV0k6S9SJ54ZWZmZVTywI+IByJiINATGE9ys1U3SddL6lfq9szMrDhZPuJwYUTcFREHABsCrwHnZtWemZnVLbPRMr8rj5ZpayqPlmlrsnKNlmlmZmsQB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOOPDNzHLCgW9mlhOKiHLXYA1A0rCIuLHcdZjV5N/NhuMj/PwYVu4CzGrh380G4sA3M8sJB76ZWU448PPDfaS2pvLvZgPxSVszs5zwEb6ZWU448M3McsKB3whIqpL0mqTXJb0qadfvsK9LJO1dyvrMACQdKikk9UynN5E0uGB5L0n7fYf9/0NSl1LUmlcO/MZhcUT0iojtgPOAX67ujiJiREQ8XbrSzFYYBDwPHJVObwIMLljeC1jtwLfvzoHf+LQD5lVPSDpb0su
SJkm6OJ23iaR3JN0k6S1J4yS1TpfdJunw9PV+kt6V9Lyk30t6NJ1/kaT/ljRe0mRJPynD+7RGRFJbYDfgJP4Z+KOA3umn03OAS4CB6fRASTtLelHS39PvW6f7ai7p15LeSH+vz6jRVmtJYyUNbcC32CRUlLsAK0prSa8BrYD1gD0BJPUDtgR2BgQ8LKkP8HE6f1BEDJV0LzAAGFO9Q0mtgNFAn4iYIunuGm32BPoC6wDvSbo+IpZl+SatUTsEGBsR70uaK2kH4FxgeEQcACBpJrBTRJyeTrcj+f2rTLsZLyf5PR0GbApsny7rVNBOW+BPwB0RcUeDvbsmwkf4jUN1l05PYF/gDkkC+qVffwdeJQnpLdNtpkTEa+nrV0g+XhfqCUyOiCnpdM3Afywivo6I2cDnQLdSviFrcgaRBDHp90FFbNMeuE/Sm8BvgW3S+XsDN0REJUBEzC3Y5iHgVof96vERfiMTES+lJ666khzV/zIiRheuI2kT4OuCWVVA6xq7Uj1N1dzevyu2SpI6k3zq3FZSAM2BAB6vZ9NLgWcj4tD0d3Z89S7T7VflBaC/pD+GbyL61nyE38ikV0A0B+YATwInpv2nSNpA0rpF7updYLP0PxrAwBKXavlxOEkXS4+I2CQiNgKmAMtJugSrfVVjuj3wWfr6hIL544BTJFUA1OjSGUHyu/+Hkr6DnHDgNw6t0xNdrwH3AMdHRFVEjAP+CLwk6Q3gz6z8H6pWEbEYOBUYK+l5YCYwP5vyrYkbBDxQY97/kJy8rUwvJ/4Z8CzwveqTtsCVwC8lvUByEFPtZpLzUJMkvc7KV/oA/BRoJenKDN5Lk+ahFXJMUtuIWJCeD7gO+CAiflvuuswsGz7Cz7eh6aeGt0g+Xo+uZ30za8R8hG9mlhM+wjczywkHvplZTjjwzcxywoFvTVbBKKNvSrpPUpvvsK89CsYaOkjSuXWs20HSqavRxkWShq9ujWb1ceBbU1Y9JMW2wFLglMKFSnzr/wMR8XBEjKpjlQ4k9ziYrVEc+JYXE4AtCkYS/QPJ+EMbSeon6aX0WQP3Fdy5vG/1aKLAYdU7knSCpGvT190kPZDeXPS6kmcVjAI2Tz9d/Cpd7xujmqbzL5D0nqSnga0b7KdhueTAtyYvvUW/P/BGOmtrkqEAtgcWAhcCe0fEDsBE4Kx0NNGbgAOB3kD3Wnb/e+Cv6bMKdiC5p+Fc4KP008XZNUY17QXsKKmPpB1J7kbdnuQPyvdL/NbNVuIBsawpqx5WGpIj/FuA9YGpEfG3dP4uwPeAF5IbjmkBvEQymuiUiPgAQNIYkmF7a9oTOA4gIqqA+ZI61lincFRTSIb43ZJkGIwHImJR2sbD3+ndmtXDgW9N2eKI6FU4Iw31hYWzgKciYlCN9XpR+4iN31Zto5r+tIRtmNXLXTqWd38DdpO0BYCkNpK2IhlNdFNJm6fr1Ta++zPAj9Ntm6cP9ag5KmRto5o+BxyaPsFpHZLuI7PMOPAt1yJiFsnQvHdLmkTyB6BnRCwh6cJ5LD1pO7WWXZwJ9E1HK30F2CYi5pB0Eb0p6Ve1jWoaEa+SjH76GsnokhMye6NmeCwdM7Pc8BG+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnx/zT+z1Wh63oMAAAAAElFTkSuQmCC",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
       ]
+     },
+     "output_type": "display_data"
     }
-  ],
-  "metadata": {
-    "accelerator": "TPU",
+   ],
+   "source": [
+    "# Create confusion matrix\n",
+    "conf_matrix = confusion_matrix(y_true, y_pred_model)\n",
+    "\n",
+    "# Show confusion matrix\n",
+    "ax = plt.subplot()\n",
+    "sns.heatmap(conf_matrix, annot=True, ax=ax, cmap=\"Blues\", fmt=\"g\", cbar=False)\n",
+    "\n",
+    "# Add labels, title and ticks\n",
+    "ax.set_xlabel(\"Predicted\")\n",
+    "ax.set_ylabel(\"Acctual\")\n",
+    "ax.set_ylabel(\"Actual\")\n",
+    "ax.xaxis.set_ticklabels([\"Benign\", \"Attack\"])\n",
+    "ax.yaxis.set_ticklabels([\"Benign\", \"Attack\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
     "colab": {
-      "collapsed_sections": [],
-      "machine_shape": "hm",
-      "name": "it_threat_detection.ipynb",
-      "provenance": []
-    },
-    "environment": {
-      "name": "tf2-gpu.2-3.m65",
-      "type": "gcloud",
-      "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-3:m65"
-    },
-    "kernelspec": {
-      "display_name": "Python 3",
-      "language": "python",
-      "name": "python3"
-    },
-    "language_info": {
-      "codemirror_mode": {
-        "name": "ipython",
-        "version": 3
-      },
-      "file_extension": ".py",
-      "mimetype": "text/x-python",
-      "name": "python",
-      "nbconvert_exporter": "python",
-      "pygments_lexer": "ipython3",
-      "version": "3.10.7 (main, Sep 14 2022, 22:38:23) [Clang 14.0.0 (clang-1400.0.29.102)]"
-    },
-    "vscode": {
-      "interpreter": {
-        "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
-      }
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "VJRwvXOvYtBL",
+    "outputId": "9d45426b-6bd8-4374-dcf8-9ffecedd7176"
+   },
+   "outputs": [
+    {
+     "output_type": "stream",
+     "text": [
+      "Accuracy: 0.871\n",
+      "Precision: 1.000\n",
+      "Recall: 0.287\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Calculate accuracy\n",
+    "acc = accuracy_score(y_true, y_pred_model, normalize=True, sample_weight=None)\n",
+    "precision = precision_score(y_true, y_pred_model)\n",
+    "recall = recall_score(y_true, y_pred_model)\n",
+    "\n",
+    "print(f\"Accuracy: {acc:.3f}\")\n",
+    "print(f\"Precision: {precision:.3f}\")\n",
+    "print(f\"Recall: {recall:.3f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 112
+    },
+    "id": "TFbfVi_-W6GT",
+    "outputId": "68facf43-ac41-44de-d59a-a489627cc893"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>type</th>\n",
+       "      <th>accuracy</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Benign</td>\n",
+       "      <td>1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Attack</td>\n",
+       "      <td>0.29</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     type  accuracy\n",
+       "0  Benign      1.00\n",
+       "1  Attack      0.29"
+      ]
+     },
+     "output_type": "display_data"
     }
+   ],
+   "source": [
+    "# Calculate per class accuracy\n",
+    "cmd = confusion_matrix(y_true, y_pred_model, normalize=\"true\").diagonal()\n",
+    "per_class_accuracy_df = pd.DataFrame(\n",
+    "    [(index, round(value, 4)) for index, value in zip([\"Benign\", \"Attack\"], cmd)],\n",
+    "    columns=[\"type\", \"accuracy\"],\n",
+    ")\n",
+    "per_class_accuracy_df = per_class_accuracy_df.round(2)\n",
+    "display(per_class_accuracy_df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "_E5rnFehXSls"
+   },
+   "source": [
+    "As we can see, the direct application of our model produced much worse results. Pinecone's similarity search over the same model's embeddings improved our threat detection (i.e., \"Attack\") accuracy by over 50%!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "OiT0JgQxktOC"
+   },
+   "source": [
+    "### Result summary"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "UBBv6-dnfEeX"
+   },
+   "source": [
+    "Using standard vector embeddings with Pinecone's similarity search service, we detected 85% of the attacks while keeping a low 3% false-positive rate. We also showed that our similarity search approach outperforms the direct classification approach that utilizes the classifier's embedding model. Similarity search-based detection gained 50% higher accuracy compared to the direct detector.\n",
+    "\n",
+    "[Original published results](https://github.com/rambasnet/DeepLearning-IDS/blob/master/graphics/confusion_matrices/) for 02-22-2018 show that the model was able to correctly detect 208520 benign cases out of 208520 benign cases, and 24 (18+1+5) attacks out of 70 attacks in the test set making this model **34.3% accurate in predicting attacks**. For testing purposes, 20% of the data for 02-22-2018 was used. \n",
+    "\n",
+    "![02-22-2018--6-15%281%29.png](https://raw.githubusercontent.com/rambasnet/DeepLearning-IDS/master/graphics/confusion_matrices/02-22-2018--6-15(1).png)\n",
+    "\n",
+    "As you can see, the model's performance for creating embeddings for Pinecone was much higher. \n",
+    "\n",
+    "The model we have created follows the academic paper ([model for the same date](https://github.com/rambasnet/DeepLearning-IDS/blob/master/keras_tensorflow_models/) (02-23-2018)) and is slightly modified, but still a straightforward, sequential, shallow model. We have changed the number of classes from four (Benign, BruteForce-Web, BruteForce-XSS, SQL-Injection) to two (Benign and Attack), only interested in whether we are detecting an attack or not. We have also changed validation metrics to precision and recall. These changes improved our results. Yet, there is still room for further improvements, for example, by adding more data covering multiple days and different types of attacks."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "JdZcEl1pfEeX"
+   },
+   "source": [
+    "## Delete the Index"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "hyxLeOnSfEeX"
+   },
+   "source": [
+    "Delete the index once you are sure that you do not want to use it anymore. Once it is deleted, you cannot reuse it."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "ywvaJrVkfEeX"
+   },
+   "outputs": [],
+   "source": [
+    "pc.delete_index(index_name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Hu2mIbHms3k7"
+   },
+   "source": [
+    "---"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "TPU",
+  "colab": {
+   "collapsed_sections": [],
+   "machine_shape": "hm",
+   "name": "it_threat_detection.ipynb",
+   "provenance": []
+  },
+  "environment": {
+   "name": "tf2-gpu.2-3.m65",
+   "type": "gcloud",
+   "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-3:m65"
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.7 (main, Sep 14 2022, 22:38:23) [Clang 14.0.0 (clang-1400.0.29.102)]"
   },
-  "nbformat": 4,
-  "nbformat_minor": 1
+  "vscode": {
+   "interpreter": {
+    "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
 }
\ No newline at end of file

From c37de91af47c3f40ebaac099e4cf78666f31fe73 Mon Sep 17 00:00:00 2001
From: Jen Hamon <jhamon@pinecone.io>
Date: Thu, 29 Jan 2026 02:51:04 -0500
Subject: [PATCH 2/5] fix: fix notebook lint and validation issues

- Fix import sorting (I001 errors)
- Add missing metadata to execute_result outputs
- Add missing name to stream outputs
---
 learn/security/it-threat-detection.ipynb | 102 ++++++++++++++++-------
 1 file changed, 70 insertions(+), 32 deletions(-)

diff --git a/learn/security/it-threat-detection.ipynb b/learn/security/it-threat-detection.ipynb
index 0e98a4df..4495ef3b 100644
--- a/learn/security/it-threat-detection.ipynb
+++ b/learn/security/it-threat-detection.ipynb
@@ -64,12 +64,15 @@
        "[]"
       ]
      },
-     "output_type": "execute_result"
+     "output_type": "execute_result",
+     "metadata": {},
+     "execution_count": 1
     }
    ],
    "source": [
     "import os\n",
     "from getpass import getpass\n",
+    "\n",
     "from pinecone import Pinecone\n",
     "\n",
     "# initialize connection to pinecone (get API key at app.pinecone.io)\n",
@@ -108,14 +111,19 @@
    "outputs": [],
    "source": [
     "from collections import Counter\n",
+    "\n",
     "import matplotlib.pyplot as plt\n",
     "import pandas as pd\n",
     "import seaborn as sns\n",
-    "from tensorflow import keras\n",
-    "from tensorflow.keras.models import Model\n",
     "import tensorflow.keras.backend as K\n",
-    "from sklearn.metrics import accuracy_score, precision_score, recall_score\n",
-    "from sklearn.metrics import confusion_matrix"
+    "from sklearn.metrics import (\n",
+    "    accuracy_score,\n",
+    "    confusion_matrix,\n",
+    "    precision_score,\n",
+    "    recall_score,\n",
+    ")\n",
+    "from tensorflow import keras\n",
+    "from tensorflow.keras.models import Model"
    ]
   },
   {
@@ -281,7 +289,8 @@
      "text": [
       "Thursday-22-02-2018 100%[===================>] 364.91M  3.07MB/s    in 2m 6s   \n",
       "Friday-23-02-2018_T 100%[===================>] 365.10M  3.07MB/s    in 1m 53s  \n"
-     ]
+     ],
+     "name": "stdout"
     }
    ],
    "source": [
@@ -319,7 +328,9 @@
        "Name: Label, dtype: int64"
       ]
      },
-     "output_type": "execute_result"
+     "output_type": "execute_result",
+     "metadata": {},
+     "execution_count": 1
     }
    ],
    "source": [
@@ -353,7 +364,8 @@
       "cleaning Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\n",
       "total rows read = 1048576\n",
       "all done writing 1042868 rows; dropped 5708 rows\n"
-     ]
+     ],
+     "name": "stdout"
     }
    ],
    "source": [
@@ -548,7 +560,7 @@
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
-       "<p>5 rows × 80 columns</p>\n",
+       "<p>5 rows \u00d7 80 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
@@ -590,7 +602,9 @@
        "[5 rows x 80 columns]"
       ]
      },
-     "output_type": "execute_result"
+     "output_type": "execute_result",
+     "metadata": {},
+     "execution_count": 1
     }
    ],
    "source": [
@@ -619,7 +633,9 @@
        "Name: Label, dtype: int64"
       ]
      },
-     "output_type": "execute_result"
+     "output_type": "execute_result",
+     "metadata": {},
+     "execution_count": 1
     }
    ],
    "source": [
@@ -694,7 +710,8 @@
       "Trainable params: 18,561\n",
       "Non-trainable params: 0\n",
       "_________________________________________________________________\n"
-     ]
+     ],
+     "name": "stdout"
     }
    ],
    "source": [
@@ -750,8 +767,9 @@
     {
      "output_type": "stream",
      "text": [
-      "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1042867/1042867 [01:43<00:00, 10067.22it/s]\n"
-     ]
+      "100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1042867/1042867 [01:43<00:00, 10067.22it/s]\n"
+     ],
+     "name": "stdout"
     }
    ],
    "source": [
@@ -846,7 +864,9 @@
        "{'dimension': 128, 'namespaces': {'': {'vector_count': 1042867}}}"
       ]
      },
-     "output_type": "execute_result"
+     "output_type": "execute_result",
+     "metadata": {},
+     "execution_count": 1
     }
    ],
    "source": [
@@ -917,7 +937,8 @@
       "cleaning Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\n",
       "total rows read = 1048576\n",
       "all done writing 1042966 rows; dropped 5610 rows\n"
-     ]
+     ],
+     "name": "stdout"
     }
    ],
    "source": [
@@ -1103,7 +1124,7 @@
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
-       "<p>5 rows × 80 columns</p>\n",
+       "<p>5 rows \u00d7 80 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
@@ -1145,7 +1166,9 @@
        "[5 rows x 80 columns]"
       ]
      },
-     "output_type": "execute_result"
+     "output_type": "execute_result",
+     "metadata": {},
+     "execution_count": 1
     }
    ],
    "source": [
@@ -1174,7 +1197,9 @@
        "Name: Label, dtype: int64"
       ]
      },
-     "output_type": "execute_result"
+     "output_type": "execute_result",
+     "metadata": {},
+     "execution_count": 1
     }
    ],
    "source": [
@@ -1211,7 +1236,9 @@
        "Name: Label, dtype: int64"
       ]
      },
-     "output_type": "execute_result"
+     "output_type": "execute_result",
+     "metadata": {},
+     "execution_count": 1
     }
    ],
    "source": [
@@ -1268,8 +1295,9 @@
     {
      "output_type": "stream",
      "text": [
-      "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [10:48<00:00, 32.44s/it]\n"
-     ]
+      "100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [10:48<00:00, 32.44s/it]\n"
+     ],
+     "name": "stdout"
     }
    ],
    "source": [
@@ -1326,7 +1354,9 @@
        "[Text(0, 0.5, 'Benign'), Text(0, 1.5, 'Attack')]"
       ]
      },
-     "output_type": "execute_result"
+     "output_type": "execute_result",
+     "metadata": {},
+     "execution_count": 1
     },
     {
      "data": {
@@ -1335,7 +1365,8 @@
        "<Figure size 432x288 with 1 Axes>"
       ]
      },
-     "output_type": "display_data"
+     "output_type": "display_data",
+     "metadata": {}
     }
    ],
    "source": [
@@ -1380,7 +1411,8 @@
       "Accuracy: 0.923\n",
       "Precision: 0.995\n",
       "Recall: 0.577\n"
-     ]
+     ],
+     "name": "stdout"
     }
    ],
    "source": [
@@ -1452,7 +1484,8 @@
        "1  Attack      0.58"
       ]
      },
-     "output_type": "display_data"
+     "output_type": "display_data",
+     "metadata": {}
     }
    ],
    "source": [
@@ -1483,8 +1516,8 @@
    },
    "outputs": [],
    "source": [
-    "from keras.utils.np_utils import normalize\n",
     "import numpy as np\n",
+    "from keras.utils.np_utils import normalize\n",
     "\n",
     "data_sample = normalize(data_22_cleaned.iloc[:, :-1])[-2000:]\n",
     "y_pred_model = model.predict(normalize(data_sample)).flatten()\n",
@@ -1509,7 +1542,9 @@
        "[Text(0, 0.5, 'Benign'), Text(0, 1.5, 'Attack')]"
       ]
      },
-     "output_type": "execute_result"
+     "output_type": "execute_result",
+     "metadata": {},
+     "execution_count": 1
     },
     {
      "data": {
@@ -1518,7 +1553,8 @@
        "<Figure size 432x288 with 1 Axes>"
       ]
      },
-     "output_type": "display_data"
+     "output_type": "display_data",
+     "metadata": {}
     }
    ],
    "source": [
@@ -1554,7 +1590,8 @@
       "Accuracy: 0.871\n",
       "Precision: 1.000\n",
       "Recall: 0.287\n"
-     ]
+     ],
+     "name": "stdout"
     }
    ],
    "source": [
@@ -1626,7 +1663,8 @@
        "1  Attack      0.29"
       ]
      },
-     "output_type": "display_data"
+     "output_type": "display_data",
+     "metadata": {}
     }
    ],
    "source": [
@@ -1752,4 +1790,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 1
-}
\ No newline at end of file
+}

From 9a255149458f7cc86d81115761a57af38589b707 Mon Sep 17 00:00:00 2001
From: Jen Hamon <jhamon@pinecone.io>
Date: Thu, 29 Jan 2026 02:57:00 -0500
Subject: [PATCH 3/5] fix: update keras import to use tensorflow.keras

Address Bugbot review: change `from keras.utils.np_utils import normalize`
to `from tensorflow.keras.utils import normalize` since standalone keras
was removed from dependencies.
---
 learn/security/it-threat-detection.ipynb | 3488 +++++++++++-----------
 1 file changed, 1728 insertions(+), 1760 deletions(-)

diff --git a/learn/security/it-threat-detection.ipynb b/learn/security/it-threat-detection.ipynb
index 4495ef3b..01746744 100644
--- a/learn/security/it-threat-detection.ipynb
+++ b/learn/security/it-threat-detection.ipynb
@@ -1,1793 +1,1761 @@
 {
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/security/it-threat-detection.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/security/it-threat-detection.ipynb)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "b4Yv1jeGywpL"
-   },
-   "source": [
-    "## IT Threat Detection With Similarity Search\n",
-    "\n",
-    "This notebook shows how to use Pinecone's similarity search as a service to build an application for detecting rare events. Such application is common in cyber-security and fraud detection domains wherein only a tiny fraction of the events are malicious. \n",
-    "\n",
-    "Here we will build a network intrusion detector. Network intrusion detection systems monitor incoming and outgoing network traffic flow, raising alarms whenever a threat is detected. Here we use a deep-learning model and similarity search in detecting and classifying network intrusion traffic.\n",
-    "\n",
-    "We will start by indexing a set of labeled traffic events in the form of vector embeddings. Each event is either benign or malicious. The vector embeddings are rich, mathematical representations of the network traffic events. It is making it possible to determine how similar the network events are to one another using similarity-search algorithms built into Pinecone. Here we will transform network traffic events into vectors using a deep learning model from recent academic work.\n",
-    "\n",
-    "\n",
-    "We will then take some new (unseen) network events and search through the index to find the most similar matches, along with their labels. In such a way, we will propagate the matched labels to classify the unseen events as benign or malicious. Mind that the intrusion detection task is a challenging classification task because malicious events are sporadic. The similarity search service helps us sift the most relevant historical labeled events. That way, we identify these rare events while keeping a low rate of false alarms. \n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "1OUSClcPhU4j"
-   },
-   "source": [
-    "## Setting up Pinecone\n",
-    "\n",
-    "We will first install and initialize Pinecone. You can get your [API Key here](https://app.pinecone.io)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "251n1avKzCrm"
-   },
-   "outputs": [],
-   "source": [
-    "!pip install -qU pinecone"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/security/it-threat-detection.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/security/it-threat-detection.ipynb)"
+      ]
     },
-    "id": "_cGTuY8dywpV",
-    "outputId": "da0b57bf-bde2-401f-e502-6ad80f7fc6cc"
-   },
-   "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "[]"
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "b4Yv1jeGywpL"
+      },
+      "source": [
+        "## IT Threat Detection With Similarity Search\n",
+        "\n",
+        "This notebook shows how to use Pinecone's similarity search as a service to build an application for detecting rare events. Such applications are common in cyber-security and fraud detection domains wherein only a tiny fraction of the events are malicious. \n",
+        "\n",
+        "Here we will build a network intrusion detector. Network intrusion detection systems monitor incoming and outgoing network traffic flow, raising alarms whenever a threat is detected. Here we use a deep-learning model and similarity search in detecting and classifying network intrusion traffic.\n",
+        "\n",
+        "We will start by indexing a set of labeled traffic events in the form of vector embeddings. Each event is either benign or malicious. The vector embeddings are rich, mathematical representations of the network traffic events. They make it possible to determine how similar the network events are to one another using similarity-search algorithms built into Pinecone. Here we will transform network traffic events into vectors using a deep learning model from academic work.\n",
+        "\n",
+        "\n",
+        "We will then take some new (unseen) network events and search through the index to find the most similar matches, along with their labels. In this way, we will propagate the matched labels to classify the unseen events as benign or malicious. Keep in mind that the intrusion detection task is a challenging classification task because malicious events are sporadic. The similarity search service helps us sift out the most relevant historical labeled events. That way, we identify these rare events while keeping a low rate of false alarms. \n"
       ]
-     },
-     "output_type": "execute_result",
-     "metadata": {},
-     "execution_count": 1
-    }
-   ],
-   "source": [
-    "import os\n",
-    "from getpass import getpass\n",
-    "\n",
-    "from pinecone import Pinecone\n",
-    "\n",
-    "# initialize connection to pinecone (get API key at app.pinecone.io)\n",
-    "api_key = os.environ.get(\"PINECONE_API_KEY\") or getpass(\"Enter your Pinecone API key: \")\n",
-    "\n",
-    "# configure client\n",
-    "pc = Pinecone(api_key=api_key)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "a7ysNAlrjD_k"
-   },
-   "source": [
-    "## Installing other dependencies"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "r3g-b61IywpQ"
-   },
-   "outputs": [],
-   "source": [
-    "!pip install -qU pip python-dateutil tensorflow scikit-learn matplotlib seaborn"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "0uuHcP-WywpQ"
-   },
-   "outputs": [],
-   "source": [
-    "from collections import Counter\n",
-    "\n",
-    "import matplotlib.pyplot as plt\n",
-    "import pandas as pd\n",
-    "import seaborn as sns\n",
-    "import tensorflow.keras.backend as K\n",
-    "from sklearn.metrics import (\n",
-    "    accuracy_score,\n",
-    "    confusion_matrix,\n",
-    "    precision_score,\n",
-    "    recall_score,\n",
-    ")\n",
-    "from tensorflow import keras\n",
-    "from tensorflow.keras.models import Model"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "lEwqF1osp83o"
-   },
-   "source": [
-    "We will use some of the code from an [academic work on deep learning for intrusion detection](https://github.com/rambasnet/DeepLearning-IDS). Let's clone the repository that we will use to prepare data."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "FUDeovNiywpT"
-   },
-   "outputs": [],
-   "source": [
-    "!git clone -q https://github.com/rambasnet/DeepLearning-IDS.git "
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "mc4ERmwniO1H"
-   },
-   "source": [
-    "## Define a New Pinecone Index"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Now we setup our index specification, this allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/guides/get-started/understanding-organizations#regions)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from pinecone import ServerlessSpec\n",
-    "\n",
-    "cloud = os.environ.get(\"PINECONE_CLOUD\") or \"aws\"\n",
-    "region = os.environ.get(\"PINECONE_REGION\") or \"us-east-1\"\n",
-    "\n",
-    "spec = ServerlessSpec(cloud=cloud, region=region)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Create the index:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "9_TIgYxBywpV"
-   },
-   "outputs": [],
-   "source": [
-    "# Pick a name for the new service\n",
-    "index_name = \"it-threats\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import time\n",
-    "\n",
-    "# check if index already exists (it shouldn't if this is first time)\n",
-    "if not pc.has_index(index_name):\n",
-    "    # if does not exist, create index\n",
-    "    pc.create_index(name=index_name, dimension=128, metric=\"euclidean\", spec=spec)\n",
-    "    # wait for index to be initialized\n",
-    "    while not pc.describe_index(index_name).status[\"ready\"]:\n",
-    "        time.sleep(1)\n",
-    "\n",
-    "# connect to index\n",
-    "index = pc.Index(index_name)\n",
-    "# view index stats\n",
-    "index.describe_index_stats()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "IOP9jCo5ywpX"
-   },
-   "source": [
-    "## Upload\n",
-    "Here we transform network events into vector embeddings, then upload them into Pinecone's vector index. "
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "PzkJJd8ZYTNM"
-   },
-   "source": [
-    "### Prepare Data"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "-N0bh6dUYTNN"
-   },
-   "source": [
-    "The datasets we use consist of benign (normal) network traffic and malicious traffic\n",
-    "generated from several different network attacks. We will focus on web attacks only. \n",
-    "\n",
-    "The web attack category consists of three common attacks: \n",
-    "- Cross-site scripting (BruteForce-XSS), \n",
-    "- SQL-Injection (SQL-Injection), \n",
-    "- Brute force administrative and user passwords (BruteForce-Web)\n",
-    "\n",
-    "The original data was recorded over two days."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "vuOLNAehYTNN"
-   },
-   "source": [
-    "**Download data for 22-02-2018 and 23-02-2018**\n",
-    "\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "pCrwPkDJYTNO"
-   },
-   "source": [
-    "Files should be downloaded to the current directory. We will be using one date for training and generating vectors, and another one for testing."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "HshKyF0KywpR",
-    "outputId": "d5b3ceee-b584-47e2-a428-43e5c835968f"
-   },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "text": [
-      "Thursday-22-02-2018 100%[===================>] 364.91M  3.07MB/s    in 2m 6s   \n",
-      "Friday-23-02-2018_T 100%[===================>] 365.10M  3.07MB/s    in 1m 53s  \n"
-     ],
-     "name": "stdout"
-    }
-   ],
-   "source": [
-    "!wget \"https://cse-cic-ids2018.s3.ca-central-1.amazonaws.com/Processed%20Traffic%20Data%20for%20ML%20Algorithms/Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\" -q --show-progress\n",
-    "!wget \"https://cse-cic-ids2018.s3.ca-central-1.amazonaws.com/Processed%20Traffic%20Data%20for%20ML%20Algorithms/Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\" -q --show-progress"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "gr-hay4rfk0d"
-   },
-   "source": [
-    "Let's look at the data events first."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "bSSZ4YCZywpT",
-    "outputId": "27d111b0-c0c1-4694-9e07-65965d2014f1"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Benign              1048009\n",
-       "Brute Force -Web        362\n",
-       "Brute Force -XSS        151\n",
-       "SQL Injection            53\n",
-       "Name: Label, dtype: int64"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "1OUSClcPhU4j"
+      },
+      "source": [
+        "## Setting up Pinecone\n",
+        "\n",
+        "We will first install and initialize Pinecone. You can get your [API Key here](https://app.pinecone.io)."
       ]
-     },
-     "output_type": "execute_result",
-     "metadata": {},
-     "execution_count": 1
-    }
-   ],
-   "source": [
-    "data = pd.read_csv(\"Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\")\n",
-    "data.Label.value_counts()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "vYl82iXeywpT"
-   },
-   "source": [
-    "**Clean the data** using a python script from the cloned repository."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "y-trCZOcywpT",
-    "outputId": "c51b5e77-4e66-47a3-c4f6-d492a7dfaef8"
-   },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "text": [
-      "cleaning Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\n",
-      "total rows read = 1048576\n",
-      "all done writing 1042868 rows; dropped 5708 rows\n"
-     ],
-     "name": "stdout"
-    }
-   ],
-   "source": [
-    "!python DeepLearning-IDS/data_cleanup.py \"Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\" \"result23022018\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "oKHL8HCjYTNQ"
-   },
-   "source": [
-    "Load the file that you got from the previous step."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 278
-    },
-    "id": "uzH6_tBpywpU",
-    "outputId": "017263dd-8e2a-45a4-9693-5b5b763197ce"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Dst Port</th>\n",
-       "      <th>Protocol</th>\n",
-       "      <th>Timestamp</th>\n",
-       "      <th>Flow Duration</th>\n",
-       "      <th>Tot Fwd Pkts</th>\n",
-       "      <th>Tot Bwd Pkts</th>\n",
-       "      <th>TotLen Fwd Pkts</th>\n",
-       "      <th>TotLen Bwd Pkts</th>\n",
-       "      <th>Fwd Pkt Len Max</th>\n",
-       "      <th>Fwd Pkt Len Min</th>\n",
-       "      <th>...</th>\n",
-       "      <th>Fwd Seg Size Min</th>\n",
-       "      <th>Active Mean</th>\n",
-       "      <th>Active Std</th>\n",
-       "      <th>Active Max</th>\n",
-       "      <th>Active Min</th>\n",
-       "      <th>Idle Mean</th>\n",
-       "      <th>Idle Std</th>\n",
-       "      <th>Idle Max</th>\n",
-       "      <th>Idle Min</th>\n",
-       "      <th>Label</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>22</td>\n",
-       "      <td>6</td>\n",
-       "      <td>1.519374e+09</td>\n",
-       "      <td>1532698</td>\n",
-       "      <td>11</td>\n",
-       "      <td>11</td>\n",
-       "      <td>1179</td>\n",
-       "      <td>1969</td>\n",
-       "      <td>648</td>\n",
-       "      <td>0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>32</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000e+00</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>Benign</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>500</td>\n",
-       "      <td>17</td>\n",
-       "      <td>1.519374e+09</td>\n",
-       "      <td>117573855</td>\n",
-       "      <td>3</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1500</td>\n",
-       "      <td>0</td>\n",
-       "      <td>500</td>\n",
-       "      <td>500</td>\n",
-       "      <td>...</td>\n",
-       "      <td>8</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>58786927.5</td>\n",
-       "      <td>2.375324e+07</td>\n",
-       "      <td>75583006</td>\n",
-       "      <td>41990849</td>\n",
-       "      <td>Benign</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>500</td>\n",
-       "      <td>17</td>\n",
-       "      <td>1.519374e+09</td>\n",
-       "      <td>117573848</td>\n",
-       "      <td>3</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1500</td>\n",
-       "      <td>0</td>\n",
-       "      <td>500</td>\n",
-       "      <td>500</td>\n",
-       "      <td>...</td>\n",
-       "      <td>8</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>58786924.0</td>\n",
-       "      <td>2.375325e+07</td>\n",
-       "      <td>75583007</td>\n",
-       "      <td>41990841</td>\n",
-       "      <td>Benign</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>22</td>\n",
-       "      <td>6</td>\n",
-       "      <td>1.519374e+09</td>\n",
-       "      <td>1745392</td>\n",
-       "      <td>11</td>\n",
-       "      <td>11</td>\n",
-       "      <td>1179</td>\n",
-       "      <td>1969</td>\n",
-       "      <td>648</td>\n",
-       "      <td>0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>32</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000e+00</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>Benign</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>500</td>\n",
-       "      <td>17</td>\n",
-       "      <td>1.519374e+09</td>\n",
-       "      <td>89483474</td>\n",
-       "      <td>6</td>\n",
-       "      <td>0</td>\n",
-       "      <td>3000</td>\n",
-       "      <td>0</td>\n",
-       "      <td>500</td>\n",
-       "      <td>500</td>\n",
-       "      <td>...</td>\n",
-       "      <td>8</td>\n",
-       "      <td>4000364.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>4000364</td>\n",
-       "      <td>4000364</td>\n",
-       "      <td>21370777.5</td>\n",
-       "      <td>1.528092e+07</td>\n",
-       "      <td>41989576</td>\n",
-       "      <td>7200485</td>\n",
-       "      <td>Benign</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>5 rows \u00d7 80 columns</p>\n",
-       "</div>"
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "251n1avKzCrm"
+      },
+      "source": [
+        "!pip install -qU pinecone"
       ],
-      "text/plain": [
-       "   Dst Port  Protocol     Timestamp  Flow Duration  Tot Fwd Pkts  \\\n",
-       "0        22         6  1.519374e+09        1532698            11   \n",
-       "1       500        17  1.519374e+09      117573855             3   \n",
-       "2       500        17  1.519374e+09      117573848             3   \n",
-       "3        22         6  1.519374e+09        1745392            11   \n",
-       "4       500        17  1.519374e+09       89483474             6   \n",
-       "\n",
-       "   Tot Bwd Pkts  TotLen Fwd Pkts  TotLen Bwd Pkts  Fwd Pkt Len Max  \\\n",
-       "0            11             1179             1969              648   \n",
-       "1             0             1500                0              500   \n",
-       "2             0             1500                0              500   \n",
-       "3            11             1179             1969              648   \n",
-       "4             0             3000                0              500   \n",
-       "\n",
-       "   Fwd Pkt Len Min  ...  Fwd Seg Size Min  Active Mean  Active Std  \\\n",
-       "0                0  ...                32          0.0         0.0   \n",
-       "1              500  ...                 8          0.0         0.0   \n",
-       "2              500  ...                 8          0.0         0.0   \n",
-       "3                0  ...                32          0.0         0.0   \n",
-       "4              500  ...                 8    4000364.0         0.0   \n",
-       "\n",
-       "   Active Max  Active Min   Idle Mean      Idle Std  Idle Max  Idle Min  \\\n",
-       "0           0           0         0.0  0.000000e+00         0         0   \n",
-       "1           0           0  58786927.5  2.375324e+07  75583006  41990849   \n",
-       "2           0           0  58786924.0  2.375325e+07  75583007  41990841   \n",
-       "3           0           0         0.0  0.000000e+00         0         0   \n",
-       "4     4000364     4000364  21370777.5  1.528092e+07  41989576   7200485   \n",
-       "\n",
-       "    Label  \n",
-       "0  Benign  \n",
-       "1  Benign  \n",
-       "2  Benign  \n",
-       "3  Benign  \n",
-       "4  Benign  \n",
-       "\n",
-       "[5 rows x 80 columns]"
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "_cGTuY8dywpV",
+        "outputId": "da0b57bf-bde2-401f-e502-6ad80f7fc6cc"
+      },
+      "source": [
+        "import os\n",
+        "from getpass import getpass\n",
+        "\n",
+        "from pinecone import Pinecone\n",
+        "\n",
+        "# initialize connection to pinecone (get API key at app.pinecone.io)\n",
+        "api_key = os.environ.get(\"PINECONE_API_KEY\") or getpass(\"Enter your Pinecone API key: \")\n",
+        "\n",
+        "# configure client\n",
+        "pc = Pinecone(api_key=api_key)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "[]"
+            ]
+          }
+        }
       ]
-     },
-     "output_type": "execute_result",
-     "metadata": {},
-     "execution_count": 1
-    }
-   ],
-   "source": [
-    "data_23_cleaned = pd.read_csv(\"result23022018.csv\")\n",
-    "data_23_cleaned.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "cGXf6PmhYTNR",
-    "outputId": "3371cd8e-e8e3-4382-ad17-bfffd8e3d8ac"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Benign              1042301\n",
-       "Brute Force -Web        362\n",
-       "Brute Force -XSS        151\n",
-       "SQL Injection            53\n",
-       "Name: Label, dtype: int64"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "a7ysNAlrjD_k"
+      },
+      "source": [
+        "## Installing other dependencies"
       ]
-     },
-     "output_type": "execute_result",
-     "metadata": {},
-     "execution_count": 1
-    }
-   ],
-   "source": [
-    "data_23_cleaned.Label.value_counts()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "bWUf-dk1ywpU"
-   },
-   "source": [
-    "### Load the Model"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "97lIL236YTNR"
-   },
-   "source": [
-    "Here we load the pretrained model. The model is trained using the data from the same date."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "Bq8_hM-RfEeR"
-   },
-   "source": [
-    "We have modified [the original model](https://github.com/rambasnet/DeepLearning-IDS/blob/master/keras_tensorflow_models/02-23-2018.csv_adam_10_10_multiclass_baseline_model_1561316601.model) slightly and changed the number of classes from four (Benign, BruteForce-Web, BruteForce-XSS, SQL-Injection) to two (Benign and Attack). In the step below we will download and unzip our modified model."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "eRTvBzhoqPkR"
-   },
-   "outputs": [],
-   "source": [
-    "!wget -q -O it_threat_model.model.zip \"https://drive.google.com/uc?export=download&id=1VYMHOk_XMAc-QFJ_8CAPvWFfHnLpS2J_\" \n",
-    "!unzip -q it_threat_model.model.zip"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "WP44njScywpU",
-    "outputId": "2a9d2001-0328-4602-c595-372c5bf67aa4"
-   },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "text": [
-      "WARNING:tensorflow:SavedModel saved prior to TF 2.5 detected when loading Keras model. Please ensure that you are saving the model with model.save() or tf.keras.models.save_model(), *NOT* tf.saved_model.save(). To confirm, there should be a file named \"keras_metadata.pb\" in the SavedModel directory.\n",
-      "Model: \"sequential\"\n",
-      "_________________________________________________________________\n",
-      "Layer (type)                 Output Shape              Param #   \n",
-      "=================================================================\n",
-      "dense (Dense)                (None, 128)               10240     \n",
-      "_________________________________________________________________\n",
-      "dense_1 (Dense)              (None, 64)                8256      \n",
-      "_________________________________________________________________\n",
-      "dense_2 (Dense)              (None, 1)                 65        \n",
-      "=================================================================\n",
-      "Total params: 18,561\n",
-      "Trainable params: 18,561\n",
-      "Non-trainable params: 0\n",
-      "_________________________________________________________________\n"
-     ],
-     "name": "stdout"
-    }
-   ],
-   "source": [
-    "model = keras.models.load_model(\"it_threat_model.model\")\n",
-    "model.summary()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "2rCdzSrfywpV"
-   },
-   "outputs": [],
-   "source": [
-    "# Select the first layer\n",
-    "layer_name = \"dense\"\n",
-    "intermediate_layer_model = Model(\n",
-    "    inputs=model.input, outputs=model.get_layer(layer_name).output\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "V3dx9XkPYTNV"
-   },
-   "source": [
-    "### Upload Data"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "yO1eySIdjSOG"
-   },
-   "source": [
-    "\n",
-    "Let's define the item's ids in a way that will reflect the event's label.  Then, we index the events in Pinecone's vector index."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "nk4ZjGg-ywpX",
-    "outputId": "bce8f1fd-98e3-4313-d625-c404a4eca184"
-   },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "text": [
-      "100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1042867/1042867 [01:43<00:00, 10067.22it/s]\n"
-     ],
-     "name": "stdout"
-    }
-   ],
-   "source": [
-    "from tqdm import tqdm\n",
-    "\n",
-    "items_to_upload = []\n",
-    "\n",
-    "model_res = intermediate_layer_model.predict(K.constant(data_23_cleaned.iloc[:, :-1]))\n",
-    "\n",
-    "for i, res in tqdm(zip(data_23_cleaned.iterrows(), model_res), total=len(model_res)):\n",
-    "    benign_or_attack = i[1][\"Label\"][:3]\n",
-    "    items_to_upload.append((benign_or_attack + \"_\" + str(i[0]), res.tolist()))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "cMzD8s3ps3k0"
-   },
-   "outputs": [],
-   "source": [
-    "import itertools\n",
-    "\n",
-    "\n",
-    "def chunks(iterable, batch_size=100):\n",
-    "    it = iter(iterable)\n",
-    "    chunk = tuple(itertools.islice(it, batch_size))\n",
-    "    while chunk:\n",
-    "        yield chunk\n",
-    "        chunk = tuple(itertools.islice(it, batch_size))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "6C7er-8Gl2Rg"
-   },
-   "source": [
-    "You can lower the NUMBER_OF_ITEMS and, by doing so, limit the number of uploaded items. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "_Ti9p0P-ywpX",
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "NUMBER_OF_ITEMS = len(items_to_upload)\n",
-    "\n",
-    "for batch in chunks(items_to_upload[:NUMBER_OF_ITEMS], 50):\n",
-    "    index.upsert(vectors=batch)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "XgF8vW8PtaRX"
-   },
-   "outputs": [],
-   "source": [
-    "items_to_upload.clear()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "fglWJfAq_kw3"
-   },
-   "source": [
-    "Let's verify all items were inserted. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
     },
-    "id": "xU172A4EywpY",
-    "outputId": "80008942-0000-40e5-92a1-66b767a489c6"
-   },
-   "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "{'dimension': 128, 'namespaces': {'': {'vector_count': 1042867}}}"
+      "cell_type": "code",
+      "metadata": {
+        "id": "r3g-b61IywpQ"
+      },
+      "source": [
+        "!pip install -qU pip python-dateutil tensorflow scikit-learn matplotlib seaborn"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "0uuHcP-WywpQ"
+      },
+      "source": [
+        "from collections import Counter\n",
+        "\n",
+        "import matplotlib.pyplot as plt\n",
+        "import pandas as pd\n",
+        "import seaborn as sns\n",
+        "import tensorflow.keras.backend as K\n",
+        "from sklearn.metrics import (\n",
+        "    accuracy_score,\n",
+        "    confusion_matrix,\n",
+        "    precision_score,\n",
+        "    recall_score,\n",
+        ")\n",
+        "from tensorflow import keras\n",
+        "from tensorflow.keras.models import Model"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "lEwqF1osp83o"
+      },
+      "source": [
+        "We will use some of the code from an [academic work on deep learning for intrusion detection](https://github.com/rambasnet/DeepLearning-IDS). Let's clone the repository that we will use to prepare data."
       ]
-     },
-     "output_type": "execute_result",
-     "metadata": {},
-     "execution_count": 1
-    }
-   ],
-   "source": [
-    "index.describe_index_stats()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "3CGzW3mVywpY"
-   },
-   "source": [
-    "## Query"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "Ywuld4BylAIu"
-   },
-   "source": [
-    "First, we will randomly select a Benign/Attack event and query the vector index using the event embedding. Then, we will use data from different day, that contains same set of attacks to query on a bigger sample."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "m5H4rMyOYTNX"
-   },
-   "source": [
-    "\n",
-    "### Evaluate the Rare Event Classification Model"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "velMK_XlYTNX"
-   },
-   "source": [
-    "We will use network intrusion dataset for 22-02-2018 for querying and testing the Pinecone."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "VC2AVfWsj7em"
-   },
-   "source": [
-    "First, let's clean the data."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "bW9mhYvOYTNX",
-    "outputId": "84a9d548-de90-40fe-c183-fc2ca48de2ec",
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "text": [
-      "cleaning Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\n",
-      "total rows read = 1048576\n",
-      "all done writing 1042966 rows; dropped 5610 rows\n"
-     ],
-     "name": "stdout"
-    }
-   ],
-   "source": [
-    "!python DeepLearning-IDS/data_cleanup.py \"Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\" \"result22022018\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 278
-    },
-    "id": "xqMuz0jKYTNX",
-    "outputId": "d9f8c333-5e3f-4509-b84a-901bacf42487"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Dst Port</th>\n",
-       "      <th>Protocol</th>\n",
-       "      <th>Timestamp</th>\n",
-       "      <th>Flow Duration</th>\n",
-       "      <th>Tot Fwd Pkts</th>\n",
-       "      <th>Tot Bwd Pkts</th>\n",
-       "      <th>TotLen Fwd Pkts</th>\n",
-       "      <th>TotLen Bwd Pkts</th>\n",
-       "      <th>Fwd Pkt Len Max</th>\n",
-       "      <th>Fwd Pkt Len Min</th>\n",
-       "      <th>...</th>\n",
-       "      <th>Fwd Seg Size Min</th>\n",
-       "      <th>Active Mean</th>\n",
-       "      <th>Active Std</th>\n",
-       "      <th>Active Max</th>\n",
-       "      <th>Active Min</th>\n",
-       "      <th>Idle Mean</th>\n",
-       "      <th>Idle Std</th>\n",
-       "      <th>Idle Max</th>\n",
-       "      <th>Idle Min</th>\n",
-       "      <th>Label</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>22</td>\n",
-       "      <td>6</td>\n",
-       "      <td>1.519288e+09</td>\n",
-       "      <td>20553406</td>\n",
-       "      <td>10</td>\n",
-       "      <td>7</td>\n",
-       "      <td>1063</td>\n",
-       "      <td>1297</td>\n",
-       "      <td>744</td>\n",
-       "      <td>0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>20</td>\n",
-       "      <td>1027304.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1027304</td>\n",
-       "      <td>1027304</td>\n",
-       "      <td>1.952608e+07</td>\n",
-       "      <td>0.000000e+00</td>\n",
-       "      <td>19526080</td>\n",
-       "      <td>19526080</td>\n",
-       "      <td>Benign</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>34989</td>\n",
-       "      <td>6</td>\n",
-       "      <td>1.519288e+09</td>\n",
-       "      <td>790</td>\n",
-       "      <td>2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>848</td>\n",
-       "      <td>0</td>\n",
-       "      <td>848</td>\n",
-       "      <td>0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>20</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.000000e+00</td>\n",
-       "      <td>0.000000e+00</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>Benign</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>500</td>\n",
-       "      <td>17</td>\n",
-       "      <td>1.519288e+09</td>\n",
-       "      <td>99745913</td>\n",
-       "      <td>5</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2500</td>\n",
-       "      <td>0</td>\n",
-       "      <td>500</td>\n",
-       "      <td>500</td>\n",
-       "      <td>...</td>\n",
-       "      <td>8</td>\n",
-       "      <td>4000203.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>4000203</td>\n",
-       "      <td>4000203</td>\n",
-       "      <td>3.191524e+07</td>\n",
-       "      <td>3.792787e+07</td>\n",
-       "      <td>75584115</td>\n",
-       "      <td>7200679</td>\n",
-       "      <td>Benign</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>500</td>\n",
-       "      <td>17</td>\n",
-       "      <td>1.519288e+09</td>\n",
-       "      <td>99745913</td>\n",
-       "      <td>5</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2500</td>\n",
-       "      <td>0</td>\n",
-       "      <td>500</td>\n",
-       "      <td>500</td>\n",
-       "      <td>...</td>\n",
-       "      <td>8</td>\n",
-       "      <td>4000189.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>4000189</td>\n",
-       "      <td>4000189</td>\n",
-       "      <td>3.191524e+07</td>\n",
-       "      <td>3.792788e+07</td>\n",
-       "      <td>75584130</td>\n",
-       "      <td>7200693</td>\n",
-       "      <td>Benign</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>500</td>\n",
-       "      <td>17</td>\n",
-       "      <td>1.519288e+09</td>\n",
-       "      <td>89481361</td>\n",
-       "      <td>6</td>\n",
-       "      <td>0</td>\n",
-       "      <td>3000</td>\n",
-       "      <td>0</td>\n",
-       "      <td>500</td>\n",
-       "      <td>500</td>\n",
-       "      <td>...</td>\n",
-       "      <td>8</td>\n",
-       "      <td>4000554.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>4000554</td>\n",
-       "      <td>4000554</td>\n",
-       "      <td>2.137020e+07</td>\n",
-       "      <td>1.528109e+07</td>\n",
-       "      <td>41990741</td>\n",
-       "      <td>7200848</td>\n",
-       "      <td>Benign</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>5 rows \u00d7 80 columns</p>\n",
-       "</div>"
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "FUDeovNiywpT"
+      },
+      "source": [
+        "!git clone -q https://github.com/rambasnet/DeepLearning-IDS.git "
       ],
-      "text/plain": [
-       "   Dst Port  Protocol     Timestamp  Flow Duration  Tot Fwd Pkts  \\\n",
-       "0        22         6  1.519288e+09       20553406            10   \n",
-       "1     34989         6  1.519288e+09            790             2   \n",
-       "2       500        17  1.519288e+09       99745913             5   \n",
-       "3       500        17  1.519288e+09       99745913             5   \n",
-       "4       500        17  1.519288e+09       89481361             6   \n",
-       "\n",
-       "   Tot Bwd Pkts  TotLen Fwd Pkts  TotLen Bwd Pkts  Fwd Pkt Len Max  \\\n",
-       "0             7             1063             1297              744   \n",
-       "1             0              848                0              848   \n",
-       "2             0             2500                0              500   \n",
-       "3             0             2500                0              500   \n",
-       "4             0             3000                0              500   \n",
-       "\n",
-       "   Fwd Pkt Len Min  ...  Fwd Seg Size Min  Active Mean  Active Std  \\\n",
-       "0                0  ...                20    1027304.0         0.0   \n",
-       "1                0  ...                20          0.0         0.0   \n",
-       "2              500  ...                 8    4000203.0         0.0   \n",
-       "3              500  ...                 8    4000189.0         0.0   \n",
-       "4              500  ...                 8    4000554.0         0.0   \n",
-       "\n",
-       "   Active Max  Active Min     Idle Mean      Idle Std  Idle Max  Idle Min  \\\n",
-       "0     1027304     1027304  1.952608e+07  0.000000e+00  19526080  19526080   \n",
-       "1           0           0  0.000000e+00  0.000000e+00         0         0   \n",
-       "2     4000203     4000203  3.191524e+07  3.792787e+07  75584115   7200679   \n",
-       "3     4000189     4000189  3.191524e+07  3.792788e+07  75584130   7200693   \n",
-       "4     4000554     4000554  2.137020e+07  1.528109e+07  41990741   7200848   \n",
-       "\n",
-       "    Label  \n",
-       "0  Benign  \n",
-       "1  Benign  \n",
-       "2  Benign  \n",
-       "3  Benign  \n",
-       "4  Benign  \n",
-       "\n",
-       "[5 rows x 80 columns]"
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "mc4ERmwniO1H"
+      },
+      "source": [
+        "## Define a New Pinecone Index"
       ]
-     },
-     "output_type": "execute_result",
-     "metadata": {},
-     "execution_count": 1
-    }
-   ],
-   "source": [
-    "data_22_cleaned = pd.read_csv(\"result22022018.csv\")\n",
-    "data_22_cleaned.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "262mxbQDYTNY",
-    "outputId": "7540316e-2c26-49ad-a487-85a4fe0bd84c"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Benign              1042603\n",
-       "Brute Force -Web        249\n",
-       "Brute Force -XSS         79\n",
-       "SQL Injection            34\n",
-       "Name: Label, dtype: int64"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Now we set up our index specification, which allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/guides/get-started/understanding-organizations#regions)."
       ]
-     },
-     "output_type": "execute_result",
-     "metadata": {},
-     "execution_count": 1
-    }
-   ],
-   "source": [
-    "data_22_cleaned.Label.value_counts()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "P6nJdtveYTNY"
-   },
-   "source": [
-    "Let's define a sample that will include all different types of web attacks for this specific date."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "N7vwZk6HYTNY",
-    "outputId": "96c03fc3-c086-4398-b58a-2c5c57eb4ded"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Benign              1638\n",
-       "Brute Force -Web     249\n",
-       "Brute Force -XSS      79\n",
-       "SQL Injection         34\n",
-       "Name: Label, dtype: int64"
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "from pinecone import ServerlessSpec\n",
+        "\n",
+        "cloud = os.environ.get(\"PINECONE_CLOUD\") or \"aws\"\n",
+        "region = os.environ.get(\"PINECONE_REGION\") or \"us-east-1\"\n",
+        "\n",
+        "spec = ServerlessSpec(cloud=cloud, region=region)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Create the index:"
       ]
-     },
-     "output_type": "execute_result",
-     "metadata": {},
-     "execution_count": 1
-    }
-   ],
-   "source": [
-    "data_sample = data_22_cleaned[-2000:]\n",
-    "data_sample.Label.value_counts()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "neSxNwYckGMK"
-   },
-   "source": [
-    "Now, we will query the test dataset and save predicted and expected results to create a confusion matrix."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 136,
-     "referenced_widgets": [
-      "567d04116b1b4d21bac2348535b750a0",
-      "8f92a228cbc54b30bcb22d0598e9577f",
-      "8fe529938a4f40ac905e145e423d856e",
-      "a4b449113a734e90825bc1ef87ca3d3c",
-      "cda25dc5f86344b3850c92e59085c06c",
-      "3586c81ff82048ed80d69b7a4b5bd6b3",
-      "c427de521f054c4997d69586251bed4f",
-      "90d8d1d9da814d90b0f0bf331102d4df",
-      "7f2cabefd9eb4674a63ef3d56a5be122",
-      "dda94f4a1ea946b7996a928374dda4a5",
-      "ea6f763369cd4b478998ea4d3e8f20e6",
-      "e752273786584dd4baa77ac3f4528849",
-      "aef6058200fa454c90f51760685e25db",
-      "87b4d4b12452401cb82285364dae3576",
-      "8a60896e0288471a91089a03a75b210b",
-      "fe75c2a93c6e4619a2731a4c01a71619",
-      "5c64d617d0d94a6a9797d20f0d1e80f3",
-      "25ac309f2f5d43169ed0bda88300c8d2",
-      "9ebe5f94bbb743058602c9af26cd4eaf",
-      "3db0b4717ac140b78bf7d75e7ebeaf39",
-      "fcc29e1c6b304044a53522c550c4b49d",
-      "cd9e8a060d05491f8c4d74871c9560fa"
-     ]
-    },
-    "id": "8u6cg_1tYTNY",
-    "outputId": "dbe81cb3-88fb-4cd9-91fe-773c960ca108",
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "text": [
-      "100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [10:48<00:00, 32.44s/it]\n"
-     ],
-     "name": "stdout"
-    }
-   ],
-   "source": [
-    "y_true = []\n",
-    "y_pred = []\n",
-    "\n",
-    "BATCH_SIZE = 100\n",
-    "\n",
-    "for i in tqdm(range(0, len(data_sample), BATCH_SIZE)):\n",
-    "    test_data = data_sample.iloc[i : i + BATCH_SIZE, :]\n",
-    "\n",
-    "    # Create vector embedding using the model\n",
-    "    test_vector = intermediate_layer_model.predict(K.constant(test_data.iloc[:, :-1]))\n",
-    "    # Query using the vector embedding\n",
-    "    query_results = []\n",
-    "\n",
-    "    for xq in test_vector.tolist():\n",
-    "        query_res = index.query(vector=xq, top_k=50)\n",
-    "        query_results.append(query_res)\n",
-    "\n",
-    "    ids = [res.id for result in query_results for res in result.matches]\n",
-    "\n",
-    "    for label, res in zip(test_data.Label.values, query_results):\n",
-    "        # Add to the true list\n",
-    "        if label == \"Benign\":\n",
-    "            y_true.append(0)\n",
-    "        else:\n",
-    "            y_true.append(1)\n",
-    "\n",
-    "        counter = Counter(match.id.split(\"_\")[0] for match in res.matches)\n",
-    "\n",
-    "        # Add to the predicted list\n",
-    "        if counter[\"Bru\"] or counter[\"SQL\"]:\n",
-    "            y_pred.append(1)\n",
-    "        else:\n",
-    "            y_pred.append(0)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 313
     },
-    "id": "HV3-gkdWYTNZ",
-    "outputId": "20103ea7-713b-4e2b-9590-ecc70ef9b76e"
-   },
-   "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "[Text(0, 0.5, 'Benign'), Text(0, 1.5, 'Attack')]"
+      "cell_type": "code",
+      "metadata": {
+        "id": "9_TIgYxBywpV"
+      },
+      "source": [
+        "# Pick a name for the new index\n",
+        "index_name = \"it-threats\""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "import time\n",
+        "\n",
+        "# check if index already exists (it shouldn't if this is the first time)\n",
+        "if not pc.has_index(index_name):\n",
+        "    # if it does not exist, create the index\n",
+        "    pc.create_index(name=index_name, dimension=128, metric=\"euclidean\", spec=spec)\n",
+        "    # wait for index to be initialized\n",
+        "    while not pc.describe_index(index_name).status[\"ready\"]:\n",
+        "        time.sleep(1)\n",
+        "\n",
+        "# connect to index\n",
+        "index = pc.Index(index_name)\n",
+        "# view index stats\n",
+        "index.describe_index_stats()"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "IOP9jCo5ywpX"
+      },
+      "source": [
+        "## Upload\n",
+        "Here we transform network events into vector embeddings, then upload them into Pinecone's vector index. "
       ]
-     },
-     "output_type": "execute_result",
-     "metadata": {},
-     "execution_count": 1
     },
     {
-     "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEWCAYAAABliCz2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAbKUlEQVR4nO3debxVdb3/8debc5RBQEZxKBAcILUUtX5eS1IzE6+aM6JlGE455HXWMBwoLbNBrzl3zTmnLEwihyupZDeQxCEpUqIUQTgCKjId+Pz+WOvQ5sg5Z4t7nQ3n+34+HufBXsNe38+Gzfus/V3f/V2KCMzMrO1rV+0CzMysdTjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cC3NkNSR0kPS1oo6f6PcJxjJD1aydqqQdJvJX2t2nXYusOBb61O0tGSJkt6T9KbeTB9rgKHPhzoA/SMiCPW9iARcVdE7FuBelYjaU9JIemXjdbvmK+fUOZxLpF0Z0v7RcTQiLhtLcu1NsiBb61K0lnAT4DLycK5L3Ad8OUKHL4f8LeIqK/AsYoyF9hdUs+SdV8D/lapBpTx/237AL8prNVI2hi4DDg1In4ZEYsiYnlEPBwR5+b7tJf0E0mz8p+fSGqfb9tT0uuSzpb0Vv7p4Lh826XAaGBY/slhZOMzYUlb5mfStfnyCEmvSXpX0gxJx5Ssf6bkebtLmpR3FU2StHvJtgmSxkiamB/nUUm9mvlrWAb8Cjgqf34NcCRwV6O/q6sl/UvSO5Kek7RHvn4/4Fslr3NqSR3flTQReB8YkK87Pt9+vaQHSo7/fUlPSFLZ/4C23nPgW2v6D6AD8FAz+4wCdgN2AnYEPgNcVLJ9U2BjYAtgJPBTSd0j4mKyTw33RkTniPhZc4VI2gi4BhgaEV2A3YHn17BfD+CRfN+ewI+ARxqdoR8NHAdsAmwInNNc28DtwLH54y8BLwOzGu0ziezvoAdwN3C/pA4RMb7R69yx5DlfBU4EugAzGx3vbOBT+S+zPcj+7r4WnlslKQ58a009gXktdLkcA1wWEW9FxFzgUrIga7A83748IsYB7wED17KelcAOkjpGxJsR8fIa9vlPYHpE3BER9RFxDzANOLBkn1sj4m8RsRi4jyyomxQRfwB6SBpIFvy3r2GfOyOiLm/zh0B7Wn6dP4+Il/PnLG90vPeBr5D9wroTOD0iXm/heNbGOPCtNdUBvRq6VJqwOaufnc7M1606RqNfGO8DnT9sIRGxCBgGnAy8KekRSYPKqKehpi1KlmevRT13AKcBe7GGTzx5t9UreTfSArJPNc11FQH8q7mNEfEn4DVAZL+YLDEOfGtNzwJLgIOb2WcW2cXXBn35YHdHuRYBnUqWNy3dGBG/i4gvApuRnbXfXEY9DTW9sZY1NbgDOAUYl599r5J3uZxP1rffPSK6AQvJghqgqW6YZrtnJJ1K9klhFnDe2pdu6ysHvrWaiFhIdmH1p5IOltRJ0gaShkq6Mt/tHuAiSb3zi5+jybog1sbzwBBJffMLxhc2bJDUR9JBeV/+UrKuoRVrOMY4YNt8KGmtpGHAdsBv1rImACJiBvB5smsWjXUB6slG9NRKGg10Ldk+B9jyw4zEkbQt8B2ybp2vAudJarbrydoeB761qoj4EXAW2YXYuWTdEKeRjVyBLJQmAy8ALwJT8nVr09ZjwL35sZ5j9ZBuR3YhcxbwNln4nrKGY9QBB+T71pGdGR8QEfPWpqZGx34mItb06eV3wG/JhmrOJPtUVNpd0/ClsjpJU1pqJ+9CuxP4fkRMjYjpZCN97mgYAWVpkC/Sm5mlwWf4ZmaJcOCbmSXCgW9mlggHvplZIpr7AkxVdRx8mq8m2zpp/qRrq12CWZM61NLk/Eg+wzczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ5
8M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NE1BbdgKQaoE9pWxHxz6LbNTOz1RUa+JJOBy4G5gAr89UBfKrIds3M7IOKPsM/AxgYEXUFt2NmZi0oug//X8DCgtswM7MyFH2G/xowQdIjwNKGlRHxo4LbNTOzRooO/H/mPxvmP2ZmViWFBn5EXFrk8c3MrHxFj9J5mGxUTqmFwGTgxohYUmT7Zmb2b0VftH0NeA+4Of95h2yI5rb5spmZtZKi+/AHR8SQkuWHJT0VEUMkvVxw22ZmVqLoM/zekvo2LOSPe+WLywpu28zMShR9hn828IykVwEB/YFTJG0E3FZw223aDRcfw9AhOzD37XfZ9YjLV63/xlGf5+RhQ6hfsZLxT7/EqKt/za7b9+Pabw8HQILv3jCOsU++QOdO7Xn8f85c9dwtNunGL8ZN4tyrHmz112PpGX3RhTz1+wn06NGTX/76N9UuJwmKaHxNtcINSO2BQWSBP63cC7UdB59WbGHruc/uvBWL3l/KLWOOXRX4Q3bdhvOP/xKHnH4Dy5bX07t7Z+bOf4+OHTZg2fIVrFixkk17deX/7r2QAfuOYsWKlasdc+Jd53HeDx9k4pRXq/GS1hvzJ11b7RLahOcmT6JTp06MuvB8B34FdahFTW0r5Axf0t4R8b+SDm20aYAkIuKXRbSbkolTXqXvZj1WW3fiEXtw1a2PsWx5PQBz578HwOIly1ft037DDVjTL/mt+vZmkx5dHPbWanbZ9dO88cbr1S4jKUV16Xwe+F/gwDVsC8CBX4Ct+23CZwdvxaWnHsiSZcu58EcP8dxfsolJP71DP2645Cv03awHIy+67QNn90futwsPPDqlGmWbWSspJPAj4uL8z+M+zPMknQicCFD7sT2p7bV9AdW1XbU17ejetRNDjr2KXbfvx51Xfp1PHHAJAJNemskuh3+Xgf37cMtlX+V3E//C0mX1q557xJd2YeRFt1epcjNrDUV/8ao9cBiwJavPh3/ZmvaPiJuAm8B9+GvjjTkL+NUTUwGY/PJMVq4MenXvzLy8awfgrzPmsGjxMrbfenOm5Gf/n9x2C2pravjzK/+qSt1m1jqKHpb5a+DLQD2wqOTHCvDwhBfY8zPbArB1303YcINa5s1/j36b96SmJvun7rtZd7bdsg8zZ/17xuoj99uF+8ZPrkrNZtZ6ih6W+bGI2K/gNpJ02xUj2GOXbejVrTN/Hz+GMTeM47ZfPcuNlxzD5Pu/xbLlKzh+9B0A7D54AOccty/L61ewcmVwxuX3Urfg3793D/vizhx8+vXVeimWqPPPOYvJk/7EggXz+eLeQ/jGqadz6GFHVLusNq3QYZmSbgL+OyJe/LDPdZeOras8LNPWZa0+LLPE54ARkmaQzYcvICLCtzg0M2tlRQf+0IKPb2ZmZSr0om1EzAQ+DuydP36/6DbNzGzNCg1fSRcD5wMX5qs2AO4ssk0zM1uzos+2DwEOIh+KGRGzgC4Ft2lmZmtQdOAvi2wYUADks2SamVkVFB3490m6Eegm6QTgcXynKzOzqij6JuZXSfoi2a0NBwKjI+KxIts0M7M1K3pYJnnAPyapF1DX0v5mZlaMQrp0JO0maYKkX0oaLOkl4CVgjiRPtWBmVgVFneFfC3wL2JhsXvyhEfFHSYOAe4DxBbVrZmZNKOqibW1EPBoR9wOzI+KPABExraD2zMysBUUFfuntlBY32uZJ0czMqqCoLp0dJb1DNllax/wx+XKHgto0M7NmFHWLw5oijmtmZmvPE5mZmSXCgW9mlggHvplZIhz4ZmaJcOCbmSXCgW9mlggHvplZIhz4ZmaJcOC
bmSXCgW9mlggHvplZIhz4ZmaJcOCbmSXCgW9mlggHvplZIhz4ZmaJcOCbmSXCgW9mlogmb3EoaefmnhgRUypfjpmZFaW5e9r+sJltAexd4VrMzKxATQZ+ROzVmoWYmVmxmjvDX0XSDsB2QIeGdRFxe1FFmZlZ5bUY+JIuBvYkC/xxwFDgGcCBb2a2HilnlM7hwBeA2RFxHLAj0L7QqszMrOLKCfzFEbESqJfUFXgLGFBsWWZmVmnl9OFPltQNuBl4DngP+FOhVZmZWcW1GPgRcUr+8AZJ44GuEfFCsWWZmVmllXPRdsia1kXEU8WUZGZmRSinS+fckscdgM+Qde34i1dmZuuRcrp0DixdlvRx4MrCKjIzs0KszeRprwM7VLoQMzMrVjl9+P9NNncOZL8gdgKmFlmUmZlVXlnDMkse1wP3RMTEguoxM7OClBP43SLi6tIVks5ovM7MzNZtiojmd5CmRMTOjdb9OSIGF1nYrAXLmi/MrEqW1q+sdglmTerfq4Oa2tbcDVCGA0cD/SWNLdnUBairXHlmZtYamuvS+QPwJtCL1W+G8i7gb9qama1nmrsBykxgpqRjgFkRsQRAUkfgY8A/WqVCMzOriHLG4d8HlHZargDuL6YcMzMrSjmBXxsRyxoW8scbFleSmZkVoZzAnyvpoIYFSV8G5hVXkpmZFaGccfgnA3dJujZffh04triSzMysCOVMnvYqsJukzmTj9t8tviwzM6u0Frt0JF0uqVtEvBcR70rqLuk7rVGcmZlVTjl9+EMjYkHDQkTMB/YvriQzMytCOYFfI6l9w0I+Dr99M/ubmdk6qJyLtncCT0i6NV8+DrituJLMzKwI5Vy0vVLSC8A+gIDxQL+iCzMzs8oq945Xs8m+bXsY8AXglcIqMjOzQjQ3W+a2wFHAcLLZMe8lG5a5VyvVZmZmFdRcl8404GngwIj4O4CkM1ulKjMzq7jmunQOI+vKeVLSzZK+QNaHb2Zm66EmAz8iHoqIYcAgYAJwJtBH0vWS9m2l+szMrEJavGgbEYsi4q6IOIBsHvzngQsKr8zMzCqqxXvaVovvaWvrKt/T1tZlzd3TttxhmWZmtp5z4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiCgt8SSPXsO57RbVnZmbNqy3w2IdLWhIRdwFIug5oX2B7ZmbWjCID/1BgrKSVwFDg7Yg4pcD2zMysGRUPfEk9ShaPB34FTAQuk9QjIt6udJtmZtYyRURlDyjNAAJQyZ8NIiIGlHOcWQuWVbYwswpZWr+y2iWYNal/rw5qalvFz/Ajon+lj2lmZh9dkaN0TpXUrWS5uyT34ZuZVUmR4/BPiIgFDQsRMR84ocD2zMysGUUGfjtJq/qSJNUAGxbYnpmZNaPIYZm/A+6TdAPZxduTgfEFtpes74/5Nn+c+BTduvfg1nseAuDnN1/HI79+kI27dQfg+G98k90+O4RXXn6RH15xKQARwYgTTmGPPb9QtdqtbZs7ZzY/GDOK+W/XIYn9v3w4Bx95DO++s5DLv30ec2bPos+mm/OtMT+gS9euLF++nGuuvIzp0/6C2rXj5DPOY8edP13tl9FmVHyUzqoDS+2Ak4AvkI3UeRS4JSJWlPN8j9Ip39Q/T6Zjx05ccemo1QK/Y8dODPvKiNX2XbJkMRvUbkBNbS118+Zy/FcO54HfPEFNbZG/+9sWj9IpX928ubxdN49tBn6C9xct4vSRRzH6ip/w2LixdOnalWFfHcm9d/yM9959h5GnnMnYB3/B9Gkvc/aoMSyYX8dFZ5/KNbfcTbt2ngWmXM2N0insbzEiVkbE9RFxeEQcFhE3lhv29uHsOHhXunbduKx9O3TouCrcly1bSpPvDLMK6NmrN9sM/AQAnTbaiI/3G0Dd3Ld
49ukn2WfoQQDsM/Qg/vDUkwD88x+vsdOu/w+Abt170rlzF6ZPe7k6xbdBRY7S2UbSA5L+Ium1hp+i2rMPeuiBexh5zKF8f8y3efedhavW/+WlFxhx1MF8/ehDOfOC0T67t1Yx+803eHX6NAZu/0kWzH+bnr16A9kvhYULsu9jDth6W559egIr6uuZPet1pv/1FebOmVPNstuUIj8n3QpcD9QDewG3A3c09wRJJ0qaLGnynT+/pcDS2r6DDj2Sux4cx813PEDPXr257uqrVm3bbodP8fNf/Iobbv0Fd992C8uWLq1ipZaCxe+/z3dGnc1J3zyXjTbq3OR+X/rPg+nduw+njzyaG67+AdvtsCM1tTWtWGnbVuSpXceIeEKSImImcImkp4GLm3pCRNwE3ATuw/+oevTsterxAV8+jAvPPu0D+/TrP4AOHToy47W/M/AT27dmeZaQ+vrljBl1Fnvtuz+f23MfALp170HdvLn07NWbunlz2bhbNiNLTW0tJ51x7qrnnnnSsWz+sb5VqbstKvIMf0l+4Xa6pNMkHQJsUmB7VqJu3txVj5/+/RP0H7A1AG/Oep0V9fUAzH5zFv/65z/YdLPNq1KjtX0RwY+vuIS+/QZw2FHHrlq/2+f25PHfjgXg8d+O5T/22AvIBhUsWfw+AFP+9Cw1NTX0679V6xfeRhU5SufTwCtAN2AM0BW4MiL+r5zn+wy/fGMuOo/np0xi4YIFdO/RgxEnnsrU5ybx9+nTkMSmm23BWReMpmev3jw67mHuvv1n1NbW0q5dO44deRKf+7yHZX4YHqVTvpemTuGcU45jy622oZ2y88sRJ53OoO0/yeXfPpe35sxmkz6bMuo7V9Gl68bMfvMNRp35Ddq1a0fP3ptw5oWX0GdTn5B8GM2N0iky8I+IiPtbWtcUB76tqxz4ti6ryrBM4MIy15mZWSsoYj78ocD+wBaSrinZ1JVsxI6ZmVVBEaN0ZgGTgYOA50rWvwucWUB7ZmZWhiLmw58KTJXUJyJuK90m6Qzg6kq3aWZmLSuyD/+oNawbUWB7ZmbWjCL68IcDRwP9JY0t2dQVmFfp9szMrDxF9OH/AXgT6AX8sGR9AMMKaM/MzMpQRB/+TGAm8B+SdiI72z8SmAE8WOn2zMysPEV06WxL1n8/HKgD7iX7gtdelW7LzMzKV0SXzjTgaeDAiPg7gCQPxzQzq7IiRukcBswGnpR0s6SGO16ZmVkVVTzwI+KhiBgGDAImkH3Zqo+k6yXtW+n2zMysPEXe4nBRRNwVEQcAHwOeBy4oqj0zM2teYbNlflSeLdPWVZ4t09Zl1Zot08zM1iEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEIqLaNVgrkHRiRNxU7TrMGvN7s/X4DD8dJ1a7ALMm+L3ZShz4ZmaJcOCbmSXCgZ8O95HausrvzVbii7ZmZonwGb6ZWSIc+GZmiXDgrwckrZD0vKSpkqZI2v0jHOsySftUsj4zAEmHSApJg/LlLSUdXbJ9J0n7f4Tj/0NSr0rUmioH/vphcUTsFBE7AhcCV6ztgSJidEQ8XrnSzFYZDjwDHJUvbwkcXbJ9J2CtA98+Ogf++qcrML9hQdK5kiZJekHSpfm6LSW9IulmSS9LelRSx3zbzyUdnj/eX9I0Sc9IukbSb/L1l0j6H0kTJL0m6ZtVeJ22HpHUGfgsMJJ/B/73gD3yT6fnA5cBw/LlYZI+I+kPkv6c/zkwP1aNpKskvZi/r09v1FZHSeMlndCKL7FNqK12AVaWjpKeBzoAmwF
7A0jaF9gG+AwgYKykIcA/8/XDI+IESfcBhwF3NhxQUgfgRmBIRMyQdE+jNgcBewFdgL9Kuj4ilhf5Im29djAwPiL+JultSTsDFwDnRMQBAJLmALtGxGn5cley91993s14Odn79ESgPzA439ajpJ3OwC+A2yPi9lZ7dW2Ez/DXDw1dOoOA/YDbJQnYN//5MzCFLKS3yZ8zIyKezx8/R/bxutQg4LWImJEvNw78RyJiaUTMA94C+lTyBVmbM5wsiMn/HF7GczYG7pf0EvBjYPt8/T7ADRFRDxARb5c859fArQ77teMz/PVMRDybX7jqTXZWf0VE3Fi6j6QtgaUlq1YAHRsdSi001fj5fq/YGknqSfapcwdJAdQAAYxr4aljgCcj4pD8PTuh4ZD589dkIjBU0t3hLxF9aD7DX8/kIyBqgDrgd8DX8/5TJG0haZMyDzUNGJD/RwMYVuFSLR2Hk3Wx9IuILSPi48AMYCVZl2CDdxstbwy8kT8eUbL+UeBkSbUAjbp0RpO996+r6CtIhAN//dAxv9D1PHAv8LWIWBERjwJ3A89KehF4gNX/QzUpIhYDpwDjJT0DzAEWFlO+tXHDgYcarXuQ7OJtfT6c+EzgSWC7hou2wJXAFZImkp3ENLiF7DrUC5KmsvpIH4D/AjpIurKA19KmeWqFhEnqHBHv5dcDfgpMj4gfV7suMyuGz/DTdkL+qeFlso/XN7awv5mtx3yGb2aWCJ/hm5klwoFvZpYIB76ZWSIc+NZmlcwy+pKk+yV1+gjH2rNkrqGDJF3QzL7dJJ2yFm1cIumcta3RrCUOfGvLGqak2AFYBpxculGZD/1/ICLGRsT3mtmlG9l3HMzWKQ58S8XTwNYlM4leRzb/0Mcl7Svp2fxeA/eXfHN5v4bZRIFDGw4kaYSka/PHfSQ9lH+5aKqyexV8D9gq/3Txg3y/D8xqmq8fJemvkh4HBrba34YlyYFvbV7+Ff2hwIv5qoFkUwEMBhYBFwH7RMTOwGTgrHw20ZuBA4E9gE2bOPw1wO/zexXsTPadhguAV/NPF+c2mtV0J2AXSUMk7UL2bdTBZL9QPl3hl262Gk+IZW1Zw7TSkJ3h/wzYHJgZEX/M1+8GbAdMzL5wzIbAs2Szic6IiOkAku4km7a3sb2BYwEiYgWwUFL3RvuUzmoK2RS/25BNg/FQRLyftzH2I71asxY48K0tWxwRO5WuyEN9Uekq4LGIGN5ov51oesbGD6upWU3/q4JtmLXIXTqWuj8Cn5W0NYCkTpK2JZtNtL+krfL9mprf/QngG/lza/KbejSeFbKpWU2fAg7J7+DUhaz7yKwwDnxLWkTMJZua9x5JL5D9AhgUEUvIunAeyS/azmziEGcAe+WzlT4HbB8RdWRdRC9J+kFTs5pGxBSy2U+fJ5td8unCXqgZnkvHzCwZPsM3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRPx/QQOreDywqpcAAAAASUVORK5CYII=",
-      "text/plain": [
-       "<Figure size 432x288 with 1 Axes>"
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "PzkJJd8ZYTNM"
+      },
+      "source": [
+        "### Prepare Data"
       ]
-     },
-     "output_type": "display_data",
-     "metadata": {}
-    }
-   ],
-   "source": [
-    "# Create confusion matrix\n",
-    "conf_matrix = confusion_matrix(y_true, y_pred)\n",
-    "\n",
-    "# Show confusion matrix\n",
-    "ax = plt.subplot()\n",
-    "sns.heatmap(conf_matrix, annot=True, ax=ax, cmap=\"Blues\", fmt=\"g\", cbar=False)\n",
-    "\n",
-    "# Add labels, title and ticks\n",
-    "ax.set_xlabel(\"Predicted\")\n",
-    "ax.set_ylabel(\"Acctual\")\n",
-    "ax.set_title(\"Confusion Matrix\")\n",
-    "ax.xaxis.set_ticklabels([\"Benign\", \"Attack\"])\n",
-    "ax.yaxis.set_ticklabels([\"Benign\", \"Attack\"])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "-H7rWFguYTNZ"
-   },
-   "source": [
-    "Now we can calculate overall accuracy and per class accuracy."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "uHTInLt2YTNZ",
-    "outputId": "e48f2726-5eba-4d17-ffe7-7b17c62d5ee8"
-   },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "text": [
-      "Accuracy: 0.923\n",
-      "Precision: 0.995\n",
-      "Recall: 0.577\n"
-     ],
-     "name": "stdout"
-    }
-   ],
-   "source": [
-    "# Calculate accuracy\n",
-    "acc = accuracy_score(y_true, y_pred, normalize=True, sample_weight=None)\n",
-    "precision = precision_score(y_true, y_pred)\n",
-    "recall = recall_score(y_true, y_pred)\n",
-    "\n",
-    "print(f\"Accuracy: {acc:.3f}\")\n",
-    "print(f\"Precision: {precision:.3f}\")\n",
-    "print(f\"Recall: {recall:.3f}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 112
-    },
-    "id": "ZNzyqAH9YTNZ",
-    "outputId": "7d2e35a1-df80-47bc-f5e7-fd425e79a7f4"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>type</th>\n",
-       "      <th>accuracy</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>Benign</td>\n",
-       "      <td>1.00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>Attack</td>\n",
-       "      <td>0.58</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "-N0bh6dUYTNN"
+      },
+      "source": [
+        "The datasets we use consist of benign (normal) network traffic and malicious traffic\n",
+        "generated from several different network attacks. We will focus on web attacks only. \n",
+        "\n",
+        "The web attack category consists of three common attacks: \n",
+        "- Cross-site scripting (BruteForce-XSS), \n",
+        "- SQL-Injection (SQL-Injection), \n",
+        "- Brute-force attacks on administrative and user passwords (BruteForce-Web)\n",
+        "\n",
+        "The original data was recorded over two days."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "vuOLNAehYTNN"
+      },
+      "source": [
+        "**Download data for 22-02-2018 and 23-02-2018**\n",
+        "\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "pCrwPkDJYTNO"
+      },
+      "source": [
+        "Files should be downloaded to the current directory. We will use the data from one date for training and generating vectors, and the data from the other date for testing."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "HshKyF0KywpR",
+        "outputId": "d5b3ceee-b584-47e2-a428-43e5c835968f"
+      },
+      "source": [
+        "!wget \"https://cse-cic-ids2018.s3.ca-central-1.amazonaws.com/Processed%20Traffic%20Data%20for%20ML%20Algorithms/Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\" -q --show-progress\n",
+        "!wget \"https://cse-cic-ids2018.s3.ca-central-1.amazonaws.com/Processed%20Traffic%20Data%20for%20ML%20Algorithms/Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\" -q --show-progress"
       ],
-      "text/plain": [
-       "     type  accuracy\n",
-       "0  Benign      1.00\n",
-       "1  Attack      0.58"
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Thursday-22-02-2018 100%[===================>] 364.91M  3.07MB/s    in 2m 6s   \n",
+            "Friday-23-02-2018_T 100%[===================>] 365.10M  3.07MB/s    in 1m 53s  \n"
+          ]
+        }
       ]
-     },
-     "output_type": "display_data",
-     "metadata": {}
-    }
-   ],
-   "source": [
-    "# Calculate per class accuracy\n",
-    "cmd = confusion_matrix(y_true, y_pred, normalize=\"true\").diagonal()\n",
-    "per_class_accuracy_df = pd.DataFrame(\n",
-    "    [(index, round(value, 4)) for index, value in zip([\"Benign\", \"Attack\"], cmd)],\n",
-    "    columns=[\"type\", \"accuracy\"],\n",
-    ")\n",
-    "per_class_accuracy_df = per_class_accuracy_df.round(2)\n",
-    "display(per_class_accuracy_df)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "Gfy_LW5zXIj6"
-   },
-   "source": [
-    "We got great results using Pinecone! Let's see what happens if we skip the similarity search step and predict values from the model directly. In other words, let's use the model that created the embeddings as a classifier. It would be interesting to compare its and the similarity search approach accuracy. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "Oxya9-mMYh5A"
-   },
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "from keras.utils.np_utils import normalize\n",
-    "\n",
-    "data_sample = normalize(data_22_cleaned.iloc[:, :-1])[-2000:]\n",
-    "y_pred_model = model.predict(normalize(data_sample)).flatten()\n",
-    "y_pred_model = np.round(y_pred_model)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 313
     },
-    "id": "GWssFePDXEks",
-    "outputId": "9eee2c60-f1c1-4a34-c682-665389fe3aee"
-   },
-   "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "[Text(0, 0.5, 'Benign'), Text(0, 1.5, 'Attack')]"
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "gr-hay4rfk0d"
+      },
+      "source": [
+        "Let's first look at how the events in the data are distributed across labels."
       ]
-     },
-     "output_type": "execute_result",
-     "metadata": {},
-     "execution_count": 1
     },
     {
-     "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEWCAYAAABliCz2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAbqElEQVR4nO3dd5xV9Z3G8c8DI02kI1ixy0Z3xRLjakBRg2JXVAQrKqyxxMTgWhcs0aBJTIwaxbI2jFGz9oKokYglu6JR7A2ChSJNlCbM8N0/zhlyGZmZK94zl5nzvF+vec099fe9w/DMub9zzu8oIjAzs6avWbkLMDOzhuHANzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgW5MhqbWkRyTNl3Tfd9jP0ZLGlbK2cpD0hKTjy12HrTkc+NbgJA2WNFHSAknT02D6YQl2fTjQDegcEUes7k4i4q6I6FeCelYiaQ9JIen+GvO3S+ePL3I/F0kaU996EdE/Im5fzXKtCXLgW4OSdBbwO+ByknDeGPgDcHAJdt8DeD8iKkuwr6zMAnaV1Llg3vHA+6VqQAn/37Zv8C+FNRhJ7YFLgNMi4v6IWBgRyyLikYg4O12npaTfSZqWfv1OUst02R6SPpX0c0mfp58OhqTLLgZGAAPTTw4n1TwSlrRJeiRdkU6fIGmypK8kTZF0dMH85wu221XSy2lX0cuSdi1YNl7SpZJeSPczTlKXOn4MS4EHgaPS7ZsDRwJ31fhZXS3pE0lfSnpFUu90/r7A+QXv8/WCOi6T9AKwCNgsnXdyuvx6SX8u2P8Vkp6RpKL/Aa3Rc+BbQ/p3oBXwQB3rXADsAvQCtgN2Bi4sWN4daA9sAJwEXCepY0SMJPnUcE9EtI2IW+oqRNLawO+B/hGxDrAr8Noq1usEPJau2xm4CnisxhH6YGAIsC7QAhheV9vAHcBx6et9gLeAaTXWeZnkZ9AJ+CNwn6RWETG2xvvcrmCbY4FhwDrA1Br7+znwb+kfs94kP7vjw2Or5IoD3xpSZ2B2PV0uRwOXRMTnETELuJgkyKotS5cvi4jHgQXA1qtZz3JgW0mtI2J6RLy1inX2Bz6IiDsjojIi7gbeBQ4sWOfWiHg/IhYD95IEda0i4kWgk6StSYL/jlWsMyYi5qRt/gZoSf3v87aIeCvdZlmN/S0CjiH5gzUGOCMiPq1nf9bEOPCtIc0BulR3qdRifVY+Op2azluxjxp/MBYBbb9tIRGxEBgInAJMl/SYpJ5F1FNd0wYF0zNWo547gdOBvqziE0/abfVO2o30Bcmnmrq6igA+qWthRPwfMBkQyR8myxkHvjWkl4AlwCF1rDON5ORrtY35ZndHsRYCbQqmuxcujIgnI+JHwHokR+03FVFPdU2frWZN1e4ETgUeT4++V0i7XM4h6dvvGBEdgPkkQQ1QWzdMnd0zkk4j+aQwDfjP1S/dGisHvjWYiJhPcmL1OkmHSGojaS1J/SVdma52N3ChpK7pyc8RJF0Qq+M1oI+kjdMTxudVL5DUTdJBaV/+1yRdQ1Wr2MfjwFbppaQVkgYC3wMeXc2aAIiIKcDuJOcsaloHqCS5oqdC0gigXcHymcAm3+ZKHElbAb8g6dY5FvhPSXV2PVnT48C3BhURVwFnkZyInUXSDXE6yZUrkITSRGAS8Abwajpvddp6Crgn3dcrrBzSzUhOZE4D5pKE76mr2Mcc4IB03TkkR8YHRMTs1ampxr6fj4hVfXp5EniC5FLNqSSfigq7a6pvKpsj6dX62km70MYAV0TE6xHxAcmVPndWXwFl+SCfpDczywcf4ZuZ5YQD38wsJxz4ZmY54cA3M8uJum6AKavW25/us8m2Rpr38rXlLsGsVq0qqHV8JB/hm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA
3M8sJB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU448M3McsKBb2aWExVZNyCpOdCtsK2I+Djrds3MbGWZBr6kM4CRwExgeTo7gH/Lsl0zM/umrI/wzwS2jog5GbdjZmb1yLoP/xNgfsZtmJlZEbI+wp8MjJf0GPB19cyIuCrjds3MrIasA//j9KtF+mVmZmWSaeBHxMVZ7t/MzIqX9VU6j5BclVNoPjARGB0RS7Js38zM/inrk7aTgQXATenXlySXaG6VTpuZWQPJug9/+4joUzD9iKTnIqKPpLcybtvMzApkfYTfVdLG1RPp6y7p5NKM2zYzswJZH+H/HHhe0keAgE2BUyWtDdyecdtN2g0jj6Z/n22ZNfcrdjri8hXzf3zU7pwysA+VVcsZO+FNLrj6IXbapgfX/tcgACS47IbHefjZSQAcue+OnH3iPkQE02fN58QLb2fOFwvL8p4sf16Y8BxXjLqM5VXLOXTAEZw0dFi5S2rSFFHznGqJG5BaAj1JAv/dYk/Utt7+9GwLa+R222FzFi76mpsvPW5F4PfZaUvOOXkfDj3jBpYuq6Rrx7bMmreA1q3WYumyKqqqltO9Szv+957z2KzfBQBMHncZOwz4BXO+WMhlZx7MoiXLuGz04+V8a2u8eS9fW+4SmoSqqioO2n8fRt90K926dWPwwMMZ9aur2HyLLcpdWqPWqgLVtiyTLh1Je6bfDwP2BzYHNgP2S+fZd/TCqx8xd/6ileYNO6I3v771KZYuqwRg1rwFACxesoyqqmQoo5Yt1qL6j7yUfK3dOrlFYp22rZk+yzdGW8N4841JbLRRDzbcaCPWatGCfffbn/HPPlPuspq0rLp0dgf+Ahy4imUB3J9Ru7m2RY912W37zbn4tANZsnQZ5131AK+8nQxM+v1te3DDRcew8XqdOOnC21f8ATjz8nt4+d7zWbh4KR99Mouf/vKecr4Fy5HPZ86k+3rdV0yv260bb0yaVMaKmr5MjvAjYmT6fcgqvk6sbTtJwyRNlDSxcrYv4vm2Kpo3o2O7NvQ57tec/9sHGXPlP3/UL785lR0Pv4wfHnMlZ5/Yj5YtKqioaMbQw3uzy6Ar2KzfBbz5/mecfWK/Mr4Dy5P4xi06INXaG2ElkPWNVy2BAcAmrDwe/iWrWj8ibgRuBPfhr47PZn7Bg8+8DsDEt6ayfHnQpWNbZqddOwDvTZnJwsVL2WaL9Vd09E35dDYAf37qVYYPceBbw+jWrTszps9YMf35zJmsu+66Zayo6cv6ssyHgIOBSmBhwZdl4JHxk9hj560A2GLjdWmxVgWz5y2gx/qdad48+afeeL2ObLVJN6ZOm8O0WfPpuVl3unRsC8Beu/TkvSkzat2/WSlts+2/8vHH/+DTTz9h2dKljH38MXbvu2e5y2rSsr4sc8OI2DfjNnLp9l+eQO8dt6RLh7Z8OPZSLr3hcW5/8CVGX3Q0E+87n6XLqjh5xJ0A7Lr9Zgwf0o9llVUsXx6cefk9Ky69vPzGJ3jq5p+yrLKKj6fPZdjIMeV8W5YjFRUVnHfBCH487GSWL6/ikEMHsMUWW5a7rCYt08syJd0IXBMRb3zbbd2lY2sqX5Zpa7K6LsvM+gj/h8AJkqaQjIcvICLCjzg0M2tgWQd+/4z3b2ZmRcr0pG1ETAU2AvZMXy/Kuk0zM1u1TMNX0kjgHOC8dNZagM8KmpmVQdZH24cCB5FeihkR04B1Mm7TzMxWIevAXxrJZUABkI6SaWZmZZB14N8raTTQQdJQ4Gn8pCszs7LI+iHmv5b0I5JHG24NjIiIp7Js08zMVi3ryzJJA/4pSV2AOVm3Z2Zmq5bVePi7SBov6X5J20t6E3g
TmCnJQy2YmZVBVkf41wLnA+1JxsXvHxF/k9QTuBsYm1G7ZmZWi6xO2lZExLiIuA+YERF/A4iIdzNqz8zM6pFV4C8veL24xjIPimZmVgZZdelsJ+lLksHSWqevSadbZdSmmZnVIZPAj4jmWezXzMxWnwcyMzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU7U+ohDSTvUtWFEvFr6cszMLCt1PdP2N3UsC2DPEtdiZmYZqjXwI6JvQxZiZmbZqusIfwVJ2wLfA1pVz4uIO7IqyszMSq/ewJc0EtiDJPAfB/oDzwMOfDOzRqSYq3QOB/YCZkTEEGA7oGWmVZmZWckVE/iLI2I5UCmpHfA5sFm2ZZmZWakV04c/UVIH4CbgFWAB8H+ZVmVmZiVXb+BHxKnpyxskjQXaRcSkbMsyM7NSK+akbZ9VzYuI57IpyczMslBMl87ZBa9bATuTdO34xiszs0akmC6dAwunJW0EXJlZRWZmlonVGTztU2DbUhdiZmbZKqYP/xqSsXMg+QPRC3g9y6LMzKz0iross+B1JXB3RLyQUT1mZpaRYgK/Q0RcXThD0pk155mZ2ZpNEVH3CtKrEbFDjXl/j4jtsyzsvRmL6i7MrEw6tW1R7hLMatW1bYVqW1bXA1AGAYOBTSU9XLBoHWBO6cozM7OGUFeXzovAdKALKz8M5SvAd9qamTUydT0AZSowVdLRwLSIWAIgqTWwIfCPBqnQzMxKopjr8O8FlhdMVwH3ZVOOmZllpZjAr4iIpdUT6WuftTIza2SKCfxZkg6qnpB0MDA7u5LMzCwLxVyHfwpwl6Rr0+lPgeOyK8nMzLJQzOBpHwG7SGpLct3+V9mXZWZmpVZvl46kyyV1iIgFEfGVpI6SftEQxZmZWekU04ffPyK+qJ6IiHnAftmVZGZmWSgm8JtLalk9kV6H37KO9c3MbA1UzEnbMcAzkm5Np4cAt2dXkpmZZaGYk7ZXSpoE7A0IGAv0yLowMzMrrWKfeDWD5G7bAcBewDuZVWRmZpmoa7TMrYCjgEEko2PeQ3JZZt8Gqs3MzEqori6dd4EJwIER8SGApJ81SFVmZlZydXXpDCDpynlW0k2S9iLpwzczs0ao1sCPiAciYiDQExgP/AzoJul6Sf0aqD4zMyuRek/aRsTCiLgrIg4gGQf/NeDczCszM7OSqveZtuXiZ9ramsrPtLU1WV3PtC32skwzM2vkHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOZBb4kk5axbxRWbVnZmZ1q8hw34dLWhIRdwFI+gPQMsP2zMysDlkG/mHAw5KWA/2BuRFxaobtmZlZHUoe+JI6FUyeDDwIvABcIqlTRMwtdZtmZlY/RURpdyhNAQJQwfdqERGbFbOf92YsKm1hZiXSqW2LcpdgVquubStU27KSH+FHxKal3qeZmX13WV6lc5qkDgXTHSW5D9/MrEyyvA5/aER8UT0REfOAoRm2Z2Zmdcgy8JtJWtGXJKk54M5PM7MyyfKyzCeBeyXdQHLy9hRgbIbt5dasz2fwu8v+i3lz56BmYp8DB3DQ4YP54603MO7R+2nfoSMAxw49nZ126U1l5TKuufISJr//LlVVVfTdZ3+OOOYb98mZlcTlF1/IixP+SsdOnbjz3ocA+HL+F4w4bzgzpn1G9/U34JJRv6Fdu/YrtpkxfRrHHnEQQ4adxuDjhpSr9CYny8A/B/gP4MckV+qMA27OsL3cat68OSeedhabb/UvLFq0kLOGDqbXTj8A4OAjjuHQo45baf0Xnn2aymVLuea2+/h6yWJOO34Affb
qT7f11i9H+dbE7XfgIQw4cjC/GHneinljbruZHb//A44dMpQ7b72JMbfdzKk/+fmK5ddcdQU/2LV3Ocpt0jLr0omI5RFxfUQcHhEDImJ0RFRl1V6ederclc23+hcA2rRZmw17bMqcWbNq30CwZPESqior+frrr6moWIs2a6/dQNVa3vTaYSfatW+/0rwJf32W/gccAkD/Aw5hwvi/rFj23LPPsP4GG7Hp5ls0aJ15kOVVOltK+rOktyVNrv7Kqj1LzJw+jckfvMfW39sWgMce+BNnDDmSq0ddxIKvvgRgtz32plXrVhx/2I846cj+HDLwONZp176u3ZqV1Lw5c+jStSsAXbp2Zd7c5H7MxYsXcdfttzBk2I/LWV6TleVJ21uB64FKoC9wB3BnXRtIGiZpoqSJ99z53xmW1jQtXrSIUSOGc/IZw2mzdlv6H3wEo//4CFff8ic6de7CLdddBcD777xFs2bNue3+cdz0p8d46N47mTHt0zJXbwa33HAdRw4+jjZt/IkzC1n24beOiGckKSKmAhdJmgCMrG2DiLgRuBF8p+23VVm5jFEjhrP73v3Ztc9eAHTs1HnF8n4HHMal5/0EgOeefoIddt6Vioq16NCxEz237cWH775N9/U3LEvtlj8dO3dm9qxZdOnaldmzZtGxUzIiy9tvTmL8M+O4/ve/YcFXX6FmomXLFgwYeHSZK24asgz8JZKaAR9IOh34DFg3w/ZyKyK45oqL2bDHphwy8NgV8+fOmUWnzsnH5r9N+As9Nt0cgK7dujPp1ZfZo9/+fL1kCe+/PYmDjhhcltotn37Ypy9PPPogxw4ZyhOPPkjv3fsC8Idb/tkJcMvo62jduo3DvoRKPpbOih1L3wfeAToAlwLtgCsj4n+L2d5H+MV7e9LfOfeME+mx2ZY0a5bc+nDs0NN57uknmfLheyDRrft6nDr8Qjp17sriRYu4etRIPpk6GSLYq//BHDbo+DK/i8bDY+l8OyPPH85rE1/miy++oFPnzpz0H6fRe4+9GHHuWcycMZ1u3dfj0iuuol37DittVx34vizz26lrLJ0sA/+IiLivvnm1ceDbmsqBb2uyugI/y5O25xU5z8zMGkAW4+H3B/YDNpD0+4JF7Uiu2DEzszLI4qTtNGAicBDwSsH8r4CfZdCemZkVIYvx8F8HXpfULSJuL1wm6Uzg6lK3aWZm9cuyD/+oVcw7IcP2zMysDln04Q8CBgObSnq4YFE7YHap2zMzs+Jk0Yf/IjAd6AL8pmB+AAMzaM/MzIqQRR/+VGAq8O+SepEc7R8JTAH+p9TtmZlZcbLo0tmKpP9+EDAHuIfkBq++pW7LzMyKl0WXzrvABODAiPgQQJIvxzQzK7MsrtIZAMwAnpV0k6S9SJ54ZWZmZVTywI+IByJiINATGE9ys1U3SddL6lfq9szMrDhZPuJwYUTcFREHABsCrwHnZtWemZnVLbPRMr8rj5ZpayqPlmlrsnKNlmlmZmsQB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOOPDNzHLCgW9mlhOKiHLXYA1A0rCIuLHcdZjV5N/NhuMj/PwYVu4CzGrh380G4sA3M8sJB76ZWU448PPDfaS2pvLvZgPxSVszs5zwEb6ZWU448M3McsKB3whIqpL0mqTXJb0qadfvsK9LJO1dyvrMACQdKikk9UynN5E0uGB5L0n7fYf9/0NSl1LUmlcO/MZhcUT0iojtgPOAX67ujiJiREQ8XbrSzFYYBDwPHJVObwIMLljeC1jtwLfvzoHf+LQD5lVPSDpb0su
SJkm6OJ23iaR3JN0k6S1J4yS1TpfdJunw9PV+kt6V9Lyk30t6NJ1/kaT/ljRe0mRJPynD+7RGRFJbYDfgJP4Z+KOA3umn03OAS4CB6fRASTtLelHS39PvW6f7ai7p15LeSH+vz6jRVmtJYyUNbcC32CRUlLsAK0prSa8BrYD1gD0BJPUDtgR2BgQ8LKkP8HE6f1BEDJV0LzAAGFO9Q0mtgNFAn4iYIunuGm32BPoC6wDvSbo+IpZl+SatUTsEGBsR70uaK2kH4FxgeEQcACBpJrBTRJyeTrcj+f2rTLsZLyf5PR0GbApsny7rVNBOW+BPwB0RcUeDvbsmwkf4jUN1l05PYF/gDkkC+qVffwdeJQnpLdNtpkTEa+nrV0g+XhfqCUyOiCnpdM3Afywivo6I2cDnQLdSviFrcgaRBDHp90FFbNMeuE/Sm8BvgW3S+XsDN0REJUBEzC3Y5iHgVof96vERfiMTES+lJ666khzV/zIiRheuI2kT4OuCWVVA6xq7Uj1N1dzevyu2SpI6k3zq3FZSAM2BAB6vZ9NLgWcj4tD0d3Z89S7T7VflBaC/pD+GbyL61nyE38ikV0A0B+YATwInpv2nSNpA0rpF7updYLP0PxrAwBKXavlxOEkXS4+I2CQiNgKmAMtJugSrfVVjuj3wWfr6hIL544BTJFUA1OjSGUHyu/+Hkr6DnHDgNw6t0xNdrwH3AMdHRFVEjAP+CLwk6Q3gz6z8H6pWEbEYOBUYK+l5YCYwP5vyrYkbBDxQY97/kJy8rUwvJ/4Z8CzwveqTtsCVwC8lvUByEFPtZpLzUJMkvc7KV/oA/BRoJenKDN5Lk+ahFXJMUtuIWJCeD7gO+CAiflvuuswsGz7Cz7eh6aeGt0g+Xo+uZ30za8R8hG9mlhM+wjczywkHvplZTjjwzcxywoFvTVbBKKNvSrpPUpvvsK89CsYaOkjSuXWs20HSqavRxkWShq9ujWb1ceBbU1Y9JMW2wFLglMKFSnzr/wMR8XBEjKpjlQ4k9ziYrVEc+JYXE4AtCkYS/QPJ+EMbSeon6aX0WQP3Fdy5vG/1aKLAYdU7knSCpGvT190kPZDeXPS6kmcVjAI2Tz9d/Cpd7xujmqbzL5D0nqSnga0b7KdhueTAtyYvvUW/P/BGOmtrkqEAtgcWAhcCe0fEDsBE4Kx0NNGbgAOB3kD3Wnb/e+Cv6bMKdiC5p+Fc4KP008XZNUY17QXsKKmPpB1J7kbdnuQPyvdL/NbNVuIBsawpqx5WGpIj/FuA9YGpEfG3dP4uwPeAF5IbjmkBvEQymuiUiPgAQNIYkmF7a9oTOA4gIqqA+ZI61lincFRTSIb43ZJkGIwHImJR2sbD3+ndmtXDgW9N2eKI6FU4Iw31hYWzgKciYlCN9XpR+4iN31Zto5r+tIRtmNXLXTqWd38DdpO0BYCkNpK2IhlNdFNJm6fr1Ta++zPAj9Ntm6cP9ag5KmRto5o+BxyaPsFpHZLuI7PMOPAt1yJiFsnQvHdLmkTyB6BnRCwh6cJ5LD1pO7WWXZwJ9E1HK30F2CYi5pB0Eb0p6Ve1jWoaEa+SjH76GsnokhMye6NmeCwdM7Pc8BG+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnx/zT+z1Wh63oMAAAAAElFTkSuQmCC",
-      "text/plain": [
-       "<Figure size 432x288 with 1 Axes>"
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "bSSZ4YCZywpT",
+        "outputId": "27d111b0-c0c1-4694-9e07-65965d2014f1"
+      },
+      "source": [
+        "data = pd.read_csv(\"Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\")\n",
+        "data.Label.value_counts()"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "Benign              1048009\n",
+              "Brute Force -Web        362\n",
+              "Brute Force -XSS        151\n",
+              "SQL Injection            53\n",
+              "Name: Label, dtype: int64"
+            ]
+          }
+        }
       ]
-     },
-     "output_type": "display_data",
-     "metadata": {}
-    }
-   ],
-   "source": [
-    "# Create confusion matrix\n",
-    "conf_matrix = confusion_matrix(y_true, y_pred_model)\n",
-    "\n",
-    "# Show confusion matrix\n",
-    "ax = plt.subplot()\n",
-    "sns.heatmap(conf_matrix, annot=True, ax=ax, cmap=\"Blues\", fmt=\"g\", cbar=False)\n",
-    "\n",
-    "# Add labels, title and ticks\n",
-    "ax.set_xlabel(\"Predicted\")\n",
-    "ax.set_ylabel(\"Acctual\")\n",
-    "ax.set_title(\"Confusion Matrix\")\n",
-    "ax.xaxis.set_ticklabels([\"Benign\", \"Attack\"])\n",
-    "ax.yaxis.set_ticklabels([\"Benign\", \"Attack\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "VJRwvXOvYtBL",
-    "outputId": "9d45426b-6bd8-4374-dcf8-9ffecedd7176"
-   },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "text": [
-      "Accuracy: 0.871\n",
-      "Precision: 1.000\n",
-      "Recall: 0.287\n"
-     ],
-     "name": "stdout"
-    }
-   ],
-   "source": [
-    "# Calculate accuracy\n",
-    "acc = accuracy_score(y_true, y_pred_model, normalize=True, sample_weight=None)\n",
-    "precision = precision_score(y_true, y_pred_model)\n",
-    "recall = recall_score(y_true, y_pred_model)\n",
-    "\n",
-    "print(f\"Accuracy: {acc:.3f}\")\n",
-    "print(f\"Precision: {precision:.3f}\")\n",
-    "print(f\"Recall: {recall:.3f}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 112
-    },
-    "id": "TFbfVi_-W6GT",
-    "outputId": "68facf43-ac41-44de-d59a-a489627cc893"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>type</th>\n",
-       "      <th>accuracy</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>Benign</td>\n",
-       "      <td>1.00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>Attack</td>\n",
-       "      <td>0.29</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "vYl82iXeywpT"
+      },
+      "source": [
+        "**Clean the data** using a Python script from the cloned repository."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "y-trCZOcywpT",
+        "outputId": "c51b5e77-4e66-47a3-c4f6-d492a7dfaef8"
+      },
+      "source": [
+        "!python DeepLearning-IDS/data_cleanup.py \"Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\" \"result23022018\""
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "cleaning Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\n",
+            "total rows read = 1048576\n",
+            "all done writing 1042868 rows; dropped 5708 rows\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "oKHL8HCjYTNQ"
+      },
+      "source": [
+        "Load the cleaned file (`result23022018.csv`) produced by the previous step."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 278
+        },
+        "id": "uzH6_tBpywpU",
+        "outputId": "017263dd-8e2a-45a4-9693-5b5b763197ce"
+      },
+      "source": [
+        "data_23_cleaned = pd.read_csv(\"result23022018.csv\")\n",
+        "data_23_cleaned.head()"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Dst Port</th>\n",
+              "      <th>Protocol</th>\n",
+              "      <th>Timestamp</th>\n",
+              "      <th>Flow Duration</th>\n",
+              "      <th>Tot Fwd Pkts</th>\n",
+              "      <th>Tot Bwd Pkts</th>\n",
+              "      <th>TotLen Fwd Pkts</th>\n",
+              "      <th>TotLen Bwd Pkts</th>\n",
+              "      <th>Fwd Pkt Len Max</th>\n",
+              "      <th>Fwd Pkt Len Min</th>\n",
+              "      <th>...</th>\n",
+              "      <th>Fwd Seg Size Min</th>\n",
+              "      <th>Active Mean</th>\n",
+              "      <th>Active Std</th>\n",
+              "      <th>Active Max</th>\n",
+              "      <th>Active Min</th>\n",
+              "      <th>Idle Mean</th>\n",
+              "      <th>Idle Std</th>\n",
+              "      <th>Idle Max</th>\n",
+              "      <th>Idle Min</th>\n",
+              "      <th>Label</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>22</td>\n",
+              "      <td>6</td>\n",
+              "      <td>1.519374e+09</td>\n",
+              "      <td>1532698</td>\n",
+              "      <td>11</td>\n",
+              "      <td>11</td>\n",
+              "      <td>1179</td>\n",
+              "      <td>1969</td>\n",
+              "      <td>648</td>\n",
+              "      <td>0</td>\n",
+              "      <td>...</td>\n",
+              "      <td>32</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>0</td>\n",
+              "      <td>0</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>0.000000e+00</td>\n",
+              "      <td>0</td>\n",
+              "      <td>0</td>\n",
+              "      <td>Benign</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>500</td>\n",
+              "      <td>17</td>\n",
+              "      <td>1.519374e+09</td>\n",
+              "      <td>117573855</td>\n",
+              "      <td>3</td>\n",
+              "      <td>0</td>\n",
+              "      <td>1500</td>\n",
+              "      <td>0</td>\n",
+              "      <td>500</td>\n",
+              "      <td>500</td>\n",
+              "      <td>...</td>\n",
+              "      <td>8</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>0</td>\n",
+              "      <td>0</td>\n",
+              "      <td>58786927.5</td>\n",
+              "      <td>2.375324e+07</td>\n",
+              "      <td>75583006</td>\n",
+              "      <td>41990849</td>\n",
+              "      <td>Benign</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>500</td>\n",
+              "      <td>17</td>\n",
+              "      <td>1.519374e+09</td>\n",
+              "      <td>117573848</td>\n",
+              "      <td>3</td>\n",
+              "      <td>0</td>\n",
+              "      <td>1500</td>\n",
+              "      <td>0</td>\n",
+              "      <td>500</td>\n",
+              "      <td>500</td>\n",
+              "      <td>...</td>\n",
+              "      <td>8</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>0</td>\n",
+              "      <td>0</td>\n",
+              "      <td>58786924.0</td>\n",
+              "      <td>2.375325e+07</td>\n",
+              "      <td>75583007</td>\n",
+              "      <td>41990841</td>\n",
+              "      <td>Benign</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>22</td>\n",
+              "      <td>6</td>\n",
+              "      <td>1.519374e+09</td>\n",
+              "      <td>1745392</td>\n",
+              "      <td>11</td>\n",
+              "      <td>11</td>\n",
+              "      <td>1179</td>\n",
+              "      <td>1969</td>\n",
+              "      <td>648</td>\n",
+              "      <td>0</td>\n",
+              "      <td>...</td>\n",
+              "      <td>32</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>0</td>\n",
+              "      <td>0</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>0.000000e+00</td>\n",
+              "      <td>0</td>\n",
+              "      <td>0</td>\n",
+              "      <td>Benign</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>500</td>\n",
+              "      <td>17</td>\n",
+              "      <td>1.519374e+09</td>\n",
+              "      <td>89483474</td>\n",
+              "      <td>6</td>\n",
+              "      <td>0</td>\n",
+              "      <td>3000</td>\n",
+              "      <td>0</td>\n",
+              "      <td>500</td>\n",
+              "      <td>500</td>\n",
+              "      <td>...</td>\n",
+              "      <td>8</td>\n",
+              "      <td>4000364.0</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>4000364</td>\n",
+              "      <td>4000364</td>\n",
+              "      <td>21370777.5</td>\n",
+              "      <td>1.528092e+07</td>\n",
+              "      <td>41989576</td>\n",
+              "      <td>7200485</td>\n",
+              "      <td>Benign</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>5 rows × 80 columns</p>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "   Dst Port  Protocol     Timestamp  Flow Duration  Tot Fwd Pkts  \\\n",
+              "0        22         6  1.519374e+09        1532698            11   \n",
+              "1       500        17  1.519374e+09      117573855             3   \n",
+              "2       500        17  1.519374e+09      117573848             3   \n",
+              "3        22         6  1.519374e+09        1745392            11   \n",
+              "4       500        17  1.519374e+09       89483474             6   \n",
+              "\n",
+              "   Tot Bwd Pkts  TotLen Fwd Pkts  TotLen Bwd Pkts  Fwd Pkt Len Max  \\\n",
+              "0            11             1179             1969              648   \n",
+              "1             0             1500                0              500   \n",
+              "2             0             1500                0              500   \n",
+              "3            11             1179             1969              648   \n",
+              "4             0             3000                0              500   \n",
+              "\n",
+              "   Fwd Pkt Len Min  ...  Fwd Seg Size Min  Active Mean  Active Std  \\\n",
+              "0                0  ...                32          0.0         0.0   \n",
+              "1              500  ...                 8          0.0         0.0   \n",
+              "2              500  ...                 8          0.0         0.0   \n",
+              "3                0  ...                32          0.0         0.0   \n",
+              "4              500  ...                 8    4000364.0         0.0   \n",
+              "\n",
+              "   Active Max  Active Min   Idle Mean      Idle Std  Idle Max  Idle Min  \\\n",
+              "0           0           0         0.0  0.000000e+00         0         0   \n",
+              "1           0           0  58786927.5  2.375324e+07  75583006  41990849   \n",
+              "2           0           0  58786924.0  2.375325e+07  75583007  41990841   \n",
+              "3           0           0         0.0  0.000000e+00         0         0   \n",
+              "4     4000364     4000364  21370777.5  1.528092e+07  41989576   7200485   \n",
+              "\n",
+              "    Label  \n",
+              "0  Benign  \n",
+              "1  Benign  \n",
+              "2  Benign  \n",
+              "3  Benign  \n",
+              "4  Benign  \n",
+              "\n",
+              "[5 rows x 80 columns]"
+            ]
+          }
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "cGXf6PmhYTNR",
+        "outputId": "3371cd8e-e8e3-4382-ad17-bfffd8e3d8ac"
+      },
+      "source": [
+        "data_23_cleaned.Label.value_counts()"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "Benign              1042301\n",
+              "Brute Force -Web        362\n",
+              "Brute Force -XSS        151\n",
+              "SQL Injection            53\n",
+              "Name: Label, dtype: int64"
+            ]
+          }
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "bWUf-dk1ywpU"
+      },
+      "source": [
+        "### Load the Model"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "97lIL236YTNR"
+      },
+      "source": [
+        "Here we load the pretrained model. The model was trained on data from the same date."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Bq8_hM-RfEeR"
+      },
+      "source": [
+        "We have modified [the original model](https://github.com/rambasnet/DeepLearning-IDS/blob/master/keras_tensorflow_models/02-23-2018.csv_adam_10_10_multiclass_baseline_model_1561316601.model) slightly and changed the number of classes from four (Benign, BruteForce-Web, BruteForce-XSS, SQL-Injection) to two (Benign and Attack). In the step below we will download and unzip our modified model."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "eRTvBzhoqPkR"
+      },
+      "source": [
+        "!wget -q -O it_threat_model.model.zip \"https://drive.google.com/uc?export=download&id=1VYMHOk_XMAc-QFJ_8CAPvWFfHnLpS2J_\" \n",
+        "!unzip -q it_threat_model.model.zip"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "WP44njScywpU",
+        "outputId": "2a9d2001-0328-4602-c595-372c5bf67aa4"
+      },
+      "source": [
+        "model = keras.models.load_model(\"it_threat_model.model\")\n",
+        "model.summary()"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "WARNING:tensorflow:SavedModel saved prior to TF 2.5 detected when loading Keras model. Please ensure that you are saving the model with model.save() or tf.keras.models.save_model(), *NOT* tf.saved_model.save(). To confirm, there should be a file named \"keras_metadata.pb\" in the SavedModel directory.\n",
+            "Model: \"sequential\"\n",
+            "_________________________________________________________________\n",
+            "Layer (type)                 Output Shape              Param #   \n",
+            "=================================================================\n",
+            "dense (Dense)                (None, 128)               10240     \n",
+            "_________________________________________________________________\n",
+            "dense_1 (Dense)              (None, 64)                8256      \n",
+            "_________________________________________________________________\n",
+            "dense_2 (Dense)              (None, 1)                 65        \n",
+            "=================================================================\n",
+            "Total params: 18,561\n",
+            "Trainable params: 18,561\n",
+            "Non-trainable params: 0\n",
+            "_________________________________________________________________\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "2rCdzSrfywpV"
+      },
+      "source": [
+        "# Select the first layer\n",
+        "layer_name = \"dense\"\n",
+        "intermediate_layer_model = Model(\n",
+        "    inputs=model.input, outputs=model.get_layer(layer_name).output\n",
+        ")"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "V3dx9XkPYTNV"
+      },
+      "source": [
+        "### Upload Data"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "yO1eySIdjSOG"
+      },
+      "source": [
+        "\n",
+        "Let's define the item IDs in a way that reflects each event's label. Then, we index the events in Pinecone's vector index."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "nk4ZjGg-ywpX",
+        "outputId": "bce8f1fd-98e3-4313-d625-c404a4eca184"
+      },
+      "source": [
+        "from tqdm import tqdm\n",
+        "\n",
+        "items_to_upload = []\n",
+        "\n",
+        "model_res = intermediate_layer_model.predict(K.constant(data_23_cleaned.iloc[:, :-1]))\n",
+        "\n",
+        "for i, res in tqdm(zip(data_23_cleaned.iterrows(), model_res), total=len(model_res)):\n",
+        "    benign_or_attack = i[1][\"Label\"][:3]\n",
+        "    items_to_upload.append((benign_or_attack + \"_\" + str(i[0]), res.tolist()))"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1042867/1042867 [01:43<00:00, 10067.22it/s]\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "cMzD8s3ps3k0"
+      },
+      "source": [
+        "import itertools\n",
+        "\n",
+        "\n",
+        "def chunks(iterable, batch_size=100):\n",
+        "    it = iter(iterable)\n",
+        "    chunk = tuple(itertools.islice(it, batch_size))\n",
+        "    while chunk:\n",
+        "        yield chunk\n",
+        "        chunk = tuple(itertools.islice(it, batch_size))"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "6C7er-8Gl2Rg"
+      },
+      "source": [
+        "You can lower the NUMBER_OF_ITEMS and, by doing so, limit the number of uploaded items. "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "_Ti9p0P-ywpX",
+        "scrolled": true
+      },
+      "source": [
+        "NUMBER_OF_ITEMS = len(items_to_upload)\n",
+        "\n",
+        "for batch in chunks(items_to_upload[:NUMBER_OF_ITEMS], 50):\n",
+        "    index.upsert(vectors=batch)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "XgF8vW8PtaRX"
+      },
+      "source": [
+        "items_to_upload.clear()"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "fglWJfAq_kw3"
+      },
+      "source": [
+        "Let's verify all items were inserted. "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "xU172A4EywpY",
+        "outputId": "80008942-0000-40e5-92a1-66b767a489c6"
+      },
+      "source": [
+        "index.describe_index_stats()"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "{'dimension': 128, 'namespaces': {'': {'vector_count': 1042867}}}"
+            ]
+          }
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "3CGzW3mVywpY"
+      },
+      "source": [
+        "## Query"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Ywuld4BylAIu"
+      },
+      "source": [
+        "First, we will randomly select a Benign/Attack event and query the vector index using the event embedding. Then, we will use data from a different day that contains the same set of attacks to query a bigger sample."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "m5H4rMyOYTNX"
+      },
+      "source": [
+        "\n",
+        "### Evaluate the Rare Event Classification Model"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "velMK_XlYTNX"
+      },
+      "source": [
+        "We will use the network intrusion dataset for 22-02-2018 to query and test the Pinecone index."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "VC2AVfWsj7em"
+      },
+      "source": [
+        "First, let's clean the data."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "bW9mhYvOYTNX",
+        "outputId": "84a9d548-de90-40fe-c183-fc2ca48de2ec",
+        "scrolled": true
+      },
+      "source": [
+        "!python DeepLearning-IDS/data_cleanup.py \"Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\" \"result22022018\""
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "cleaning Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\n",
+            "total rows read = 1048576\n",
+            "all done writing 1042966 rows; dropped 5610 rows\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 278
+        },
+        "id": "xqMuz0jKYTNX",
+        "outputId": "d9f8c333-5e3f-4509-b84a-901bacf42487"
+      },
+      "source": [
+        "data_22_cleaned = pd.read_csv(\"result22022018.csv\")\n",
+        "data_22_cleaned.head()"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Dst Port</th>\n",
+              "      <th>Protocol</th>\n",
+              "      <th>Timestamp</th>\n",
+              "      <th>Flow Duration</th>\n",
+              "      <th>Tot Fwd Pkts</th>\n",
+              "      <th>Tot Bwd Pkts</th>\n",
+              "      <th>TotLen Fwd Pkts</th>\n",
+              "      <th>TotLen Bwd Pkts</th>\n",
+              "      <th>Fwd Pkt Len Max</th>\n",
+              "      <th>Fwd Pkt Len Min</th>\n",
+              "      <th>...</th>\n",
+              "      <th>Fwd Seg Size Min</th>\n",
+              "      <th>Active Mean</th>\n",
+              "      <th>Active Std</th>\n",
+              "      <th>Active Max</th>\n",
+              "      <th>Active Min</th>\n",
+              "      <th>Idle Mean</th>\n",
+              "      <th>Idle Std</th>\n",
+              "      <th>Idle Max</th>\n",
+              "      <th>Idle Min</th>\n",
+              "      <th>Label</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>22</td>\n",
+              "      <td>6</td>\n",
+              "      <td>1.519288e+09</td>\n",
+              "      <td>20553406</td>\n",
+              "      <td>10</td>\n",
+              "      <td>7</td>\n",
+              "      <td>1063</td>\n",
+              "      <td>1297</td>\n",
+              "      <td>744</td>\n",
+              "      <td>0</td>\n",
+              "      <td>...</td>\n",
+              "      <td>20</td>\n",
+              "      <td>1027304.0</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>1027304</td>\n",
+              "      <td>1027304</td>\n",
+              "      <td>1.952608e+07</td>\n",
+              "      <td>0.000000e+00</td>\n",
+              "      <td>19526080</td>\n",
+              "      <td>19526080</td>\n",
+              "      <td>Benign</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>34989</td>\n",
+              "      <td>6</td>\n",
+              "      <td>1.519288e+09</td>\n",
+              "      <td>790</td>\n",
+              "      <td>2</td>\n",
+              "      <td>0</td>\n",
+              "      <td>848</td>\n",
+              "      <td>0</td>\n",
+              "      <td>848</td>\n",
+              "      <td>0</td>\n",
+              "      <td>...</td>\n",
+              "      <td>20</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>0</td>\n",
+              "      <td>0</td>\n",
+              "      <td>0.000000e+00</td>\n",
+              "      <td>0.000000e+00</td>\n",
+              "      <td>0</td>\n",
+              "      <td>0</td>\n",
+              "      <td>Benign</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>500</td>\n",
+              "      <td>17</td>\n",
+              "      <td>1.519288e+09</td>\n",
+              "      <td>99745913</td>\n",
+              "      <td>5</td>\n",
+              "      <td>0</td>\n",
+              "      <td>2500</td>\n",
+              "      <td>0</td>\n",
+              "      <td>500</td>\n",
+              "      <td>500</td>\n",
+              "      <td>...</td>\n",
+              "      <td>8</td>\n",
+              "      <td>4000203.0</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>4000203</td>\n",
+              "      <td>4000203</td>\n",
+              "      <td>3.191524e+07</td>\n",
+              "      <td>3.792787e+07</td>\n",
+              "      <td>75584115</td>\n",
+              "      <td>7200679</td>\n",
+              "      <td>Benign</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>500</td>\n",
+              "      <td>17</td>\n",
+              "      <td>1.519288e+09</td>\n",
+              "      <td>99745913</td>\n",
+              "      <td>5</td>\n",
+              "      <td>0</td>\n",
+              "      <td>2500</td>\n",
+              "      <td>0</td>\n",
+              "      <td>500</td>\n",
+              "      <td>500</td>\n",
+              "      <td>...</td>\n",
+              "      <td>8</td>\n",
+              "      <td>4000189.0</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>4000189</td>\n",
+              "      <td>4000189</td>\n",
+              "      <td>3.191524e+07</td>\n",
+              "      <td>3.792788e+07</td>\n",
+              "      <td>75584130</td>\n",
+              "      <td>7200693</td>\n",
+              "      <td>Benign</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>500</td>\n",
+              "      <td>17</td>\n",
+              "      <td>1.519288e+09</td>\n",
+              "      <td>89481361</td>\n",
+              "      <td>6</td>\n",
+              "      <td>0</td>\n",
+              "      <td>3000</td>\n",
+              "      <td>0</td>\n",
+              "      <td>500</td>\n",
+              "      <td>500</td>\n",
+              "      <td>...</td>\n",
+              "      <td>8</td>\n",
+              "      <td>4000554.0</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>4000554</td>\n",
+              "      <td>4000554</td>\n",
+              "      <td>2.137020e+07</td>\n",
+              "      <td>1.528109e+07</td>\n",
+              "      <td>41990741</td>\n",
+              "      <td>7200848</td>\n",
+              "      <td>Benign</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>5 rows × 80 columns</p>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "   Dst Port  Protocol     Timestamp  Flow Duration  Tot Fwd Pkts  \\\n",
+              "0        22         6  1.519288e+09       20553406            10   \n",
+              "1     34989         6  1.519288e+09            790             2   \n",
+              "2       500        17  1.519288e+09       99745913             5   \n",
+              "3       500        17  1.519288e+09       99745913             5   \n",
+              "4       500        17  1.519288e+09       89481361             6   \n",
+              "\n",
+              "   Tot Bwd Pkts  TotLen Fwd Pkts  TotLen Bwd Pkts  Fwd Pkt Len Max  \\\n",
+              "0             7             1063             1297              744   \n",
+              "1             0              848                0              848   \n",
+              "2             0             2500                0              500   \n",
+              "3             0             2500                0              500   \n",
+              "4             0             3000                0              500   \n",
+              "\n",
+              "   Fwd Pkt Len Min  ...  Fwd Seg Size Min  Active Mean  Active Std  \\\n",
+              "0                0  ...                20    1027304.0         0.0   \n",
+              "1                0  ...                20          0.0         0.0   \n",
+              "2              500  ...                 8    4000203.0         0.0   \n",
+              "3              500  ...                 8    4000189.0         0.0   \n",
+              "4              500  ...                 8    4000554.0         0.0   \n",
+              "\n",
+              "   Active Max  Active Min     Idle Mean      Idle Std  Idle Max  Idle Min  \\\n",
+              "0     1027304     1027304  1.952608e+07  0.000000e+00  19526080  19526080   \n",
+              "1           0           0  0.000000e+00  0.000000e+00         0         0   \n",
+              "2     4000203     4000203  3.191524e+07  3.792787e+07  75584115   7200679   \n",
+              "3     4000189     4000189  3.191524e+07  3.792788e+07  75584130   7200693   \n",
+              "4     4000554     4000554  2.137020e+07  1.528109e+07  41990741   7200848   \n",
+              "\n",
+              "    Label  \n",
+              "0  Benign  \n",
+              "1  Benign  \n",
+              "2  Benign  \n",
+              "3  Benign  \n",
+              "4  Benign  \n",
+              "\n",
+              "[5 rows x 80 columns]"
+            ]
+          }
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "262mxbQDYTNY",
+        "outputId": "7540316e-2c26-49ad-a487-85a4fe0bd84c"
+      },
+      "source": [
+        "data_22_cleaned.Label.value_counts()"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "Benign              1042603\n",
+              "Brute Force -Web        249\n",
+              "Brute Force -XSS         79\n",
+              "SQL Injection            34\n",
+              "Name: Label, dtype: int64"
+            ]
+          }
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "P6nJdtveYTNY"
+      },
+      "source": [
+        "Let's define a sample that includes all the different types of web attacks for this specific date."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "N7vwZk6HYTNY",
+        "outputId": "96c03fc3-c086-4398-b58a-2c5c57eb4ded"
+      },
+      "source": [
+        "data_sample = data_22_cleaned[-2000:]\n",
+        "data_sample.Label.value_counts()"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "Benign              1638\n",
+              "Brute Force -Web     249\n",
+              "Brute Force -XSS      79\n",
+              "SQL Injection         34\n",
+              "Name: Label, dtype: int64"
+            ]
+          }
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "neSxNwYckGMK"
+      },
+      "source": [
+        "Now, we will query the test dataset and save predicted and expected results to create a confusion matrix."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 136,
+          "referenced_widgets": [
+            "567d04116b1b4d21bac2348535b750a0",
+            "8f92a228cbc54b30bcb22d0598e9577f",
+            "8fe529938a4f40ac905e145e423d856e",
+            "a4b449113a734e90825bc1ef87ca3d3c",
+            "cda25dc5f86344b3850c92e59085c06c",
+            "3586c81ff82048ed80d69b7a4b5bd6b3",
+            "c427de521f054c4997d69586251bed4f",
+            "90d8d1d9da814d90b0f0bf331102d4df",
+            "7f2cabefd9eb4674a63ef3d56a5be122",
+            "dda94f4a1ea946b7996a928374dda4a5",
+            "ea6f763369cd4b478998ea4d3e8f20e6",
+            "e752273786584dd4baa77ac3f4528849",
+            "aef6058200fa454c90f51760685e25db",
+            "87b4d4b12452401cb82285364dae3576",
+            "8a60896e0288471a91089a03a75b210b",
+            "fe75c2a93c6e4619a2731a4c01a71619",
+            "5c64d617d0d94a6a9797d20f0d1e80f3",
+            "25ac309f2f5d43169ed0bda88300c8d2",
+            "9ebe5f94bbb743058602c9af26cd4eaf",
+            "3db0b4717ac140b78bf7d75e7ebeaf39",
+            "fcc29e1c6b304044a53522c550c4b49d",
+            "cd9e8a060d05491f8c4d74871c9560fa"
+          ]
+        },
+        "id": "8u6cg_1tYTNY",
+        "outputId": "dbe81cb3-88fb-4cd9-91fe-773c960ca108",
+        "scrolled": true
+      },
+      "source": [
+        "y_true = []\n",
+        "y_pred = []\n",
+        "\n",
+        "BATCH_SIZE = 100\n",
+        "\n",
+        "for i in tqdm(range(0, len(data_sample), BATCH_SIZE)):\n",
+        "    test_data = data_sample.iloc[i : i + BATCH_SIZE, :]\n",
+        "\n",
+        "    # Create vector embedding using the model\n",
+        "    test_vector = intermediate_layer_model.predict(K.constant(test_data.iloc[:, :-1]))\n",
+        "    # Query using the vector embedding\n",
+        "    query_results = []\n",
+        "\n",
+        "    for xq in test_vector.tolist():\n",
+        "        query_res = index.query(vector=xq, top_k=50)\n",
+        "        query_results.append(query_res)\n",
+        "\n",
+        "    ids = [res.id for result in query_results for res in result.matches]\n",
+        "\n",
+        "    for label, res in zip(test_data.Label.values, query_results):\n",
+        "        # Add to the true list\n",
+        "        if label == \"Benign\":\n",
+        "            y_true.append(0)\n",
+        "        else:\n",
+        "            y_true.append(1)\n",
+        "\n",
+        "        counter = Counter(match.id.split(\"_\")[0] for match in res.matches)\n",
+        "\n",
+        "        # Add to the predicted list\n",
+        "        if counter[\"Bru\"] or counter[\"SQL\"]:\n",
+        "            y_pred.append(1)\n",
+        "        else:\n",
+        "            y_pred.append(0)"
       ],
-      "text/plain": [
-       "     type  accuracy\n",
-       "0  Benign      1.00\n",
-       "1  Attack      0.29"
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [10:48<00:00, 32.44s/it]\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 313
+        },
+        "id": "HV3-gkdWYTNZ",
+        "outputId": "20103ea7-713b-4e2b-9590-ecc70ef9b76e"
+      },
+      "source": [
+        "# Create confusion matrix\n",
+        "conf_matrix = confusion_matrix(y_true, y_pred)\n",
+        "\n",
+        "# Show confusion matrix\n",
+        "ax = plt.subplot()\n",
+        "sns.heatmap(conf_matrix, annot=True, ax=ax, cmap=\"Blues\", fmt=\"g\", cbar=False)\n",
+        "\n",
+        "# Add labels, title and ticks\n",
+        "ax.set_xlabel(\"Predicted\")\n",
+        "ax.set_ylabel(\"Actual\")\n",
+        "ax.set_title(\"Confusion Matrix\")\n",
+        "ax.xaxis.set_ticklabels([\"Benign\", \"Attack\"])\n",
+        "ax.yaxis.set_ticklabels([\"Benign\", \"Attack\"])"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "[Text(0, 0.5, 'Benign'), Text(0, 1.5, 'Attack')]"
+            ]
+          }
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEWCAYAAABliCz2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAbKUlEQVR4nO3debxVdb3/8debc5RBQEZxKBAcILUUtX5eS1IzE6+aM6JlGE455HXWMBwoLbNBrzl3zTmnLEwihyupZDeQxCEpUqIUQTgCKjId+Pz+WOvQ5sg5Z4t7nQ3n+34+HufBXsNe38+Gzfus/V3f/V2KCMzMrO1rV+0CzMysdTjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cC3NkNSR0kPS1oo6f6PcJxjJD1aydqqQdJvJX2t2nXYusOBb61O0tGSJkt6T9KbeTB9rgKHPhzoA/SMiCPW9iARcVdE7FuBelYjaU9JIemXjdbvmK+fUOZxLpF0Z0v7RcTQiLhtLcu1NsiBb61K0lnAT4DLycK5L3Ad8OUKHL4f8LeIqK/AsYoyF9hdUs+SdV8D/lapBpTx/237AL8prNVI2hi4DDg1In4ZEYsiYnlEPBwR5+b7tJf0E0mz8p+fSGqfb9tT0uuSzpb0Vv7p4Lh826XAaGBY/slhZOMzYUlb5mfStfnyCEmvSXpX0gxJx5Ssf6bkebtLmpR3FU2StHvJtgmSxkiamB/nUUm9mvlrWAb8Cjgqf34NcCRwV6O/q6sl/UvSO5Kek7RHvn4/4Fslr3NqSR3flTQReB8YkK87Pt9+vaQHSo7/fUlPSFLZ/4C23nPgW2v6D6AD8FAz+4wCdgN2AnYEPgNcVLJ9U2BjYAtgJPBTSd0j4mKyTw33RkTniPhZc4VI2gi4BhgaEV2A3YHn17BfD+CRfN+ewI+ARxqdoR8NHAdsAmwInNNc28DtwLH54y8BLwOzGu0ziezvoAdwN3C/pA4RMb7R69yx5DlfBU4EugAzGx3vbOBT+S+zPcj+7r4WnlslKQ58a009gXktdLkcA1wWEW9FxFzgUrIga7A83748IsYB7wED17KelcAOkjpGxJsR8fIa9vlPYHpE3BER9RFxDzANOLBkn1sj4m8RsRi4jyyomxQRfwB6SBpIFvy3r2GfOyOiLm/zh0B7Wn6dP4+Il/PnLG90vPeBr5D9wroTOD0iXm/heNbGOPCtNdUBvRq6VJqwOaufnc7M1606RqNfGO8DnT9sIRGxCBgGnAy8KekRSYPKqKehpi1KlmevRT13AKcBe7GGTzx5t9UreTfSArJPNc11FQH8q7mNEfEn4DVAZL+YLDEOfGtNzwJLgIOb2WcW2cXXBn35YHdHuRYBnUqWNy3dGBG/i4gvApuRnbXfXEY9DTW9sZY1NbgDOAUYl599r5J3uZxP1rffPSK6AQvJghqgqW6YZrtnJJ1K9klhFnDe2pdu6ysHvrWaiFhIdmH1p5IOltRJ0gaShkq6Mt/tHuAiSb3zi5+jybog1sbzwBBJffMLxhc2bJDUR9JBeV/+UrKuoRVrOMY4YNt8KGmtpGHAdsBv1rImACJiBvB5smsWjXUB6slG9NRKGg10Ldk+B9jyw4zEkbQt8B2ybp2vAudJarbrydoeB761qoj4EXAW2YXYuWTdEKeRjVyBLJQmAy8ALwJT8nVr09ZjwL35sZ5j9ZBuR3YhcxbwNln4nrKGY9QBB+T71pGdGR8QEfPWpqZGx34mItb06eV3wG/JhmrOJPtUVNpd0/ClsjpJU1pqJ+9CuxP4fkRMjYjpZCN97mgYAWVpkC/Sm5mlwWf4ZmaJcOCbmSXCgW9mlggHvplZIpr7AkxVdRx8mq8m2zpp/qRrq12CWZM61NLk/Eg+wzczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD3
8wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NE1BbdgKQaoE9pWxHxz6LbNTOz1RUa+JJOBy4G5gAr89UBfKrIds3M7IOKPsM/AxgYEXUFt2NmZi0oug//X8DCgtswM7MyFH2G/xowQdIjwNKGlRHxo4LbNTOzRooO/H/mPxvmP2ZmViWFBn5EXFrk8c3MrHxFj9J5mGxUTqmFwGTgxohYUmT7Zmb2b0VftH0NeA+4Of95h2yI5rb5spmZtZKi+/AHR8SQkuWHJT0VEUMkvVxw22ZmVqLoM/zekvo2LOSPe+WLywpu28zMShR9hn828IykVwEB/YFTJG0E3FZw223aDRcfw9AhOzD37XfZ9YjLV63/xlGf5+RhQ6hfsZLxT7/EqKt/za7b9+Pabw8HQILv3jCOsU++QOdO7Xn8f85c9dwtNunGL8ZN4tyrHmz112PpGX3RhTz1+wn06NGTX/76N9UuJwmKaHxNtcINSO2BQWSBP63cC7UdB59WbGHruc/uvBWL3l/KLWOOXRX4Q3bdhvOP/xKHnH4Dy5bX07t7Z+bOf4+OHTZg2fIVrFixkk17deX/7r2QAfuOYsWKlasdc+Jd53HeDx9k4pRXq/GS1hvzJ11b7RLahOcmT6JTp06MuvB8B34FdahFTW0r5Axf0t4R8b+SDm20aYAkIuKXRbSbkolTXqXvZj1WW3fiEXtw1a2PsWx5PQBz578HwOIly1ft037DDVjTL/mt+vZmkx5dHPbWanbZ9dO88cbr1S4jKUV16Xwe+F/gwDVsC8CBX4Ct+23CZwdvxaWnHsiSZcu58EcP8dxfsolJP71DP2645Cv03awHIy+67QNn90futwsPPDqlGmWbWSspJPAj4uL8z+M+zPMknQicCFD7sT2p7bV9AdW1XbU17ejetRNDjr2KXbfvx51Xfp1PHHAJAJNemskuh3+Xgf37cMtlX+V3E//C0mX1q557xJd2YeRFt1epcjNrDUV/8ao9cBiwJavPh3/ZmvaPiJuAm8B9+GvjjTkL+NUTUwGY/PJMVq4MenXvzLy8awfgrzPmsGjxMrbfenOm5Gf/n9x2C2pravjzK/+qSt1m1jqKHpb5a+DLQD2wqOTHCvDwhBfY8zPbArB1303YcINa5s1/j36b96SmJvun7rtZd7bdsg8zZ/17xuoj99uF+8ZPrkrNZtZ6ih6W+bGI2K/gNpJ02xUj2GOXbejVrTN/Hz+GMTeM47ZfPcuNlxzD5Pu/xbLlKzh+9B0A7D54AOccty/L61ewcmVwxuX3Urfg3793D/vizhx8+vXVeimWqPPPOYvJk/7EggXz+eLeQ/jGqadz6GFHVLusNq3QYZmSbgL+OyJe/LDPdZeOras8LNPWZa0+LLPE54ARkmaQzYcvICLCtzg0M2tlRQf+0IKPb2ZmZSr0om1EzAQ+DuydP36/6DbNzGzNCg1fSRcD5wMX5qs2AO4ssk0zM1uzos+2DwEOIh+KGRGzgC4Ft2lmZmtQdOAvi2wYUADks2SamVkVFB3490m6Eegm6QTgcXynKzOzqij6JuZXSfoi2a0NBwKjI+KxIts0M7M1K3pYJnnAPyapF1DX0v5mZlaMQrp0JO0maYKkX0oaLOkl4CVgjiRPtWBmVgVFneFfC3wL2JhsXvyhEfFHSYOAe4DxBbVrZmZNKOqibW1EPBoR9wOzI+KPABExraD2zMysBUUFfuntlBY32uZJ0czMqqCoLp0dJb1DNllax/wx+XKHgto0M7NmFHWLw5oijmtmZmvPE5mZmSXCgW9mlggHvplZIhz4ZmaJcOCbmSXCgW9mlggHvplZIhz4Z
maJcOCbmSXCgW9mlggHvplZIhz4ZmaJcOCbmSXCgW9mlggHvplZIhz4ZmaJcOCbmSXCgW9mlogmb3EoaefmnhgRUypfjpmZFaW5e9r+sJltAexd4VrMzKxATQZ+ROzVmoWYmVmxmjvDX0XSDsB2QIeGdRFxe1FFmZlZ5bUY+JIuBvYkC/xxwFDgGcCBb2a2HilnlM7hwBeA2RFxHLAj0L7QqszMrOLKCfzFEbESqJfUFXgLGFBsWWZmVmnl9OFPltQNuBl4DngP+FOhVZmZWcW1GPgRcUr+8AZJ44GuEfFCsWWZmVmllXPRdsia1kXEU8WUZGZmRSinS+fckscdgM+Qde34i1dmZuuRcrp0DixdlvRx4MrCKjIzs0KszeRprwM7VLoQMzMrVjl9+P9NNncOZL8gdgKmFlmUmZlVXlnDMkse1wP3RMTEguoxM7OClBP43SLi6tIVks5ovM7MzNZtiojmd5CmRMTOjdb9OSIGF1nYrAXLmi/MrEqW1q+sdglmTerfq4Oa2tbcDVCGA0cD/SWNLdnUBairXHlmZtYamuvS+QPwJtCL1W+G8i7gb9qama1nmrsBykxgpqRjgFkRsQRAUkfgY8A/WqVCMzOriHLG4d8HlHZargDuL6YcMzMrSjmBXxsRyxoW8scbFleSmZkVoZzAnyvpoIYFSV8G5hVXkpmZFaGccfgnA3dJujZffh04triSzMysCOVMnvYqsJukzmTj9t8tviwzM6u0Frt0JF0uqVtEvBcR70rqLuk7rVGcmZlVTjl9+EMjYkHDQkTMB/YvriQzMytCOYFfI6l9w0I+Dr99M/ubmdk6qJyLtncCT0i6NV8+DrituJLMzKwI5Vy0vVLSC8A+gIDxQL+iCzMzs8oq945Xs8m+bXsY8AXglcIqMjOzQjQ3W+a2wFHAcLLZMe8lG5a5VyvVZmZmFdRcl8404GngwIj4O4CkM1ulKjMzq7jmunQOI+vKeVLSzZK+QNaHb2Zm66EmAz8iHoqIYcAgYAJwJtBH0vWS9m2l+szMrEJavGgbEYsi4q6IOIBsHvzngQsKr8zMzCqqxXvaVovvaWvrKt/T1tZlzd3TttxhmWZmtp5z4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiCgt8SSPXsO57RbVnZmbNqy3w2IdLWhIRdwFIug5oX2B7ZmbWjCID/1BgrKSVwFDg7Yg4pcD2zMysGRUPfEk9ShaPB34FTAQuk9QjIt6udJtmZtYyRURlDyjNAAJQyZ8NIiIGlHOcWQuWVbYwswpZWr+y2iWYNal/rw5qalvFz/Ajon+lj2lmZh9dkaN0TpXUrWS5uyT34ZuZVUmR4/BPiIgFDQsRMR84ocD2zMysGUUGfjtJq/qSJNUAGxbYnpmZNaPIYZm/A+6TdAPZxduTgfEFtpes74/5Nn+c+BTduvfg1nseAuDnN1/HI79+kI27dQfg+G98k90+O4RXXn6RH15xKQARwYgTTmGPPb9QtdqtbZs7ZzY/GDOK+W/XIYn9v3w4Bx95DO++s5DLv30ec2bPos+mm/OtMT+gS9euLF++nGuuvIzp0/6C2rXj5DPOY8edP13tl9FmVHyUzqoDS+2Ak4AvkI3UeRS4JSJWlPN8j9Ip39Q/T6Zjx05ccemo1QK/Y8dODPvKiNX2XbJkMRvUbkBNbS118+Zy/FcO54HfPEFNbZG/+9sWj9IpX928ubxdN49tBn6C9xct4vSRRzH6ip/w2LixdOnalWFfHcm9d/yM9959h5GnnMnYB3/B9Gkvc/aoMSyYX8dFZ5/KNbfcTbt2ngWmXM2N0insbzEiVkbE9RFxeEQcFhE3lhv29uHsOHhXunbduKx9O3TouCrcly1bSpPvDLMK6NmrN9sM/AQAnTbaiI/3G
0Dd3Ld49ukn2WfoQQDsM/Qg/vDUkwD88x+vsdOu/w+Abt170rlzF6ZPe7k6xbdBRY7S2UbSA5L+Ium1hp+i2rMPeuiBexh5zKF8f8y3efedhavW/+WlFxhx1MF8/ehDOfOC0T67t1Yx+803eHX6NAZu/0kWzH+bnr16A9kvhYULsu9jDth6W559egIr6uuZPet1pv/1FebOmVPNstuUIj8n3QpcD9QDewG3A3c09wRJJ0qaLGnynT+/pcDS2r6DDj2Sux4cx813PEDPXr257uqrVm3bbodP8fNf/Iobbv0Fd992C8uWLq1ipZaCxe+/z3dGnc1J3zyXjTbq3OR+X/rPg+nduw+njzyaG67+AdvtsCM1tTWtWGnbVuSpXceIeEKSImImcImkp4GLm3pCRNwE3ATuw/+oevTsterxAV8+jAvPPu0D+/TrP4AOHToy47W/M/AT27dmeZaQ+vrljBl1Fnvtuz+f23MfALp170HdvLn07NWbunlz2bhbNiNLTW0tJ51x7qrnnnnSsWz+sb5VqbstKvIMf0l+4Xa6pNMkHQJsUmB7VqJu3txVj5/+/RP0H7A1AG/Oep0V9fUAzH5zFv/65z/YdLPNq1KjtX0RwY+vuIS+/QZw2FHHrlq/2+f25PHfjgXg8d+O5T/22AvIBhUsWfw+AFP+9Cw1NTX0679V6xfeRhU5SufTwCtAN2AM0BW4MiL+r5zn+wy/fGMuOo/np0xi4YIFdO/RgxEnnsrU5ybx9+nTkMSmm23BWReMpmev3jw67mHuvv1n1NbW0q5dO44deRKf+7yHZX4YHqVTvpemTuGcU45jy622oZ2y88sRJ53OoO0/yeXfPpe35sxmkz6bMuo7V9Gl68bMfvMNRp35Ddq1a0fP3ptw5oWX0GdTn5B8GM2N0iky8I+IiPtbWtcUB76tqxz4ti6ryrBM4MIy15mZWSsoYj78ocD+wBaSrinZ1JVsxI6ZmVVBEaN0ZgGTgYOA50rWvwucWUB7ZmZWhiLmw58KTJXUJyJuK90m6Qzg6kq3aWZmLSuyD/+oNawbUWB7ZmbWjCL68IcDRwP9JY0t2dQVmFfp9szMrDxF9OH/AXgT6AX8sGR9AMMKaM/MzMpQRB/+TGAm8B+SdiI72z8SmAE8WOn2zMysPEV06WxL1n8/HKgD7iX7gtdelW7LzMzKV0SXzjTgaeDAiPg7gCQPxzQzq7IiRukcBswGnpR0s6SGO16ZmVkVVTzwI+KhiBgGDAImkH3Zqo+k6yXtW+n2zMysPEXe4nBRRNwVEQcAHwOeBy4oqj0zM2teYbNlflSeLdPWVZ4t09Zl1Zot08zM1iEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEIqLaNVgrkHRiRNxU7TrMGvN7s/X4DD8dJ1a7ALMm+L3ZShz4ZmaJcOCbmSXCgZ8O95HausrvzVbii7ZmZonwGb6ZWSIc+GZmiXDgrwckrZD0vKSpkqZI2v0jHOsySftUsj4zAEmHSApJg/LlLSUdXbJ9J0n7f4Tj/0NSr0rUmioH/vphcUTsFBE7AhcCV6ztgSJidEQ8XrnSzFYZDjwDHJUvbwkcXbJ9J2CtA98+Ogf++qcrML9hQdK5kiZJekHSpfm6LSW9IulmSS9LelRSx3zbzyUdnj/eX9I0Sc9IukbSb/L1l0j6H0kTJL0m6ZtVeJ22HpHUGfgsMJJ/B/73gD3yT6fnA5cBw/LlYZI+I+kPkv6c/zkwP1aNpKskvZi/r09v1FZHSeMlndCKL7FNqK12AVaWjpKeB
zoAmwF7A0jaF9gG+AwgYKykIcA/8/XDI+IESfcBhwF3NhxQUgfgRmBIRMyQdE+jNgcBewFdgL9Kuj4ilhf5Im29djAwPiL+JultSTsDFwDnRMQBAJLmALtGxGn5cley91993s14Odn79ESgPzA439ajpJ3OwC+A2yPi9lZ7dW2Ez/DXDw1dOoOA/YDbJQnYN//5MzCFLKS3yZ8zIyKezx8/R/bxutQg4LWImJEvNw78RyJiaUTMA94C+lTyBVmbM5wsiMn/HF7GczYG7pf0EvBjYPt8/T7ADRFRDxARb5c859fArQ77teMz/PVMRDybX7jqTXZWf0VE3Fi6j6QtgaUlq1YAHRsdSi001fj5fq/YGknqSfapcwdJAdQAAYxr4aljgCcj4pD8PTuh4ZD589dkIjBU0t3hLxF9aD7DX8/kIyBqgDrgd8DX8/5TJG0haZMyDzUNGJD/RwMYVuFSLR2Hk3Wx9IuILSPi48AMYCVZl2CDdxstbwy8kT8eUbL+UeBkSbUAjbp0RpO996+r6CtIhAN//dAxv9D1PHAv8LWIWBERjwJ3A89KehF4gNX/QzUpIhYDpwDjJT0DzAEWFlO+tXHDgYcarXuQ7OJtfT6c+EzgSWC7hou2wJXAFZImkp3ENLiF7DrUC5KmsvpIH4D/AjpIurKA19KmeWqFhEnqHBHv5dcDfgpMj4gfV7suMyuGz/DTdkL+qeFlso/XN7awv5mtx3yGb2aWCJ/hm5klwoFvZpYIB76ZWSIc+NZmlcwy+pKk+yV1+gjH2rNkrqGDJF3QzL7dJJ2yFm1cIumcta3RrCUOfGvLGqak2AFYBpxculGZD/1/ICLGRsT3mtmlG9l3HMzWKQ58S8XTwNYlM4leRzb/0Mcl7Svp2fxeA/eXfHN5v4bZRIFDGw4kaYSka/PHfSQ9lH+5aKqyexV8D9gq/3Txg3y/D8xqmq8fJemvkh4HBrba34YlyYFvbV7+Ff2hwIv5qoFkUwEMBhYBFwH7RMTOwGTgrHw20ZuBA4E9gE2bOPw1wO/zexXsTPadhguAV/NPF+c2mtV0J2AXSUMk7UL2bdTBZL9QPl3hl262Gk+IZW1Zw7TSkJ3h/wzYHJgZEX/M1+8GbAdMzL5wzIbAs2Szic6IiOkAku4km7a3sb2BYwEiYgWwUFL3RvuUzmoK2RS/25BNg/FQRLyftzH2I71asxY48K0tWxwRO5WuyEN9Uekq4LGIGN5ov51oesbGD6upWU3/q4JtmLXIXTqWuj8Cn5W0NYCkTpK2JZtNtL+krfL9mprf/QngG/lza/KbejSeFbKpWU2fAg7J7+DUhaz7yKwwDnxLWkTMJZua9x5JL5D9AhgUEUvIunAeyS/azmziEGcAe+WzlT4HbB8RdWRdRC9J+kFTs5pGxBSy2U+fJ5td8unCXqgZnkvHzCwZPsM3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRPx/QQOreDywqpcAAAAASUVORK5CYII=",
+            "text/plain": [
+              "<Figure size 432x288 with 1 Axes>"
+            ]
+          }
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "-H7rWFguYTNZ"
+      },
+      "source": [
+        "Now we can calculate the overall accuracy, precision, and recall, followed by the per-class accuracy."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "uHTInLt2YTNZ",
+        "outputId": "e48f2726-5eba-4d17-ffe7-7b17c62d5ee8"
+      },
+      "source": [
+        "# Calculate accuracy\n",
+        "acc = accuracy_score(y_true, y_pred, normalize=True, sample_weight=None)\n",
+        "precision = precision_score(y_true, y_pred)\n",
+        "recall = recall_score(y_true, y_pred)\n",
+        "\n",
+        "print(f\"Accuracy: {acc:.3f}\")\n",
+        "print(f\"Precision: {precision:.3f}\")\n",
+        "print(f\"Recall: {recall:.3f}\")"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Accuracy: 0.923\n",
+            "Precision: 0.995\n",
+            "Recall: 0.577\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 112
+        },
+        "id": "ZNzyqAH9YTNZ",
+        "outputId": "7d2e35a1-df80-47bc-f5e7-fd425e79a7f4"
+      },
+      "source": [
+        "# Calculate per class accuracy\n",
+        "cmd = confusion_matrix(y_true, y_pred, normalize=\"true\").diagonal()\n",
+        "per_class_accuracy_df = pd.DataFrame(\n",
+        "    [(index, round(value, 4)) for index, value in zip([\"Benign\", \"Attack\"], cmd)],\n",
+        "    columns=[\"type\", \"accuracy\"],\n",
+        ")\n",
+        "per_class_accuracy_df = per_class_accuracy_df.round(2)\n",
+        "display(per_class_accuracy_df)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>type</th>\n",
+              "      <th>accuracy</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>Benign</td>\n",
+              "      <td>1.00</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>Attack</td>\n",
+              "      <td>0.58</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "     type  accuracy\n",
+              "0  Benign      1.00\n",
+              "1  Attack      0.58"
+            ]
+          }
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Gfy_LW5zXIj6"
+      },
+      "source": [
+        "We got great results using Pinecone! Let's see what happens if we skip the similarity search step and predict values from the model directly. In other words, let's use the model that created the embeddings as a classifier. It would be interesting to compare its accuracy with that of the similarity search approach. "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Oxya9-mMYh5A"
+      },
+      "source": [
+        "import numpy as np\n",
+        "from tensorflow.keras.utils import normalize\n",
+        "\n",
+        "data_sample = normalize(data_22_cleaned.iloc[:, :-1])[-2000:]\n",
+        "y_pred_model = model.predict(normalize(data_sample)).flatten()\n",
+        "y_pred_model = np.round(y_pred_model)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 313
+        },
+        "id": "GWssFePDXEks",
+        "outputId": "9eee2c60-f1c1-4a34-c682-665389fe3aee"
+      },
+      "source": [
+        "# Create confusion matrix\n",
+        "conf_matrix = confusion_matrix(y_true, y_pred_model)\n",
+        "\n",
+        "# Show confusion matrix\n",
+        "ax = plt.subplot()\n",
+        "sns.heatmap(conf_matrix, annot=True, ax=ax, cmap=\"Blues\", fmt=\"g\", cbar=False)\n",
+        "\n",
+        "# Add labels, title and ticks\n",
+        "ax.set_xlabel(\"Predicted\")\n",
+        "ax.set_ylabel(\"Actual\")\n",
+        "ax.set_title(\"Confusion Matrix\")\n",
+        "ax.xaxis.set_ticklabels([\"Benign\", \"Attack\"])\n",
+        "ax.yaxis.set_ticklabels([\"Benign\", \"Attack\"])"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "[Text(0, 0.5, 'Benign'), Text(0, 1.5, 'Attack')]"
+            ]
+          }
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEWCAYAAABliCz2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAbqElEQVR4nO3dd5xV9Z3G8c8DI02kI1ixy0Z3xRLjakBRg2JXVAQrKqyxxMTgWhcs0aBJTIwaxbI2jFGz9oKokYglu6JR7A2ChSJNlCbM8N0/zhlyGZmZK94zl5nzvF+vec099fe9w/DMub9zzu8oIjAzs6avWbkLMDOzhuHANzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgW5MhqbWkRyTNl3Tfd9jP0ZLGlbK2cpD0hKTjy12HrTkc+NbgJA2WNFHSAknT02D6YQl2fTjQDegcEUes7k4i4q6I6FeCelYiaQ9JIen+GvO3S+ePL3I/F0kaU996EdE/Im5fzXKtCXLgW4OSdBbwO+ByknDeGPgDcHAJdt8DeD8iKkuwr6zMAnaV1Llg3vHA+6VqQAn/37Zv8C+FNRhJ7YFLgNMi4v6IWBgRyyLikYg4O12npaTfSZqWfv1OUst02R6SPpX0c0mfp58OhqTLLgZGAAPTTw4n1TwSlrRJeiRdkU6fIGmypK8kTZF0dMH85wu221XSy2lX0cuSdi1YNl7SpZJeSPczTlKXOn4MS4EHgaPS7ZsDRwJ31fhZXS3pE0lfSnpFUu90/r7A+QXv8/WCOi6T9AKwCNgsnXdyuvx6SX8u2P8Vkp6RpKL/Aa3Rc+BbQ/p3oBXwQB3rXADsAvQCtgN2Bi4sWN4daA9sAJwEXCepY0SMJPnUcE9EtI2IW+oqRNLawO+B/hGxDrAr8Noq1usEPJau2xm4CnisxhH6YGAIsC7QAhheV9vAHcBx6et9gLeAaTXWeZnkZ9AJ+CNwn6RWETG2xvvcrmCbY4FhwDrA1Br7+znwb+kfs94kP7vjw2Or5IoD3xpSZ2B2PV0uRwOXRMTnETELuJgkyKotS5cvi4jHgQXA1qtZz3JgW0mtI2J6RLy1inX2Bz6IiDsjojIi7gbeBQ4sWOfWiHg/IhYD95IEda0i4kWgk6StSYL/jlWsMyYi5qRt/gZoSf3v87aIeCvdZlmN/S0CjiH5gzUGOCMiPq1nf9bEOPCtIc0BulR3qdRifVY+Op2azluxjxp/MBYBbb9tIRGxEBgInAJMl/SYpJ5F1FNd0wYF0zNWo547gdOBvqziE0/abfVO2o30Bcmnmrq6igA+qWthRPwfMBkQyR8myxkHvjWkl4AlwCF1rDON5ORrtY35ZndHsRYCbQqmuxcujIgnI+JHwHokR+03FVFPdU2frWZN1e4ETgUeT4++V0i7XM4h6dvvGBEdgPkkQQ1QWzdMnd0zkk4j+aQwDfjP1S/dGisHvjWYiJhPcmL1OkmHSGojaS1J/SVdma52N3ChpK7pyc8RJF0Qq+M1oI+kjdMTxudVL5DUTdJBaV/+1yRdQ1Wr2MfjwFbppaQVkgYC3wMeXc2aAIiIKcDuJOcsaloHqCS5oqdC0gigXcHymcAm3+ZKHElbAb8g6dY5FvhPSXV2PVnT48C3BhURVwFnkZyInUXSDXE6yZUrkITSRGAS8Abwajpvddp6Crgn3dcrrBzSzUhOZE4D5pKE76mr2Mcc4IB03TkkR8YHRMTs1ampxr6fj4hVfXp5EniC5FLNqSSfigq7a6pvKpsj6dX62km70MYAV0TE6xHxAcmVPndWXwFl+SCfpDczywcf4ZuZ5YQD38wsJxz4ZmY54cA3M8uJum6AKavW25/us8m2Rpr38rXlLsGsVq0qqHV8JB/hm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4Z
mY54cA3M8sJB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU448M3McsKBb2aWExVZNyCpOdCtsK2I+Djrds3MbGWZBr6kM4CRwExgeTo7gH/Lsl0zM/umrI/wzwS2jog5GbdjZmb1yLoP/xNgfsZtmJlZEbI+wp8MjJf0GPB19cyIuCrjds3MrIasA//j9KtF+mVmZmWSaeBHxMVZ7t/MzIqX9VU6j5BclVNoPjARGB0RS7Js38zM/inrk7aTgQXATenXlySXaG6VTpuZWQPJug9/+4joUzD9iKTnIqKPpLcybtvMzApkfYTfVdLG1RPp6y7p5NKM2zYzswJZH+H/HHhe0keAgE2BUyWtDdyecdtN2g0jj6Z/n22ZNfcrdjri8hXzf3zU7pwysA+VVcsZO+FNLrj6IXbapgfX/tcgACS47IbHefjZSQAcue+OnH3iPkQE02fN58QLb2fOFwvL8p4sf16Y8BxXjLqM5VXLOXTAEZw0dFi5S2rSFFHznGqJG5BaAj1JAv/dYk/Utt7+9GwLa+R222FzFi76mpsvPW5F4PfZaUvOOXkfDj3jBpYuq6Rrx7bMmreA1q3WYumyKqqqltO9Szv+957z2KzfBQBMHncZOwz4BXO+WMhlZx7MoiXLuGz04+V8a2u8eS9fW+4SmoSqqioO2n8fRt90K926dWPwwMMZ9aur2HyLLcpdWqPWqgLVtiyTLh1Je6bfDwP2BzYHNgP2S+fZd/TCqx8xd/6ileYNO6I3v771KZYuqwRg1rwFACxesoyqqmQoo5Yt1qL6j7yUfK3dOrlFYp22rZk+yzdGW8N4841JbLRRDzbcaCPWatGCfffbn/HPPlPuspq0rLp0dgf+Ahy4imUB3J9Ru7m2RY912W37zbn4tANZsnQZ5131AK+8nQxM+v1te3DDRcew8XqdOOnC21f8ATjz8nt4+d7zWbh4KR99Mouf/vKecr4Fy5HPZ86k+3rdV0yv260bb0yaVMaKmr5MjvAjYmT6fcgqvk6sbTtJwyRNlDSxcrYv4vm2Kpo3o2O7NvQ57tec/9sHGXPlP3/UL785lR0Pv4wfHnMlZ5/Yj5YtKqioaMbQw3uzy6Ar2KzfBbz5/mecfWK/Mr4Dy5P4xi06INXaG2ElkPWNVy2BAcAmrDwe/iWrWj8ibgRuBPfhr47PZn7Bg8+8DsDEt6ayfHnQpWNbZqddOwDvTZnJwsVL2WaL9Vd09E35dDYAf37qVYYPceBbw+jWrTszps9YMf35zJmsu+66Zayo6cv6ssyHgIOBSmBhwZdl4JHxk9hj560A2GLjdWmxVgWz5y2gx/qdad48+afeeL2ObLVJN6ZOm8O0WfPpuVl3unRsC8Beu/TkvSkzat2/WSlts+2/8vHH/+DTTz9h2dKljH38MXbvu2e5y2rSsr4sc8OI2DfjNnLp9l+eQO8dt6RLh7Z8OPZSLr3hcW5/8CVGX3Q0E+87n6XLqjh5xJ0A7Lr9Zgwf0o9llVUsXx6cefk9Ky69vPzGJ3jq5p+yrLKKj6fPZdjIMeV8W5YjFRUVnHfBCH487GSWL6/ikEMHsMUWW5a7rCYt08syJd0IXBMRb3zbbd2lY2sqX5Zpa7K6LsvM+gj/h8AJkqaQjIcvICLCjzg0M2tgWQd+/4z3b2ZmRcr0pG1ETAU2AvZMXy/Kuk0zM1u1TMNX0kjgHOC8dNZagM8KmpmVQdZH24cCB5FeihkR04B1Mm7TzMxWIevAXxrJZUABkI6SaWZmZZB14N8raTTQQdJQ4Gn8pCszs7LI+iHmv5b0I5JHG24NjIiIp7Js08zMVi3ryzJJA/4pSV2AOVm3Z2Zmq5bVePi7SBov6X5J2
0t6E3gTmCnJQy2YmZVBVkf41wLnA+1JxsXvHxF/k9QTuBsYm1G7ZmZWi6xO2lZExLiIuA+YERF/A4iIdzNqz8zM6pFV4C8veL24xjIPimZmVgZZdelsJ+lLksHSWqevSadbZdSmmZnVIZPAj4jmWezXzMxWnwcyMzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU7U+ohDSTvUtWFEvFr6cszMLCt1PdP2N3UsC2DPEtdiZmYZqjXwI6JvQxZiZmbZqusIfwVJ2wLfA1pVz4uIO7IqyszMSq/ewJc0EtiDJPAfB/oDzwMOfDOzRqSYq3QOB/YCZkTEEGA7oGWmVZmZWckVE/iLI2I5UCmpHfA5sFm2ZZmZWakV04c/UVIH4CbgFWAB8H+ZVmVmZiVXb+BHxKnpyxskjQXaRcSkbMsyM7NSK+akbZ9VzYuI57IpyczMslBMl87ZBa9bATuTdO34xiszs0akmC6dAwunJW0EXJlZRWZmlonVGTztU2DbUhdiZmbZKqYP/xqSsXMg+QPRC3g9y6LMzKz0iross+B1JXB3RLyQUT1mZpaRYgK/Q0RcXThD0pk155mZ2ZpNEVH3CtKrEbFDjXl/j4jtsyzsvRmL6i7MrEw6tW1R7hLMatW1bYVqW1bXA1AGAYOBTSU9XLBoHWBO6cozM7OGUFeXzovAdKALKz8M5SvAd9qamTUydT0AZSowVdLRwLSIWAIgqTWwIfCPBqnQzMxKopjr8O8FlhdMVwH3ZVOOmZllpZjAr4iIpdUT6WuftTIza2SKCfxZkg6qnpB0MDA7u5LMzCwLxVyHfwpwl6Rr0+lPgeOyK8nMzLJQzOBpHwG7SGpLct3+V9mXZWZmpVZvl46kyyV1iIgFEfGVpI6SftEQxZmZWekU04ffPyK+qJ6IiHnAftmVZGZmWSgm8JtLalk9kV6H37KO9c3MbA1UzEnbMcAzkm5Np4cAt2dXkpmZZaGYk7ZXSpoE7A0IGAv0yLowMzMrrWKfeDWD5G7bAcBewDuZVWRmZpmoa7TMrYCjgEEko2PeQ3JZZt8Gqs3MzEqori6dd4EJwIER8SGApJ81SFVmZlZydXXpDCDpynlW0k2S9iLpwzczs0ao1sCPiAciYiDQExgP/AzoJul6Sf0aqD4zMyuRek/aRsTCiLgrIg4gGQf/NeDczCszM7OSqveZtuXiZ9ramsrPtLU1WV3PtC32skwzM2vkHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOZBb4kk5axbxRWbVnZmZ1q8hw34dLWhIRdwFI+gPQMsP2zMysDlkG/mHAw5KWA/2BuRFxaobtmZlZHUoe+JI6FUyeDDwIvABcIqlTRMwtdZtmZlY/RURpdyhNAQJQwfdqERGbFbOf92YsKm1hZiXSqW2LcpdgVquubStU27KSH+FHxKal3qeZmX13WV6lc5qkDgXTHSW5D9/MrEyyvA5/aER8UT0REfOAoRm2Z2Zmdcgy8JtJWtGXJKk54M5PM7MyyfKyzCeBeyXdQHLy9hRgbIbt5dasz2fwu8v+i3lz56BmYp8DB3DQ4YP54603MO7R+2nfoSMAxw49nZ126U1l5TKuufISJr//LlVVVfTdZ3+OOOYb98mZlcTlF1/IixP+SsdOnbjz3ocA+HL+F4w4bzgzpn1G9/U34JJRv6Fdu/YrtpkxfRrHHnEQQ4adxuDjhpSr9CYny8A/B/gP4MckV+qMA27OsL3cat68OSeedhabb/UvLFq0kLOGDqbXTj8A4OAjjuHQo45baf0Xnn2aymVLuea2+/h6yWJOO
34AffbqT7f11i9H+dbE7XfgIQw4cjC/GHneinljbruZHb//A44dMpQ7b72JMbfdzKk/+fmK5ddcdQU/2LV3Ocpt0jLr0omI5RFxfUQcHhEDImJ0RFRl1V6ederclc23+hcA2rRZmw17bMqcWbNq30CwZPESqior+frrr6moWIs2a6/dQNVa3vTaYSfatW+/0rwJf32W/gccAkD/Aw5hwvi/rFj23LPPsP4GG7Hp5ls0aJ15kOVVOltK+rOktyVNrv7Kqj1LzJw+jckfvMfW39sWgMce+BNnDDmSq0ddxIKvvgRgtz32plXrVhx/2I846cj+HDLwONZp176u3ZqV1Lw5c+jStSsAXbp2Zd7c5H7MxYsXcdfttzBk2I/LWV6TleVJ21uB64FKoC9wB3BnXRtIGiZpoqSJ99z53xmW1jQtXrSIUSOGc/IZw2mzdlv6H3wEo//4CFff8ic6de7CLdddBcD777xFs2bNue3+cdz0p8d46N47mTHt0zJXbwa33HAdRw4+jjZt/IkzC1n24beOiGckKSKmAhdJmgCMrG2DiLgRuBF8p+23VVm5jFEjhrP73v3Ztc9eAHTs1HnF8n4HHMal5/0EgOeefoIddt6Vioq16NCxEz237cWH775N9/U3LEvtlj8dO3dm9qxZdOnaldmzZtGxUzIiy9tvTmL8M+O4/ve/YcFXX6FmomXLFgwYeHSZK24asgz8JZKaAR9IOh34DFg3w/ZyKyK45oqL2bDHphwy8NgV8+fOmUWnzsnH5r9N+As9Nt0cgK7dujPp1ZfZo9/+fL1kCe+/PYmDjhhcltotn37Ypy9PPPogxw4ZyhOPPkjv3fsC8Idb/tkJcMvo62jduo3DvoRKPpbOih1L3wfeAToAlwLtgCsj4n+L2d5H+MV7e9LfOfeME+mx2ZY0a5bc+nDs0NN57uknmfLheyDRrft6nDr8Qjp17sriRYu4etRIPpk6GSLYq//BHDbo+DK/i8bDY+l8OyPPH85rE1/miy++oFPnzpz0H6fRe4+9GHHuWcycMZ1u3dfj0iuuol37DittVx34vizz26lrLJ0sA/+IiLivvnm1ceDbmsqBb2uyugI/y5O25xU5z8zMGkAW4+H3B/YDNpD0+4JF7Uiu2DEzszLI4qTtNGAicBDwSsH8r4CfZdCemZkVIYvx8F8HXpfULSJuL1wm6Uzg6lK3aWZm9cuyD/+oVcw7IcP2zMysDln04Q8CBgObSnq4YFE7YHap2zMzs+Jk0Yf/IjAd6AL8pmB+AAMzaM/MzIqQRR/+VGAq8O+SepEc7R8JTAH+p9TtmZlZcbLo0tmKpP9+EDAHuIfkBq++pW7LzMyKl0WXzrvABODAiPgQQJIvxzQzK7MsrtIZAMwAnpV0k6S9SJ54ZWZmZVTywI+IByJiINATGE9ys1U3SddL6lfq9szMrDhZPuJwYUTcFREHABsCrwHnZtWemZnVLbPRMr8rj5ZpayqPlmlrsnKNlmlmZmsQB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOOPDNzHLCgW9mlhOKiHLXYA1A0rCIuLHcdZjV5N/NhuMj/PwYVu4CzGrh380G4sA3M8sJB76ZWU448PPDfaS2pvLvZgPxSVszs5zwEb6ZWU448M3McsKB3whIqpL0mqTXJb0qadfvsK9LJO1dyvrMACQdKikk9UynN5E0uGB5L0n7fYf9/0NSl1LUmlcO/MZhcUT0iojtgPOAX67ujiJiREQ8XbrSzFYYBDwPHJVObwIMLljeC1jtwLfvzoHf+LQD5lVPS
Dpb0suSJkm6OJ23iaR3JN0k6S1J4yS1TpfdJunw9PV+kt6V9Lyk30t6NJ1/kaT/ljRe0mRJPynD+7RGRFJbYDfgJP4Z+KOA3umn03OAS4CB6fRASTtLelHS39PvW6f7ai7p15LeSH+vz6jRVmtJYyUNbcC32CRUlLsAK0prSa8BrYD1gD0BJPUDtgR2BgQ8LKkP8HE6f1BEDJV0LzAAGFO9Q0mtgNFAn4iYIunuGm32BPoC6wDvSbo+IpZl+SatUTsEGBsR70uaK2kH4FxgeEQcACBpJrBTRJyeTrcj+f2rTLsZLyf5PR0GbApsny7rVNBOW+BPwB0RcUeDvbsmwkf4jUN1l05PYF/gDkkC+qVffwdeJQnpLdNtpkTEa+nrV0g+XhfqCUyOiCnpdM3Afywivo6I2cDnQLdSviFrcgaRBDHp90FFbNMeuE/Sm8BvgW3S+XsDN0REJUBEzC3Y5iHgVof96vERfiMTES+lJ666khzV/zIiRheuI2kT4OuCWVVA6xq7Uj1N1dzevyu2SpI6k3zq3FZSAM2BAB6vZ9NLgWcj4tD0d3Z89S7T7VflBaC/pD+GbyL61nyE38ikV0A0B+YATwInpv2nSNpA0rpF7updYLP0PxrAwBKXavlxOEkXS4+I2CQiNgKmAMtJugSrfVVjuj3wWfr6hIL544BTJFUA1OjSGUHyu/+Hkr6DnHDgNw6t0xNdrwH3AMdHRFVEjAP+CLwk6Q3gz6z8H6pWEbEYOBUYK+l5YCYwP5vyrYkbBDxQY97/kJy8rUwvJ/4Z8CzwveqTtsCVwC8lvUByEFPtZpLzUJMkvc7KV/oA/BRoJenKDN5Lk+ahFXJMUtuIWJCeD7gO+CAiflvuuswsGz7Cz7eh6aeGt0g+Xo+uZ30za8R8hG9mlhM+wjczywkHvplZTjjwzcxywoFvTVbBKKNvSrpPUpvvsK89CsYaOkjSuXWs20HSqavRxkWShq9ujWb1ceBbU1Y9JMW2wFLglMKFSnzr/wMR8XBEjKpjlQ4k9ziYrVEc+JYXE4AtCkYS/QPJ+EMbSeon6aX0WQP3Fdy5vG/1aKLAYdU7knSCpGvT190kPZDeXPS6kmcVjAI2Tz9d/Cpd7xujmqbzL5D0nqSnga0b7KdhueTAtyYvvUW/P/BGOmtrkqEAtgcWAhcCe0fEDsBE4Kx0NNGbgAOB3kD3Wnb/e+Cv6bMKdiC5p+Fc4KP008XZNUY17QXsKKmPpB1J7kbdnuQPyvdL/NbNVuIBsawpqx5WGpIj/FuA9YGpEfG3dP4uwPeAF5IbjmkBvEQymuiUiPgAQNIYkmF7a9oTOA4gIqqA+ZI61lincFRTSIb43ZJkGIwHImJR2sbD3+ndmtXDgW9N2eKI6FU4Iw31hYWzgKciYlCN9XpR+4iN31Zto5r+tIRtmNXLXTqWd38DdpO0BYCkNpK2IhlNdFNJm6fr1Ta++zPAj9Ntm6cP9ag5KmRto5o+BxyaPsFpHZLuI7PMOPAt1yJiFsnQvHdLmkTyB6BnRCwh6cJ5LD1pO7WWXZwJ9E1HK30F2CYi5pB0Eb0p6Ve1jWoaEa+SjH76GsnokhMye6NmeCwdM7Pc8BG+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnx/zT+z1Wh63oMAAAAAElFTkSuQmCC",
+            "text/plain": [
+              "<Figure size 432x288 with 1 Axes>"
+            ]
+          }
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "VJRwvXOvYtBL",
+        "outputId": "9d45426b-6bd8-4374-dcf8-9ffecedd7176"
+      },
+      "source": [
+        "# Calculate accuracy\n",
+        "acc = accuracy_score(y_true, y_pred_model, normalize=True, sample_weight=None)\n",
+        "precision = precision_score(y_true, y_pred_model)\n",
+        "recall = recall_score(y_true, y_pred_model)\n",
+        "\n",
+        "print(f\"Accuracy: {acc:.3f}\")\n",
+        "print(f\"Precision: {precision:.3f}\")\n",
+        "print(f\"Recall: {recall:.3f}\")"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Accuracy: 0.871\n",
+            "Precision: 1.000\n",
+            "Recall: 0.287\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 112
+        },
+        "id": "TFbfVi_-W6GT",
+        "outputId": "68facf43-ac41-44de-d59a-a489627cc893"
+      },
+      "source": [
+        "# Calculate per class accuracy\n",
+        "cmd = confusion_matrix(y_true, y_pred_model, normalize=\"true\").diagonal()\n",
+        "per_class_accuracy_df = pd.DataFrame(\n",
+        "    [(index, round(value, 4)) for index, value in zip([\"Benign\", \"Attack\"], cmd)],\n",
+        "    columns=[\"type\", \"accuracy\"],\n",
+        ")\n",
+        "per_class_accuracy_df = per_class_accuracy_df.round(2)\n",
+        "display(per_class_accuracy_df)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>type</th>\n",
+              "      <th>accuracy</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>Benign</td>\n",
+              "      <td>1.00</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>Attack</td>\n",
+              "      <td>0.29</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "     type  accuracy\n",
+              "0  Benign      1.00\n",
+              "1  Attack      0.29"
+            ]
+          }
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "_E5rnFehXSls"
+      },
+      "source": [
+        "As we can see, the direct application of our model produced much worse results. Pinecone's similarity search over the same model's embeddings improved our threat detection (i.e., \"Attack\") accuracy by over 50%!"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "OiT0JgQxktOC"
+      },
+      "source": [
+        "### Result summary"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "UBBv6-dnfEeX"
+      },
+      "source": [
+        "Using standard vector embeddings with Pinecone's similarity search service, we detected 85% of the attacks while keeping a low 3% false-positive rate. We also showed that our similarity search approach outperforms the direct classification approach that utilizes the classifier's embedding model. Similarity search-based detection gained 50% higher accuracy compared to the direct detector.\n",
+        "\n",
+        "[Original published results](https://github.com/rambasnet/DeepLearning-IDS/blob/master/graphics/confusion_matrices/) for 02-22-2018 show that the model was able to correctly detect 208520 benign cases out of 208520 benign cases, and 24 (18+1+5) attacks out of 70 attacks in the test set, making this model **34.3% accurate in predicting attacks**. For testing purposes, 20% of the data for 02-22-2018 was used. \n",
+        "\n",
+        "![02-22-2018--6-15%281%29.png](https://raw.githubusercontent.com/rambasnet/DeepLearning-IDS/master/graphics/confusion_matrices/02-22-2018--6-15(1).png)\n",
+        "\n",
+        "As you can see, using the model to create embeddings for Pinecone's similarity search yielded much better attack-detection performance. \n",
+        "\n",
+        "The model we have created follows the academic paper ([model for the same date](https://github.com/rambasnet/DeepLearning-IDS/blob/master/keras_tensorflow_models/) (02-23-2018)) and is slightly modified, but it is still a straightforward, sequential, shallow model. We have changed the number of classes from four (Benign, BruteForce-Web, BruteForce-XSS, SQL-Injection) to two (Benign and Attack), since we are only interested in whether an attack is detected or not. We have also changed the validation metrics to precision and recall. These changes improved our results. Still, there is room for further improvement, for example by adding more data covering multiple days and different types of attacks."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "JdZcEl1pfEeX"
+      },
+      "source": [
+        "## Delete the Index"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "hyxLeOnSfEeX"
+      },
+      "source": [
+        "Delete the index once you are sure that you do not want to use it anymore. Once it is deleted, you cannot reuse it."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ywvaJrVkfEeX"
+      },
+      "source": [
+        "pc.delete_index(index_name)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Hu2mIbHms3k7"
+      },
+      "source": [
+        "---"
       ]
-     },
-     "output_type": "display_data",
-     "metadata": {}
     }
-   ],
-   "source": [
-    "# Calculate per class accuracy\n",
-    "cmd = confusion_matrix(y_true, y_pred_model, normalize=\"true\").diagonal()\n",
-    "per_class_accuracy_df = pd.DataFrame(\n",
-    "    [(index, round(value, 4)) for index, value in zip([\"Benign\", \"Attack\"], cmd)],\n",
-    "    columns=[\"type\", \"accuracy\"],\n",
-    ")\n",
-    "per_class_accuracy_df = per_class_accuracy_df.round(2)\n",
-    "display(per_class_accuracy_df)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "_E5rnFehXSls"
-   },
-   "source": [
-    "As we can see, the direct application of our model produced much worse results. Pinecone's similarity search over the same model's embeddings improved our threat detection (i.e., \"Attack\") accuracy by over 50%!"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "OiT0JgQxktOC"
-   },
-   "source": [
-    "### Result summary"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "UBBv6-dnfEeX"
-   },
-   "source": [
-    "Using standard vector embeddings with Pinecone's similarity search service, we detected 85% of the attacks while keeping a low 3% false-positive rate. We also showed that our similarity search approach outperforms the direct classification approach that utilizes the classifier's embedding model. Similarity search-based detection gained 50% higher accuracy compared to the direct detector.\n",
-    "\n",
-    "[Original published results](https://github.com/rambasnet/DeepLearning-IDS/blob/master/graphics/confusion_matrices/) for 02-22-2018 show that the model was able to correctly detect 208520 benign cases out of 208520 benign cases, and 24 (18+1+5) attacks out of 70 attacks in the test set making this model **34.3% accurate in predicting attacks**. For testing purposes, 20% of the data for 02-22-2018 was used. \n",
-    "\n",
-    "![02-22-2018--6-15%281%29.png](https://raw.githubusercontent.com/rambasnet/DeepLearning-IDS/master/graphics/confusion_matrices/02-22-2018--6-15(1).png)\n",
-    "\n",
-    "As you can see, the model's performance for creating embeddings for Pinecone was much higher. \n",
-    "\n",
-    "The model we have created follows the academic paper ([model for the same date](https://github.com/rambasnet/DeepLearning-IDS/blob/master/keras_tensorflow_models/) (02-23-2018)) and is slightly modified, but still a straightforward, sequential, shallow model. We have changed the number of classes from four (Benign, BruteForce-Web, BruteForce-XSS, SQL-Injection) to two (Benign and Attack), only interested in whether we are detecting an attack or not. We have also changed validation metrics to precision and recall. These changes improved our results. Yet, there is still room for further improvements, for example, by adding more data covering multiple days and different types of attacks."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "JdZcEl1pfEeX"
-   },
-   "source": [
-    "## Delete the Index"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "hyxLeOnSfEeX"
-   },
-   "source": [
-    "Delete the index once you are sure that you do not want to use it anymore. Once it is deleted, you cannot reuse it."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "ywvaJrVkfEeX"
-   },
-   "outputs": [],
-   "source": [
-    "pc.delete_index(index_name)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "Hu2mIbHms3k7"
-   },
-   "source": [
-    "---"
-   ]
-  }
- ],
- "metadata": {
-  "accelerator": "TPU",
-  "colab": {
-   "collapsed_sections": [],
-   "machine_shape": "hm",
-   "name": "it_threat_detection.ipynb",
-   "provenance": []
-  },
-  "environment": {
-   "name": "tf2-gpu.2-3.m65",
-   "type": "gcloud",
-   "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-3:m65"
-  },
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.7 (main, Sep 14 2022, 22:38:23) [Clang 14.0.0 (clang-1400.0.29.102)]"
+  ],
+  "metadata": {
+    "accelerator": "TPU",
+    "colab": {
+      "collapsed_sections": [],
+      "machine_shape": "hm",
+      "name": "it_threat_detection.ipynb",
+      "provenance": []
+    },
+    "environment": {
+      "name": "tf2-gpu.2-3.m65",
+      "type": "gcloud",
+      "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-3:m65"
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.10.7 (main, Sep 14 2022, 22:38:23) [Clang 14.0.0 (clang-1400.0.29.102)]"
+    },
+    "vscode": {
+      "interpreter": {
+        "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
+      }
+    }
   },
-  "vscode": {
-   "interpreter": {
-    "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}
+  "nbformat": 4,
+  "nbformat_minor": 1
+}
\ No newline at end of file

From fa0636c72dbe6bc87fe3c5febc5ae8aa5d29e70e Mon Sep 17 00:00:00 2001
From: Jen Hamon <jhamon@pinecone.io>
Date: Thu, 29 Jan 2026 10:20:14 -0500
Subject: [PATCH 4/5] fix(learn): add required metadata/name to
 it-threat-detection notebook outputs

---
 learn/security/it-threat-detection.ipynb | 3488 +++++++++++-----------
 scripts/ticketbot.py                     |  371 +++
 2 files changed, 2131 insertions(+), 1728 deletions(-)
 create mode 100644 scripts/ticketbot.py

diff --git a/learn/security/it-threat-detection.ipynb b/learn/security/it-threat-detection.ipynb
index 01746744..37f037bc 100644
--- a/learn/security/it-threat-detection.ipynb
+++ b/learn/security/it-threat-detection.ipynb
@@ -1,1761 +1,1793 @@
 {
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/security/it-threat-detection.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/security/it-threat-detection.ipynb)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "b4Yv1jeGywpL"
-      },
-      "source": [
-        "## IT Threat Detection With Similarity Search\n",
-        "\n",
-        "This notebook shows how to use Pinecone's similarity search as a service to build an application for detecting rare events. Such application is common in cyber-security and fraud detection domains wherein only a tiny fraction of the events are malicious. \n",
-        "\n",
-        "Here we will build a network intrusion detector. Network intrusion detection systems monitor incoming and outgoing network traffic flow, raising alarms whenever a threat is detected. Here we use a deep-learning model and similarity search in detecting and classifying network intrusion traffic.\n",
-        "\n",
-        "We will start by indexing a set of labeled traffic events in the form of vector embeddings. Each event is either benign or malicious. The vector embeddings are rich, mathematical representations of the network traffic events. It is making it possible to determine how similar the network events are to one another using similarity-search algorithms built into Pinecone. Here we will transform network traffic events into vectors using a deep learning model from recent academic work.\n",
-        "\n",
-        "\n",
-        "We will then take some new (unseen) network events and search through the index to find the most similar matches, along with their labels. In such a way, we will propagate the matched labels to classify the unseen events as benign or malicious. Mind that the intrusion detection task is a challenging classification task because malicious events are sporadic. The similarity search service helps us sift the most relevant historical labeled events. That way, we identify these rare events while keeping a low rate of false alarms. \n"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "1OUSClcPhU4j"
-      },
-      "source": [
-        "## Setting up Pinecone\n",
-        "\n",
-        "We will first install and initialize Pinecone. You can get your [API Key here](https://app.pinecone.io)."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "251n1avKzCrm"
-      },
-      "source": [
-        "!pip install -qU pinecone"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "_cGTuY8dywpV",
-        "outputId": "da0b57bf-bde2-401f-e502-6ad80f7fc6cc"
-      },
-      "source": [
-        "import os\n",
-        "from getpass import getpass\n",
-        "\n",
-        "from pinecone import Pinecone\n",
-        "\n",
-        "# initialize connection to pinecone (get API key at app.pinecone.io)\n",
-        "api_key = os.environ.get(\"PINECONE_API_KEY\") or getpass(\"Enter your Pinecone API key: \")\n",
-        "\n",
-        "# configure client\n",
-        "pc = Pinecone(api_key=api_key)"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "[]"
-            ]
-          }
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "a7ysNAlrjD_k"
-      },
-      "source": [
-        "## Installing other dependencies"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "r3g-b61IywpQ"
-      },
-      "source": [
-        "!pip install -qU pip python-dateutil tensorflow scikit-learn matplotlib seaborn"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "0uuHcP-WywpQ"
-      },
-      "source": [
-        "from collections import Counter\n",
-        "\n",
-        "import matplotlib.pyplot as plt\n",
-        "import pandas as pd\n",
-        "import seaborn as sns\n",
-        "import tensorflow.keras.backend as K\n",
-        "from sklearn.metrics import (\n",
-        "    accuracy_score,\n",
-        "    confusion_matrix,\n",
-        "    precision_score,\n",
-        "    recall_score,\n",
-        ")\n",
-        "from tensorflow import keras\n",
-        "from tensorflow.keras.models import Model"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "lEwqF1osp83o"
-      },
-      "source": [
-        "We will use some of the code from an [academic work on deep learning for intrusion detection](https://github.com/rambasnet/DeepLearning-IDS). Let's clone the repository that we will use to prepare data."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "FUDeovNiywpT"
-      },
-      "source": [
-        "!git clone -q https://github.com/rambasnet/DeepLearning-IDS.git "
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "mc4ERmwniO1H"
-      },
-      "source": [
-        "## Define a New Pinecone Index"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Now we setup our index specification, this allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/guides/get-started/understanding-organizations#regions)."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {},
-      "source": [
-        "from pinecone import ServerlessSpec\n",
-        "\n",
-        "cloud = os.environ.get(\"PINECONE_CLOUD\") or \"aws\"\n",
-        "region = os.environ.get(\"PINECONE_REGION\") or \"us-east-1\"\n",
-        "\n",
-        "spec = ServerlessSpec(cloud=cloud, region=region)"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Create the index:"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "9_TIgYxBywpV"
-      },
-      "source": [
-        "# Pick a name for the new service\n",
-        "index_name = \"it-threats\""
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {},
-      "source": [
-        "import time\n",
-        "\n",
-        "# check if index already exists (it shouldn't if this is first time)\n",
-        "if not pc.has_index(index_name):\n",
-        "    # if does not exist, create index\n",
-        "    pc.create_index(name=index_name, dimension=128, metric=\"euclidean\", spec=spec)\n",
-        "    # wait for index to be initialized\n",
-        "    while not pc.describe_index(index_name).status[\"ready\"]:\n",
-        "        time.sleep(1)\n",
-        "\n",
-        "# connect to index\n",
-        "index = pc.Index(index_name)\n",
-        "# view index stats\n",
-        "index.describe_index_stats()"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "IOP9jCo5ywpX"
-      },
-      "source": [
-        "## Upload\n",
-        "Here we transform network events into vector embeddings, then upload them into Pinecone's vector index. "
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "PzkJJd8ZYTNM"
-      },
-      "source": [
-        "### Prepare Data"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "-N0bh6dUYTNN"
-      },
-      "source": [
-        "The datasets we use consist of benign (normal) network traffic and malicious traffic\n",
-        "generated from several different network attacks. We will focus on web attacks only. \n",
-        "\n",
-        "The web attack category consists of three common attacks: \n",
-        "- Cross-site scripting (BruteForce-XSS), \n",
-        "- SQL-Injection (SQL-Injection), \n",
-        "- Brute force administrative and user passwords (BruteForce-Web)\n",
-        "\n",
-        "The original data was recorded over two days."
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "vuOLNAehYTNN"
-      },
-      "source": [
-        "**Download data for 22-02-2018 and 23-02-2018**\n",
-        "\n",
-        "\n"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "pCrwPkDJYTNO"
-      },
-      "source": [
-        "Files should be downloaded to the current directory. We will be using one date for training and generating vectors, and another one for testing."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "HshKyF0KywpR",
-        "outputId": "d5b3ceee-b584-47e2-a428-43e5c835968f"
-      },
-      "source": [
-        "!wget \"https://cse-cic-ids2018.s3.ca-central-1.amazonaws.com/Processed%20Traffic%20Data%20for%20ML%20Algorithms/Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\" -q --show-progress\n",
-        "!wget \"https://cse-cic-ids2018.s3.ca-central-1.amazonaws.com/Processed%20Traffic%20Data%20for%20ML%20Algorithms/Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\" -q --show-progress"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "Thursday-22-02-2018 100%[===================>] 364.91M  3.07MB/s    in 2m 6s   \n",
-            "Friday-23-02-2018_T 100%[===================>] 365.10M  3.07MB/s    in 1m 53s  \n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "gr-hay4rfk0d"
-      },
-      "source": [
-        "Let's look at the data events first."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "bSSZ4YCZywpT",
-        "outputId": "27d111b0-c0c1-4694-9e07-65965d2014f1"
-      },
-      "source": [
-        "data = pd.read_csv(\"Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\")\n",
-        "data.Label.value_counts()"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "Benign              1048009\n",
-              "Brute Force -Web        362\n",
-              "Brute Force -XSS        151\n",
-              "SQL Injection            53\n",
-              "Name: Label, dtype: int64"
-            ]
-          }
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "vYl82iXeywpT"
-      },
-      "source": [
-        "**Clean the data** using a python script from the cloned repository."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "y-trCZOcywpT",
-        "outputId": "c51b5e77-4e66-47a3-c4f6-d492a7dfaef8"
-      },
-      "source": [
-        "!python DeepLearning-IDS/data_cleanup.py \"Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\" \"result23022018\""
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "cleaning Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\n",
-            "total rows read = 1048576\n",
-            "all done writing 1042868 rows; dropped 5708 rows\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "oKHL8HCjYTNQ"
-      },
-      "source": [
-        "Load the file that you got from the previous step."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 278
-        },
-        "id": "uzH6_tBpywpU",
-        "outputId": "017263dd-8e2a-45a4-9693-5b5b763197ce"
-      },
-      "source": [
-        "data_23_cleaned = pd.read_csv(\"result23022018.csv\")\n",
-        "data_23_cleaned.head()"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/html": [
-              "<div>\n",
-              "<style scoped>\n",
-              "    .dataframe tbody tr th:only-of-type {\n",
-              "        vertical-align: middle;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe tbody tr th {\n",
-              "        vertical-align: top;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe thead th {\n",
-              "        text-align: right;\n",
-              "    }\n",
-              "</style>\n",
-              "<table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              "    <tr style=\"text-align: right;\">\n",
-              "      <th></th>\n",
-              "      <th>Dst Port</th>\n",
-              "      <th>Protocol</th>\n",
-              "      <th>Timestamp</th>\n",
-              "      <th>Flow Duration</th>\n",
-              "      <th>Tot Fwd Pkts</th>\n",
-              "      <th>Tot Bwd Pkts</th>\n",
-              "      <th>TotLen Fwd Pkts</th>\n",
-              "      <th>TotLen Bwd Pkts</th>\n",
-              "      <th>Fwd Pkt Len Max</th>\n",
-              "      <th>Fwd Pkt Len Min</th>\n",
-              "      <th>...</th>\n",
-              "      <th>Fwd Seg Size Min</th>\n",
-              "      <th>Active Mean</th>\n",
-              "      <th>Active Std</th>\n",
-              "      <th>Active Max</th>\n",
-              "      <th>Active Min</th>\n",
-              "      <th>Idle Mean</th>\n",
-              "      <th>Idle Std</th>\n",
-              "      <th>Idle Max</th>\n",
-              "      <th>Idle Min</th>\n",
-              "      <th>Label</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <th>0</th>\n",
-              "      <td>22</td>\n",
-              "      <td>6</td>\n",
-              "      <td>1.519374e+09</td>\n",
-              "      <td>1532698</td>\n",
-              "      <td>11</td>\n",
-              "      <td>11</td>\n",
-              "      <td>1179</td>\n",
-              "      <td>1969</td>\n",
-              "      <td>648</td>\n",
-              "      <td>0</td>\n",
-              "      <td>...</td>\n",
-              "      <td>32</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.000000e+00</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>1</th>\n",
-              "      <td>500</td>\n",
-              "      <td>17</td>\n",
-              "      <td>1.519374e+09</td>\n",
-              "      <td>117573855</td>\n",
-              "      <td>3</td>\n",
-              "      <td>0</td>\n",
-              "      <td>1500</td>\n",
-              "      <td>0</td>\n",
-              "      <td>500</td>\n",
-              "      <td>500</td>\n",
-              "      <td>...</td>\n",
-              "      <td>8</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>58786927.5</td>\n",
-              "      <td>2.375324e+07</td>\n",
-              "      <td>75583006</td>\n",
-              "      <td>41990849</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>2</th>\n",
-              "      <td>500</td>\n",
-              "      <td>17</td>\n",
-              "      <td>1.519374e+09</td>\n",
-              "      <td>117573848</td>\n",
-              "      <td>3</td>\n",
-              "      <td>0</td>\n",
-              "      <td>1500</td>\n",
-              "      <td>0</td>\n",
-              "      <td>500</td>\n",
-              "      <td>500</td>\n",
-              "      <td>...</td>\n",
-              "      <td>8</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>58786924.0</td>\n",
-              "      <td>2.375325e+07</td>\n",
-              "      <td>75583007</td>\n",
-              "      <td>41990841</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>3</th>\n",
-              "      <td>22</td>\n",
-              "      <td>6</td>\n",
-              "      <td>1.519374e+09</td>\n",
-              "      <td>1745392</td>\n",
-              "      <td>11</td>\n",
-              "      <td>11</td>\n",
-              "      <td>1179</td>\n",
-              "      <td>1969</td>\n",
-              "      <td>648</td>\n",
-              "      <td>0</td>\n",
-              "      <td>...</td>\n",
-              "      <td>32</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.000000e+00</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>4</th>\n",
-              "      <td>500</td>\n",
-              "      <td>17</td>\n",
-              "      <td>1.519374e+09</td>\n",
-              "      <td>89483474</td>\n",
-              "      <td>6</td>\n",
-              "      <td>0</td>\n",
-              "      <td>3000</td>\n",
-              "      <td>0</td>\n",
-              "      <td>500</td>\n",
-              "      <td>500</td>\n",
-              "      <td>...</td>\n",
-              "      <td>8</td>\n",
-              "      <td>4000364.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>4000364</td>\n",
-              "      <td>4000364</td>\n",
-              "      <td>21370777.5</td>\n",
-              "      <td>1.528092e+07</td>\n",
-              "      <td>41989576</td>\n",
-              "      <td>7200485</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table>\n",
-              "<p>5 rows × 80 columns</p>\n",
-              "</div>"
-            ],
-            "text/plain": [
-              "   Dst Port  Protocol     Timestamp  Flow Duration  Tot Fwd Pkts  \\\n",
-              "0        22         6  1.519374e+09        1532698            11   \n",
-              "1       500        17  1.519374e+09      117573855             3   \n",
-              "2       500        17  1.519374e+09      117573848             3   \n",
-              "3        22         6  1.519374e+09        1745392            11   \n",
-              "4       500        17  1.519374e+09       89483474             6   \n",
-              "\n",
-              "   Tot Bwd Pkts  TotLen Fwd Pkts  TotLen Bwd Pkts  Fwd Pkt Len Max  \\\n",
-              "0            11             1179             1969              648   \n",
-              "1             0             1500                0              500   \n",
-              "2             0             1500                0              500   \n",
-              "3            11             1179             1969              648   \n",
-              "4             0             3000                0              500   \n",
-              "\n",
-              "   Fwd Pkt Len Min  ...  Fwd Seg Size Min  Active Mean  Active Std  \\\n",
-              "0                0  ...                32          0.0         0.0   \n",
-              "1              500  ...                 8          0.0         0.0   \n",
-              "2              500  ...                 8          0.0         0.0   \n",
-              "3                0  ...                32          0.0         0.0   \n",
-              "4              500  ...                 8    4000364.0         0.0   \n",
-              "\n",
-              "   Active Max  Active Min   Idle Mean      Idle Std  Idle Max  Idle Min  \\\n",
-              "0           0           0         0.0  0.000000e+00         0         0   \n",
-              "1           0           0  58786927.5  2.375324e+07  75583006  41990849   \n",
-              "2           0           0  58786924.0  2.375325e+07  75583007  41990841   \n",
-              "3           0           0         0.0  0.000000e+00         0         0   \n",
-              "4     4000364     4000364  21370777.5  1.528092e+07  41989576   7200485   \n",
-              "\n",
-              "    Label  \n",
-              "0  Benign  \n",
-              "1  Benign  \n",
-              "2  Benign  \n",
-              "3  Benign  \n",
-              "4  Benign  \n",
-              "\n",
-              "[5 rows x 80 columns]"
-            ]
-          }
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "cGXf6PmhYTNR",
-        "outputId": "3371cd8e-e8e3-4382-ad17-bfffd8e3d8ac"
-      },
-      "source": [
-        "data_23_cleaned.Label.value_counts()"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "Benign              1042301\n",
-              "Brute Force -Web        362\n",
-              "Brute Force -XSS        151\n",
-              "SQL Injection            53\n",
-              "Name: Label, dtype: int64"
-            ]
-          }
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "bWUf-dk1ywpU"
-      },
-      "source": [
-        "### Load the Model"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "97lIL236YTNR"
-      },
-      "source": [
-        "Here we load the pretrained model. The model is trained using the data from the same date."
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Bq8_hM-RfEeR"
-      },
-      "source": [
-        "We have modified [the original model](https://github.com/rambasnet/DeepLearning-IDS/blob/master/keras_tensorflow_models/02-23-2018.csv_adam_10_10_multiclass_baseline_model_1561316601.model) slightly and changed the number of classes from four (Benign, BruteForce-Web, BruteForce-XSS, SQL-Injection) to two (Benign and Attack). In the step below we will download and unzip our modified model."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "eRTvBzhoqPkR"
-      },
-      "source": [
-        "!wget -q -O it_threat_model.model.zip \"https://drive.google.com/uc?export=download&id=1VYMHOk_XMAc-QFJ_8CAPvWFfHnLpS2J_\" \n",
-        "!unzip -q it_threat_model.model.zip"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "WP44njScywpU",
-        "outputId": "2a9d2001-0328-4602-c595-372c5bf67aa4"
-      },
-      "source": [
-        "model = keras.models.load_model(\"it_threat_model.model\")\n",
-        "model.summary()"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:SavedModel saved prior to TF 2.5 detected when loading Keras model. Please ensure that you are saving the model with model.save() or tf.keras.models.save_model(), *NOT* tf.saved_model.save(). To confirm, there should be a file named \"keras_metadata.pb\" in the SavedModel directory.\n",
-            "Model: \"sequential\"\n",
-            "_________________________________________________________________\n",
-            "Layer (type)                 Output Shape              Param #   \n",
-            "=================================================================\n",
-            "dense (Dense)                (None, 128)               10240     \n",
-            "_________________________________________________________________\n",
-            "dense_1 (Dense)              (None, 64)                8256      \n",
-            "_________________________________________________________________\n",
-            "dense_2 (Dense)              (None, 1)                 65        \n",
-            "=================================================================\n",
-            "Total params: 18,561\n",
-            "Trainable params: 18,561\n",
-            "Non-trainable params: 0\n",
-            "_________________________________________________________________\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "2rCdzSrfywpV"
-      },
-      "source": [
-        "# Select the first layer\n",
-        "layer_name = \"dense\"\n",
-        "intermediate_layer_model = Model(\n",
-        "    inputs=model.input, outputs=model.get_layer(layer_name).output\n",
-        ")"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "V3dx9XkPYTNV"
-      },
-      "source": [
-        "### Upload Data"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "yO1eySIdjSOG"
-      },
-      "source": [
-        "\n",
-        "Let's define the item's ids in a way that will reflect the event's label.  Then, we index the events in Pinecone's vector index."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "nk4ZjGg-ywpX",
-        "outputId": "bce8f1fd-98e3-4313-d625-c404a4eca184"
-      },
-      "source": [
-        "from tqdm import tqdm\n",
-        "\n",
-        "items_to_upload = []\n",
-        "\n",
-        "model_res = intermediate_layer_model.predict(K.constant(data_23_cleaned.iloc[:, :-1]))\n",
-        "\n",
-        "for i, res in tqdm(zip(data_23_cleaned.iterrows(), model_res), total=len(model_res)):\n",
-        "    benign_or_attack = i[1][\"Label\"][:3]\n",
-        "    items_to_upload.append((benign_or_attack + \"_\" + str(i[0]), res.tolist()))"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1042867/1042867 [01:43<00:00, 10067.22it/s]\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "cMzD8s3ps3k0"
-      },
-      "source": [
-        "import itertools\n",
-        "\n",
-        "\n",
-        "def chunks(iterable, batch_size=100):\n",
-        "    it = iter(iterable)\n",
-        "    chunk = tuple(itertools.islice(it, batch_size))\n",
-        "    while chunk:\n",
-        "        yield chunk\n",
-        "        chunk = tuple(itertools.islice(it, batch_size))"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "6C7er-8Gl2Rg"
-      },
-      "source": [
-        "You can lower the NUMBER_OF_ITEMS and, by doing so, limit the number of uploaded items. "
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "_Ti9p0P-ywpX",
-        "scrolled": true
-      },
-      "source": [
-        "NUMBER_OF_ITEMS = len(items_to_upload)\n",
-        "\n",
-        "for batch in chunks(items_to_upload[:NUMBER_OF_ITEMS], 50):\n",
-        "    index.upsert(vectors=batch)"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "XgF8vW8PtaRX"
-      },
-      "source": [
-        "items_to_upload.clear()"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "fglWJfAq_kw3"
-      },
-      "source": [
-        "Let's verify all items were inserted. "
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "xU172A4EywpY",
-        "outputId": "80008942-0000-40e5-92a1-66b767a489c6"
-      },
-      "source": [
-        "index.describe_index_stats()"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "{'dimension': 128, 'namespaces': {'': {'vector_count': 1042867}}}"
-            ]
-          }
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "3CGzW3mVywpY"
-      },
-      "source": [
-        "## Query"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Ywuld4BylAIu"
-      },
-      "source": [
-        "First, we will randomly select a Benign/Attack event and query the vector index using the event embedding. Then, we will use data from different day, that contains same set of attacks to query on a bigger sample."
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "m5H4rMyOYTNX"
-      },
-      "source": [
-        "\n",
-        "### Evaluate the Rare Event Classification Model"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "velMK_XlYTNX"
-      },
-      "source": [
-        "We will use network intrusion dataset for 22-02-2018 for querying and testing the Pinecone."
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "VC2AVfWsj7em"
-      },
-      "source": [
-        "First, let's clean the data."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "bW9mhYvOYTNX",
-        "outputId": "84a9d548-de90-40fe-c183-fc2ca48de2ec",
-        "scrolled": true
-      },
-      "source": [
-        "!python DeepLearning-IDS/data_cleanup.py \"Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\" \"result22022018\""
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "cleaning Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\n",
-            "total rows read = 1048576\n",
-            "all done writing 1042966 rows; dropped 5610 rows\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 278
-        },
-        "id": "xqMuz0jKYTNX",
-        "outputId": "d9f8c333-5e3f-4509-b84a-901bacf42487"
-      },
-      "source": [
-        "data_22_cleaned = pd.read_csv(\"result22022018.csv\")\n",
-        "data_22_cleaned.head()"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/html": [
-              "<div>\n",
-              "<style scoped>\n",
-              "    .dataframe tbody tr th:only-of-type {\n",
-              "        vertical-align: middle;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe tbody tr th {\n",
-              "        vertical-align: top;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe thead th {\n",
-              "        text-align: right;\n",
-              "    }\n",
-              "</style>\n",
-              "<table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              "    <tr style=\"text-align: right;\">\n",
-              "      <th></th>\n",
-              "      <th>Dst Port</th>\n",
-              "      <th>Protocol</th>\n",
-              "      <th>Timestamp</th>\n",
-              "      <th>Flow Duration</th>\n",
-              "      <th>Tot Fwd Pkts</th>\n",
-              "      <th>Tot Bwd Pkts</th>\n",
-              "      <th>TotLen Fwd Pkts</th>\n",
-              "      <th>TotLen Bwd Pkts</th>\n",
-              "      <th>Fwd Pkt Len Max</th>\n",
-              "      <th>Fwd Pkt Len Min</th>\n",
-              "      <th>...</th>\n",
-              "      <th>Fwd Seg Size Min</th>\n",
-              "      <th>Active Mean</th>\n",
-              "      <th>Active Std</th>\n",
-              "      <th>Active Max</th>\n",
-              "      <th>Active Min</th>\n",
-              "      <th>Idle Mean</th>\n",
-              "      <th>Idle Std</th>\n",
-              "      <th>Idle Max</th>\n",
-              "      <th>Idle Min</th>\n",
-              "      <th>Label</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <th>0</th>\n",
-              "      <td>22</td>\n",
-              "      <td>6</td>\n",
-              "      <td>1.519288e+09</td>\n",
-              "      <td>20553406</td>\n",
-              "      <td>10</td>\n",
-              "      <td>7</td>\n",
-              "      <td>1063</td>\n",
-              "      <td>1297</td>\n",
-              "      <td>744</td>\n",
-              "      <td>0</td>\n",
-              "      <td>...</td>\n",
-              "      <td>20</td>\n",
-              "      <td>1027304.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>1027304</td>\n",
-              "      <td>1027304</td>\n",
-              "      <td>1.952608e+07</td>\n",
-              "      <td>0.000000e+00</td>\n",
-              "      <td>19526080</td>\n",
-              "      <td>19526080</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>1</th>\n",
-              "      <td>34989</td>\n",
-              "      <td>6</td>\n",
-              "      <td>1.519288e+09</td>\n",
-              "      <td>790</td>\n",
-              "      <td>2</td>\n",
-              "      <td>0</td>\n",
-              "      <td>848</td>\n",
-              "      <td>0</td>\n",
-              "      <td>848</td>\n",
-              "      <td>0</td>\n",
-              "      <td>...</td>\n",
-              "      <td>20</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0.000000e+00</td>\n",
-              "      <td>0.000000e+00</td>\n",
-              "      <td>0</td>\n",
-              "      <td>0</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>2</th>\n",
-              "      <td>500</td>\n",
-              "      <td>17</td>\n",
-              "      <td>1.519288e+09</td>\n",
-              "      <td>99745913</td>\n",
-              "      <td>5</td>\n",
-              "      <td>0</td>\n",
-              "      <td>2500</td>\n",
-              "      <td>0</td>\n",
-              "      <td>500</td>\n",
-              "      <td>500</td>\n",
-              "      <td>...</td>\n",
-              "      <td>8</td>\n",
-              "      <td>4000203.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>4000203</td>\n",
-              "      <td>4000203</td>\n",
-              "      <td>3.191524e+07</td>\n",
-              "      <td>3.792787e+07</td>\n",
-              "      <td>75584115</td>\n",
-              "      <td>7200679</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>3</th>\n",
-              "      <td>500</td>\n",
-              "      <td>17</td>\n",
-              "      <td>1.519288e+09</td>\n",
-              "      <td>99745913</td>\n",
-              "      <td>5</td>\n",
-              "      <td>0</td>\n",
-              "      <td>2500</td>\n",
-              "      <td>0</td>\n",
-              "      <td>500</td>\n",
-              "      <td>500</td>\n",
-              "      <td>...</td>\n",
-              "      <td>8</td>\n",
-              "      <td>4000189.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>4000189</td>\n",
-              "      <td>4000189</td>\n",
-              "      <td>3.191524e+07</td>\n",
-              "      <td>3.792788e+07</td>\n",
-              "      <td>75584130</td>\n",
-              "      <td>7200693</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>4</th>\n",
-              "      <td>500</td>\n",
-              "      <td>17</td>\n",
-              "      <td>1.519288e+09</td>\n",
-              "      <td>89481361</td>\n",
-              "      <td>6</td>\n",
-              "      <td>0</td>\n",
-              "      <td>3000</td>\n",
-              "      <td>0</td>\n",
-              "      <td>500</td>\n",
-              "      <td>500</td>\n",
-              "      <td>...</td>\n",
-              "      <td>8</td>\n",
-              "      <td>4000554.0</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>4000554</td>\n",
-              "      <td>4000554</td>\n",
-              "      <td>2.137020e+07</td>\n",
-              "      <td>1.528109e+07</td>\n",
-              "      <td>41990741</td>\n",
-              "      <td>7200848</td>\n",
-              "      <td>Benign</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table>\n",
-              "<p>5 rows × 80 columns</p>\n",
-              "</div>"
-            ],
-            "text/plain": [
-              "   Dst Port  Protocol     Timestamp  Flow Duration  Tot Fwd Pkts  \\\n",
-              "0        22         6  1.519288e+09       20553406            10   \n",
-              "1     34989         6  1.519288e+09            790             2   \n",
-              "2       500        17  1.519288e+09       99745913             5   \n",
-              "3       500        17  1.519288e+09       99745913             5   \n",
-              "4       500        17  1.519288e+09       89481361             6   \n",
-              "\n",
-              "   Tot Bwd Pkts  TotLen Fwd Pkts  TotLen Bwd Pkts  Fwd Pkt Len Max  \\\n",
-              "0             7             1063             1297              744   \n",
-              "1             0              848                0              848   \n",
-              "2             0             2500                0              500   \n",
-              "3             0             2500                0              500   \n",
-              "4             0             3000                0              500   \n",
-              "\n",
-              "   Fwd Pkt Len Min  ...  Fwd Seg Size Min  Active Mean  Active Std  \\\n",
-              "0                0  ...                20    1027304.0         0.0   \n",
-              "1                0  ...                20          0.0         0.0   \n",
-              "2              500  ...                 8    4000203.0         0.0   \n",
-              "3              500  ...                 8    4000189.0         0.0   \n",
-              "4              500  ...                 8    4000554.0         0.0   \n",
-              "\n",
-              "   Active Max  Active Min     Idle Mean      Idle Std  Idle Max  Idle Min  \\\n",
-              "0     1027304     1027304  1.952608e+07  0.000000e+00  19526080  19526080   \n",
-              "1           0           0  0.000000e+00  0.000000e+00         0         0   \n",
-              "2     4000203     4000203  3.191524e+07  3.792787e+07  75584115   7200679   \n",
-              "3     4000189     4000189  3.191524e+07  3.792788e+07  75584130   7200693   \n",
-              "4     4000554     4000554  2.137020e+07  1.528109e+07  41990741   7200848   \n",
-              "\n",
-              "    Label  \n",
-              "0  Benign  \n",
-              "1  Benign  \n",
-              "2  Benign  \n",
-              "3  Benign  \n",
-              "4  Benign  \n",
-              "\n",
-              "[5 rows x 80 columns]"
-            ]
-          }
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "262mxbQDYTNY",
-        "outputId": "7540316e-2c26-49ad-a487-85a4fe0bd84c"
-      },
-      "source": [
-        "data_22_cleaned.Label.value_counts()"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "Benign              1042603\n",
-              "Brute Force -Web        249\n",
-              "Brute Force -XSS         79\n",
-              "SQL Injection            34\n",
-              "Name: Label, dtype: int64"
-            ]
-          }
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "P6nJdtveYTNY"
-      },
-      "source": [
-        "Let's define a sample that will include all different types of web attacks for this specific date."
-      ]
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/security/it-threat-detection.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/security/it-threat-detection.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "b4Yv1jeGywpL"
+   },
+   "source": [
+    "## IT Threat Detection With Similarity Search\n",
+    "\n",
+    "This notebook shows how to use Pinecone's similarity search as a service to build an application for detecting rare events. Such applications are common in cyber-security and fraud detection domains, wherein only a tiny fraction of the events are malicious. \n",
+    "\n",
+    "Here we will build a network intrusion detector. Network intrusion detection systems monitor incoming and outgoing network traffic flow, raising alarms whenever a threat is detected. Here we use a deep-learning model and similarity search in detecting and classifying network intrusion traffic.\n",
+    "\n",
+    "We will start by indexing a set of labeled traffic events in the form of vector embeddings. Each event is either benign or malicious. The vector embeddings are rich, mathematical representations of the network traffic events. They make it possible to determine how similar the network events are to one another using similarity-search algorithms built into Pinecone. Here we will transform network traffic events into vectors using a deep learning model from published academic work.\n",
+    "\n",
+    "\n",
+    "We will then take some new (unseen) network events and search through the index to find the most similar matches, along with their labels. In such a way, we will propagate the matched labels to classify the unseen events as benign or malicious. Keep in mind that the intrusion detection task is a challenging classification task because malicious events are sporadic. The similarity search service helps us sift through historical labeled events to find the most relevant ones. That way, we identify these rare events while keeping a low rate of false alarms. \n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "1OUSClcPhU4j"
+   },
+   "source": [
+    "## Setting up Pinecone\n",
+    "\n",
+    "We will first install and initialize Pinecone. You can get your [API Key here](https://app.pinecone.io)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "251n1avKzCrm"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install -qU pinecone"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "_cGTuY8dywpV",
+    "outputId": "da0b57bf-bde2-401f-e502-6ad80f7fc6cc"
+   },
+   "outputs": [
     {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "N7vwZk6HYTNY",
-        "outputId": "96c03fc3-c086-4398-b58a-2c5c57eb4ded"
-      },
-      "source": [
-        "data_sample = data_22_cleaned[-2000:]\n",
-        "data_sample.Label.value_counts()"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "Benign              1638\n",
-              "Brute Force -Web     249\n",
-              "Brute Force -XSS      79\n",
-              "SQL Injection         34\n",
-              "Name: Label, dtype: int64"
-            ]
-          }
-        }
+     "data": {
+      "text/plain": [
+       "[]"
       ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "neSxNwYckGMK"
-      },
-      "source": [
-        "Now, we will query the test dataset and save predicted and expected results to create a confusion matrix."
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import os\n",
+    "from getpass import getpass\n",
+    "\n",
+    "from pinecone import Pinecone\n",
+    "\n",
+    "# initialize connection to pinecone (get API key at app.pinecone.io)\n",
+    "api_key = os.environ.get(\"PINECONE_API_KEY\") or getpass(\"Enter your Pinecone API key: \")\n",
+    "\n",
+    "# configure client\n",
+    "pc = Pinecone(api_key=api_key)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "a7ysNAlrjD_k"
+   },
+   "source": [
+    "## Installing other dependencies"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "r3g-b61IywpQ"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install -qU pip python-dateutil tensorflow scikit-learn matplotlib seaborn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "0uuHcP-WywpQ"
+   },
+   "outputs": [],
+   "source": [
+    "from collections import Counter\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pandas as pd\n",
+    "import seaborn as sns\n",
+    "import tensorflow.keras.backend as K\n",
+    "from sklearn.metrics import (\n",
+    "    accuracy_score,\n",
+    "    confusion_matrix,\n",
+    "    precision_score,\n",
+    "    recall_score,\n",
+    ")\n",
+    "from tensorflow import keras\n",
+    "from tensorflow.keras.models import Model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "lEwqF1osp83o"
+   },
+   "source": [
+    "We will use some of the code from an [academic work on deep learning for intrusion detection](https://github.com/rambasnet/DeepLearning-IDS). Let's clone the repository that we will use to prepare data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "FUDeovNiywpT"
+   },
+   "outputs": [],
+   "source": [
+    "!git clone -q https://github.com/rambasnet/DeepLearning-IDS.git "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "mc4ERmwniO1H"
+   },
+   "source": [
+    "## Define a New Pinecone Index"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we set up our index specification, which allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/guides/get-started/understanding-organizations#regions)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pinecone import ServerlessSpec\n",
+    "\n",
+    "cloud = os.environ.get(\"PINECONE_CLOUD\") or \"aws\"\n",
+    "region = os.environ.get(\"PINECONE_REGION\") or \"us-east-1\"\n",
+    "\n",
+    "spec = ServerlessSpec(cloud=cloud, region=region)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create the index:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "9_TIgYxBywpV"
+   },
+   "outputs": [],
+   "source": [
+    "# Pick a name for the new index\n",
+    "index_name = \"it-threats\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "\n",
+    "# check if index already exists (it shouldn't if this is first time)\n",
+    "if not pc.has_index(index_name):\n",
+    "    # if does not exist, create index\n",
+    "    pc.create_index(name=index_name, dimension=128, metric=\"euclidean\", spec=spec)\n",
+    "    # wait for index to be initialized\n",
+    "    while not pc.describe_index(index_name).status[\"ready\"]:\n",
+    "        time.sleep(1)\n",
+    "\n",
+    "# connect to index\n",
+    "index = pc.Index(index_name)\n",
+    "# view index stats\n",
+    "index.describe_index_stats()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "IOP9jCo5ywpX"
+   },
+   "source": [
+    "## Upload\n",
+    "Here we transform network events into vector embeddings, then upload them into Pinecone's vector index. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "PzkJJd8ZYTNM"
+   },
+   "source": [
+    "### Prepare Data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "-N0bh6dUYTNN"
+   },
+   "source": [
+    "The datasets we use consist of benign (normal) network traffic and malicious traffic\n",
+    "generated from several different network attacks. We will focus on web attacks only. \n",
+    "\n",
+    "The web attack category consists of three common attacks: \n",
+    "- Cross-site scripting (BruteForce-XSS), \n",
+    "- SQL-Injection (SQL-Injection), \n",
+    "- Brute force administrative and user passwords (BruteForce-Web)\n",
+    "\n",
+    "The original data was recorded over two days."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "vuOLNAehYTNN"
+   },
+   "source": [
+    "**Download data for 22-02-2018 and 23-02-2018**\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "pCrwPkDJYTNO"
+   },
+   "source": [
+    "Files should be downloaded to the current directory. We will be using one date for training and generating vectors, and another one for testing."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "HshKyF0KywpR",
+    "outputId": "d5b3ceee-b584-47e2-a428-43e5c835968f"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Thursday-22-02-2018 100%[===================>] 364.91M  3.07MB/s    in 2m 6s   \n",
+      "Friday-23-02-2018_T 100%[===================>] 365.10M  3.07MB/s    in 1m 53s  \n"
+     ]
+    }
+   ],
+   "source": [
+    "!wget \"https://cse-cic-ids2018.s3.ca-central-1.amazonaws.com/Processed%20Traffic%20Data%20for%20ML%20Algorithms/Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\" -q --show-progress\n",
+    "!wget \"https://cse-cic-ids2018.s3.ca-central-1.amazonaws.com/Processed%20Traffic%20Data%20for%20ML%20Algorithms/Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\" -q --show-progress"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "gr-hay4rfk0d"
+   },
+   "source": [
+    "Let's look at the data events first."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "bSSZ4YCZywpT",
+    "outputId": "27d111b0-c0c1-4694-9e07-65965d2014f1"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Benign              1048009\n",
+       "Brute Force -Web        362\n",
+       "Brute Force -XSS        151\n",
+       "SQL Injection            53\n",
+       "Name: Label, dtype: int64"
       ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 136,
-          "referenced_widgets": [
-            "567d04116b1b4d21bac2348535b750a0",
-            "8f92a228cbc54b30bcb22d0598e9577f",
-            "8fe529938a4f40ac905e145e423d856e",
-            "a4b449113a734e90825bc1ef87ca3d3c",
-            "cda25dc5f86344b3850c92e59085c06c",
-            "3586c81ff82048ed80d69b7a4b5bd6b3",
-            "c427de521f054c4997d69586251bed4f",
-            "90d8d1d9da814d90b0f0bf331102d4df",
-            "7f2cabefd9eb4674a63ef3d56a5be122",
-            "dda94f4a1ea946b7996a928374dda4a5",
-            "ea6f763369cd4b478998ea4d3e8f20e6",
-            "e752273786584dd4baa77ac3f4528849",
-            "aef6058200fa454c90f51760685e25db",
-            "87b4d4b12452401cb82285364dae3576",
-            "8a60896e0288471a91089a03a75b210b",
-            "fe75c2a93c6e4619a2731a4c01a71619",
-            "5c64d617d0d94a6a9797d20f0d1e80f3",
-            "25ac309f2f5d43169ed0bda88300c8d2",
-            "9ebe5f94bbb743058602c9af26cd4eaf",
-            "3db0b4717ac140b78bf7d75e7ebeaf39",
-            "fcc29e1c6b304044a53522c550c4b49d",
-            "cd9e8a060d05491f8c4d74871c9560fa"
-          ]
-        },
-        "id": "8u6cg_1tYTNY",
-        "outputId": "dbe81cb3-88fb-4cd9-91fe-773c960ca108",
-        "scrolled": true
-      },
-      "source": [
-        "y_true = []\n",
-        "y_pred = []\n",
-        "\n",
-        "BATCH_SIZE = 100\n",
-        "\n",
-        "for i in tqdm(range(0, len(data_sample), BATCH_SIZE)):\n",
-        "    test_data = data_sample.iloc[i : i + BATCH_SIZE, :]\n",
-        "\n",
-        "    # Create vector embedding using the model\n",
-        "    test_vector = intermediate_layer_model.predict(K.constant(test_data.iloc[:, :-1]))\n",
-        "    # Query using the vector embedding\n",
-        "    query_results = []\n",
-        "\n",
-        "    for xq in test_vector.tolist():\n",
-        "        query_res = index.query(vector=xq, top_k=50)\n",
-        "        query_results.append(query_res)\n",
-        "\n",
-        "    ids = [res.id for result in query_results for res in result.matches]\n",
-        "\n",
-        "    for label, res in zip(test_data.Label.values, query_results):\n",
-        "        # Add to the true list\n",
-        "        if label == \"Benign\":\n",
-        "            y_true.append(0)\n",
-        "        else:\n",
-        "            y_true.append(1)\n",
-        "\n",
-        "        counter = Counter(match.id.split(\"_\")[0] for match in res.matches)\n",
-        "\n",
-        "        # Add to the predicted list\n",
-        "        if counter[\"Bru\"] or counter[\"SQL\"]:\n",
-        "            y_pred.append(1)\n",
-        "        else:\n",
-        "            y_pred.append(0)"
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data = pd.read_csv(\"Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\")\n",
+    "data.Label.value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "vYl82iXeywpT"
+   },
+   "source": [
+    "**Clean the data** using a Python script from the cloned repository."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "y-trCZOcywpT",
+    "outputId": "c51b5e77-4e66-47a3-c4f6-d492a7dfaef8"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "cleaning Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\n",
+      "total rows read = 1048576\n",
+      "all done writing 1042868 rows; dropped 5708 rows\n"
+     ]
+    }
+   ],
+   "source": [
+    "!python DeepLearning-IDS/data_cleanup.py \"Friday-23-02-2018_TrafficForML_CICFlowMeter.csv\" \"result23022018\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "oKHL8HCjYTNQ"
+   },
+   "source": [
+    "Load the file that you got from the previous step."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 278
+    },
+    "id": "uzH6_tBpywpU",
+    "outputId": "017263dd-8e2a-45a4-9693-5b5b763197ce"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Dst Port</th>\n",
+       "      <th>Protocol</th>\n",
+       "      <th>Timestamp</th>\n",
+       "      <th>Flow Duration</th>\n",
+       "      <th>Tot Fwd Pkts</th>\n",
+       "      <th>Tot Bwd Pkts</th>\n",
+       "      <th>TotLen Fwd Pkts</th>\n",
+       "      <th>TotLen Bwd Pkts</th>\n",
+       "      <th>Fwd Pkt Len Max</th>\n",
+       "      <th>Fwd Pkt Len Min</th>\n",
+       "      <th>...</th>\n",
+       "      <th>Fwd Seg Size Min</th>\n",
+       "      <th>Active Mean</th>\n",
+       "      <th>Active Std</th>\n",
+       "      <th>Active Max</th>\n",
+       "      <th>Active Min</th>\n",
+       "      <th>Idle Mean</th>\n",
+       "      <th>Idle Std</th>\n",
+       "      <th>Idle Max</th>\n",
+       "      <th>Idle Min</th>\n",
+       "      <th>Label</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>22</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1.519374e+09</td>\n",
+       "      <td>1532698</td>\n",
+       "      <td>11</td>\n",
+       "      <td>11</td>\n",
+       "      <td>1179</td>\n",
+       "      <td>1969</td>\n",
+       "      <td>648</td>\n",
+       "      <td>0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>32</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>500</td>\n",
+       "      <td>17</td>\n",
+       "      <td>1.519374e+09</td>\n",
+       "      <td>117573855</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1500</td>\n",
+       "      <td>0</td>\n",
+       "      <td>500</td>\n",
+       "      <td>500</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>58786927.5</td>\n",
+       "      <td>2.375324e+07</td>\n",
+       "      <td>75583006</td>\n",
+       "      <td>41990849</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>500</td>\n",
+       "      <td>17</td>\n",
+       "      <td>1.519374e+09</td>\n",
+       "      <td>117573848</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1500</td>\n",
+       "      <td>0</td>\n",
+       "      <td>500</td>\n",
+       "      <td>500</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>58786924.0</td>\n",
+       "      <td>2.375325e+07</td>\n",
+       "      <td>75583007</td>\n",
+       "      <td>41990841</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>22</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1.519374e+09</td>\n",
+       "      <td>1745392</td>\n",
+       "      <td>11</td>\n",
+       "      <td>11</td>\n",
+       "      <td>1179</td>\n",
+       "      <td>1969</td>\n",
+       "      <td>648</td>\n",
+       "      <td>0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>32</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>500</td>\n",
+       "      <td>17</td>\n",
+       "      <td>1.519374e+09</td>\n",
+       "      <td>89483474</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3000</td>\n",
+       "      <td>0</td>\n",
+       "      <td>500</td>\n",
+       "      <td>500</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>4000364.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>4000364</td>\n",
+       "      <td>4000364</td>\n",
+       "      <td>21370777.5</td>\n",
+       "      <td>1.528092e+07</td>\n",
+       "      <td>41989576</td>\n",
+       "      <td>7200485</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 80 columns</p>\n",
+       "</div>"
       ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [10:48<00:00, 32.44s/it]\n"
-          ]
-        }
+      "text/plain": [
+       "   Dst Port  Protocol     Timestamp  Flow Duration  Tot Fwd Pkts  \\\n",
+       "0        22         6  1.519374e+09        1532698            11   \n",
+       "1       500        17  1.519374e+09      117573855             3   \n",
+       "2       500        17  1.519374e+09      117573848             3   \n",
+       "3        22         6  1.519374e+09        1745392            11   \n",
+       "4       500        17  1.519374e+09       89483474             6   \n",
+       "\n",
+       "   Tot Bwd Pkts  TotLen Fwd Pkts  TotLen Bwd Pkts  Fwd Pkt Len Max  \\\n",
+       "0            11             1179             1969              648   \n",
+       "1             0             1500                0              500   \n",
+       "2             0             1500                0              500   \n",
+       "3            11             1179             1969              648   \n",
+       "4             0             3000                0              500   \n",
+       "\n",
+       "   Fwd Pkt Len Min  ...  Fwd Seg Size Min  Active Mean  Active Std  \\\n",
+       "0                0  ...                32          0.0         0.0   \n",
+       "1              500  ...                 8          0.0         0.0   \n",
+       "2              500  ...                 8          0.0         0.0   \n",
+       "3                0  ...                32          0.0         0.0   \n",
+       "4              500  ...                 8    4000364.0         0.0   \n",
+       "\n",
+       "   Active Max  Active Min   Idle Mean      Idle Std  Idle Max  Idle Min  \\\n",
+       "0           0           0         0.0  0.000000e+00         0         0   \n",
+       "1           0           0  58786927.5  2.375324e+07  75583006  41990849   \n",
+       "2           0           0  58786924.0  2.375325e+07  75583007  41990841   \n",
+       "3           0           0         0.0  0.000000e+00         0         0   \n",
+       "4     4000364     4000364  21370777.5  1.528092e+07  41989576   7200485   \n",
+       "\n",
+       "    Label  \n",
+       "0  Benign  \n",
+       "1  Benign  \n",
+       "2  Benign  \n",
+       "3  Benign  \n",
+       "4  Benign  \n",
+       "\n",
+       "[5 rows x 80 columns]"
       ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 313
-        },
-        "id": "HV3-gkdWYTNZ",
-        "outputId": "20103ea7-713b-4e2b-9590-ecc70ef9b76e"
-      },
-      "source": [
-        "# Create confusion matrix\n",
-        "conf_matrix = confusion_matrix(y_true, y_pred)\n",
-        "\n",
-        "# Show confusion matrix\n",
-        "ax = plt.subplot()\n",
-        "sns.heatmap(conf_matrix, annot=True, ax=ax, cmap=\"Blues\", fmt=\"g\", cbar=False)\n",
-        "\n",
-        "# Add labels, title and ticks\n",
-        "ax.set_xlabel(\"Predicted\")\n",
-        "ax.set_ylabel(\"Acctual\")\n",
-        "ax.set_title(\"Confusion Matrix\")\n",
-        "ax.xaxis.set_ticklabels([\"Benign\", \"Attack\"])\n",
-        "ax.yaxis.set_ticklabels([\"Benign\", \"Attack\"])"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "[Text(0, 0.5, 'Benign'), Text(0, 1.5, 'Attack')]"
-            ]
-          }
-        },
-        {
-          "output_type": "display_data",
-          "data": {
-            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEWCAYAAABliCz2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAbKUlEQVR4nO3debxVdb3/8debc5RBQEZxKBAcILUUtX5eS1IzE6+aM6JlGE455HXWMBwoLbNBrzl3zTmnLEwihyupZDeQxCEpUqIUQTgCKjId+Pz+WOvQ5sg5Z4t7nQ3n+34+HufBXsNe38+Gzfus/V3f/V2KCMzMrO1rV+0CzMysdTjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cC3NkNSR0kPS1oo6f6PcJxjJD1aydqqQdJvJX2t2nXYusOBb61O0tGSJkt6T9KbeTB9rgKHPhzoA/SMiCPW9iARcVdE7FuBelYjaU9JIemXjdbvmK+fUOZxLpF0Z0v7RcTQiLhtLcu1NsiBb61K0lnAT4DLycK5L3Ad8OUKHL4f8LeIqK/AsYoyF9hdUs+SdV8D/lapBpTx/237AL8prNVI2hi4DDg1In4ZEYsiYnlEPBwR5+b7tJf0E0mz8p+fSGqfb9tT0uuSzpb0Vv7p4Lh826XAaGBY/slhZOMzYUlb5mfStfnyCEmvSXpX0gxJx5Ssf6bkebtLmpR3FU2StHvJtgmSxkiamB/nUUm9mvlrWAb8Cjgqf34NcCRwV6O/q6sl/UvSO5Kek7RHvn4/4Fslr3NqSR3flTQReB8YkK87Pt9+vaQHSo7/fUlPSFLZ/4C23nPgW2v6D6AD8FAz+4wCdgN2AnYEPgNcVLJ9U2BjYAtgJPBTSd0j4mKyTw33RkTniPhZc4VI2gi4BhgaEV2A3YHn17BfD+CRfN+ewI+ARxqdoR8NHAdsAmwInNNc28DtwLH54y8BLwOzGu0ziezvoAdwN3C/pA4RMb7R69yx5DlfBU4EugAzGx3vbOBT+S+zPcj+7r4WnlslKQ58a009gXktdLkcA1wWEW9FxFzgUrIga7A83748IsYB7wED17KelcAOkjpGxJsR8fIa9vlPYHpE3BER9RFxDzANOLBkn1sj4m8RsRi4jyyomxQRfwB6SBpIFvy3r2GfOyOiLm/zh0B7Wn6dP4+Il/PnLG90vPeBr5D9wroTOD0iXm/heNbGOPCtNdUBvRq6VJqwOaufnc7M1606RqNfGO8DnT9sIRGxCBgGnAy8KekRSYPKqKehpi1KlmevRT13AKcBe7GGTzx5t9UreTfSArJPNc11FQH8q7mNEfEn4DVAZL+YLDEOfGtNzwJLgIOb2WcW2cXXBn35YHdHuRYBnUqWNy3dGBG/i4gvApuRnbXfXEY9DTW9sZY1NbgDOAUYl599r5J3uZxP1rffPSK6AQvJghqgqW6YZrtnJJ1K9klhFnDe2pdu6ysHvrWaiFhIdmH1p5IOltRJ0gaShkq6Mt/tHuAiSb3zi5+jybog1sbzwBBJffMLxhc2bJDUR9JBeV/+UrKuoRVrOMY4YNt8KGmtpGHAdsBv1rImACJiBvB5smsWjXUB6slG9NRKGg10Ldk+B9jyw4zEkbQt8B2ybp2vAudJarbrydoeB761qoj4EXAW2YXYuWTdEKeRjVyBLJQmAy8ALwJT8nVr09ZjwL35sZ5j9ZBuR3YhcxbwNln4nrKGY9QBB+T71pGdGR8QEfPWpqZGx34mItb06eV3wG/JhmrOJPtUVNpd0/ClsjpJU1pqJ+9CuxP4fkRMjYjpZCN97mgYAWVpkC/Sm5mlwWf4ZmaJcOCbmSXCgW9mlggHvplZIpr7AkxVdRx8mq8m2zpp/qRrq12CWZM61NLk/Eg+wzczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD3
8wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NE1BbdgKQaoE9pWxHxz6LbNTOz1RUa+JJOBy4G5gAr89UBfKrIds3M7IOKPsM/AxgYEXUFt2NmZi0oug//X8DCgtswM7MyFH2G/xowQdIjwNKGlRHxo4LbNTOzRooO/H/mPxvmP2ZmViWFBn5EXFrk8c3MrHxFj9J5mGxUTqmFwGTgxohYUmT7Zmb2b0VftH0NeA+4Of95h2yI5rb5spmZtZKi+/AHR8SQkuWHJT0VEUMkvVxw22ZmVqLoM/zekvo2LOSPe+WLywpu28zMShR9hn828IykVwEB/YFTJG0E3FZw223aDRcfw9AhOzD37XfZ9YjLV63/xlGf5+RhQ6hfsZLxT7/EqKt/za7b9+Pabw8HQILv3jCOsU++QOdO7Xn8f85c9dwtNunGL8ZN4tyrHmz112PpGX3RhTz1+wn06NGTX/76N9UuJwmKaHxNtcINSO2BQWSBP63cC7UdB59WbGHruc/uvBWL3l/KLWOOXRX4Q3bdhvOP/xKHnH4Dy5bX07t7Z+bOf4+OHTZg2fIVrFixkk17deX/7r2QAfuOYsWKlasdc+Jd53HeDx9k4pRXq/GS1hvzJ11b7RLahOcmT6JTp06MuvB8B34FdahFTW0r5Axf0t4R8b+SDm20aYAkIuKXRbSbkolTXqXvZj1WW3fiEXtw1a2PsWx5PQBz578HwOIly1ft037DDVjTL/mt+vZmkx5dHPbWanbZ9dO88cbr1S4jKUV16Xwe+F/gwDVsC8CBX4Ct+23CZwdvxaWnHsiSZcu58EcP8dxfsolJP71DP2645Cv03awHIy+67QNn90futwsPPDqlGmWbWSspJPAj4uL8z+M+zPMknQicCFD7sT2p7bV9AdW1XbU17ejetRNDjr2KXbfvx51Xfp1PHHAJAJNemskuh3+Xgf37cMtlX+V3E//C0mX1q557xJd2YeRFt1epcjNrDUV/8ao9cBiwJavPh3/ZmvaPiJuAm8B9+GvjjTkL+NUTUwGY/PJMVq4MenXvzLy8awfgrzPmsGjxMrbfenOm5Gf/n9x2C2pravjzK/+qSt1m1jqKHpb5a+DLQD2wqOTHCvDwhBfY8zPbArB1303YcINa5s1/j36b96SmJvun7rtZd7bdsg8zZ/17xuoj99uF+8ZPrkrNZtZ6ih6W+bGI2K/gNpJ02xUj2GOXbejVrTN/Hz+GMTeM47ZfPcuNlxzD5Pu/xbLlKzh+9B0A7D54AOccty/L61ewcmVwxuX3Urfg3793D/vizhx8+vXVeimWqPPPOYvJk/7EggXz+eLeQ/jGqadz6GFHVLusNq3QYZmSbgL+OyJe/LDPdZeOras8LNPWZa0+LLPE54ARkmaQzYcvICLCtzg0M2tlRQf+0IKPb2ZmZSr0om1EzAQ+DuydP36/6DbNzGzNCg1fSRcD5wMX5qs2AO4ssk0zM1uzos+2DwEOIh+KGRGzgC4Ft2lmZmtQdOAvi2wYUADks2SamVkVFB3490m6Eegm6QTgcXynKzOzqij6JuZXSfoi2a0NBwKjI+KxIts0M7M1K3pYJnnAPyapF1DX0v5mZlaMQrp0JO0maYKkX0oaLOkl4CVgjiRPtWBmVgVFneFfC3wL2JhsXvyhEfFHSYOAe4DxBbVrZmZNKOqibW1EPBoR9wOzI+KPABExraD2zMysBUUFfuntlBY32uZJ0czMqqCoLp0dJb1DNllax/wx+XKHgto0M7NmFHWLw5oijmtmZmvPE5mZmSXCgW9mlggHvplZIhz4ZmaJcOCbmSXCgW9mlggHvplZIhz4Z
maJcOCbmSXCgW9mlggHvplZIhz4ZmaJcOCbmSXCgW9mlggHvplZIhz4ZmaJcOCbmSXCgW9mlogmb3EoaefmnhgRUypfjpmZFaW5e9r+sJltAexd4VrMzKxATQZ+ROzVmoWYmVmxmjvDX0XSDsB2QIeGdRFxe1FFmZlZ5bUY+JIuBvYkC/xxwFDgGcCBb2a2HilnlM7hwBeA2RFxHLAj0L7QqszMrOLKCfzFEbESqJfUFXgLGFBsWWZmVmnl9OFPltQNuBl4DngP+FOhVZmZWcW1GPgRcUr+8AZJ44GuEfFCsWWZmVmllXPRdsia1kXEU8WUZGZmRSinS+fckscdgM+Qde34i1dmZuuRcrp0DixdlvRx4MrCKjIzs0KszeRprwM7VLoQMzMrVjl9+P9NNncOZL8gdgKmFlmUmZlVXlnDMkse1wP3RMTEguoxM7OClBP43SLi6tIVks5ovM7MzNZtiojmd5CmRMTOjdb9OSIGF1nYrAXLmi/MrEqW1q+sdglmTerfq4Oa2tbcDVCGA0cD/SWNLdnUBairXHlmZtYamuvS+QPwJtCL1W+G8i7gb9qama1nmrsBykxgpqRjgFkRsQRAUkfgY8A/WqVCMzOriHLG4d8HlHZargDuL6YcMzMrSjmBXxsRyxoW8scbFleSmZkVoZzAnyvpoIYFSV8G5hVXkpmZFaGccfgnA3dJujZffh04triSzMysCOVMnvYqsJukzmTj9t8tviwzM6u0Frt0JF0uqVtEvBcR70rqLuk7rVGcmZlVTjl9+EMjYkHDQkTMB/YvriQzMytCOYFfI6l9w0I+Dr99M/ubmdk6qJyLtncCT0i6NV8+DrituJLMzKwI5Vy0vVLSC8A+gIDxQL+iCzMzs8oq945Xs8m+bXsY8AXglcIqMjOzQjQ3W+a2wFHAcLLZMe8lG5a5VyvVZmZmFdRcl8404GngwIj4O4CkM1ulKjMzq7jmunQOI+vKeVLSzZK+QNaHb2Zm66EmAz8iHoqIYcAgYAJwJtBH0vWS9m2l+szMrEJavGgbEYsi4q6IOIBsHvzngQsKr8zMzCqqxXvaVovvaWvrKt/T1tZlzd3TttxhmWZmtp5z4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiCgt8SSPXsO57RbVnZmbNqy3w2IdLWhIRdwFIug5oX2B7ZmbWjCID/1BgrKSVwFDg7Yg4pcD2zMysGRUPfEk9ShaPB34FTAQuk9QjIt6udJtmZtYyRURlDyjNAAJQyZ8NIiIGlHOcWQuWVbYwswpZWr+y2iWYNal/rw5qalvFz/Ajon+lj2lmZh9dkaN0TpXUrWS5uyT34ZuZVUmR4/BPiIgFDQsRMR84ocD2zMysGUUGfjtJq/qSJNUAGxbYnpmZNaPIYZm/A+6TdAPZxduTgfEFtpes74/5Nn+c+BTduvfg1nseAuDnN1/HI79+kI27dQfg+G98k90+O4RXXn6RH15xKQARwYgTTmGPPb9QtdqtbZs7ZzY/GDOK+W/XIYn9v3w4Bx95DO++s5DLv30ec2bPos+mm/OtMT+gS9euLF++nGuuvIzp0/6C2rXj5DPOY8edP13tl9FmVHyUzqoDS+2Ak4AvkI3UeRS4JSJWlPN8j9Ip39Q/T6Zjx05ccemo1QK/Y8dODPvKiNX2XbJkMRvUbkBNbS118+Zy/FcO54HfPEFNbZG/+9sWj9IpX928ubxdN49tBn6C9xct4vSRRzH6ip/w2LixdOnalWFfHcm9d/yM9959h5GnnMnYB3/B9Gkvc/aoMSyYX8dFZ5/KNbfcTbt2ngWmXM2N0insbzEiVkbE9RFxeEQcFhE3lhv29uHsOHhXunbduKx9O3TouCrcly1bSpPvDLMK6NmrN9sM/AQAnTbaiI/3G
0Dd3Ld49ukn2WfoQQDsM/Qg/vDUkwD88x+vsdOu/w+Abt170rlzF6ZPe7k6xbdBRY7S2UbSA5L+Ium1hp+i2rMPeuiBexh5zKF8f8y3efedhavW/+WlFxhx1MF8/ehDOfOC0T67t1Yx+803eHX6NAZu/0kWzH+bnr16A9kvhYULsu9jDth6W559egIr6uuZPet1pv/1FebOmVPNstuUIj8n3QpcD9QDewG3A3c09wRJJ0qaLGnynT+/pcDS2r6DDj2Sux4cx813PEDPXr257uqrVm3bbodP8fNf/Iobbv0Fd992C8uWLq1ipZaCxe+/z3dGnc1J3zyXjTbq3OR+X/rPg+nduw+njzyaG67+AdvtsCM1tTWtWGnbVuSpXceIeEKSImImcImkp4GLm3pCRNwE3ATuw/+oevTsterxAV8+jAvPPu0D+/TrP4AOHToy47W/M/AT27dmeZaQ+vrljBl1Fnvtuz+f23MfALp170HdvLn07NWbunlz2bhbNiNLTW0tJ51x7qrnnnnSsWz+sb5VqbstKvIMf0l+4Xa6pNMkHQJsUmB7VqJu3txVj5/+/RP0H7A1AG/Oep0V9fUAzH5zFv/65z/YdLPNq1KjtX0RwY+vuIS+/QZw2FHHrlq/2+f25PHfjgXg8d+O5T/22AvIBhUsWfw+AFP+9Cw1NTX0679V6xfeRhU5SufTwCtAN2AM0BW4MiL+r5zn+wy/fGMuOo/np0xi4YIFdO/RgxEnnsrU5ybx9+nTkMSmm23BWReMpmev3jw67mHuvv1n1NbW0q5dO44deRKf+7yHZX4YHqVTvpemTuGcU45jy622oZ2y88sRJ53OoO0/yeXfPpe35sxmkz6bMuo7V9Gl68bMfvMNRp35Ddq1a0fP3ptw5oWX0GdTn5B8GM2N0iky8I+IiPtbWtcUB76tqxz4ti6ryrBM4MIy15mZWSsoYj78ocD+wBaSrinZ1JVsxI6ZmVVBEaN0ZgGTgYOA50rWvwucWUB7ZmZWhiLmw58KTJXUJyJuK90m6Qzg6kq3aWZmLSuyD/+oNawbUWB7ZmbWjCL68IcDRwP9JY0t2dQVmFfp9szMrDxF9OH/AXgT6AX8sGR9AMMKaM/MzMpQRB/+TGAm8B+SdiI72z8SmAE8WOn2zMysPEV06WxL1n8/HKgD7iX7gtdelW7LzMzKV0SXzjTgaeDAiPg7gCQPxzQzq7IiRukcBswGnpR0s6SGO16ZmVkVVTzwI+KhiBgGDAImkH3Zqo+k6yXtW+n2zMysPEXe4nBRRNwVEQcAHwOeBy4oqj0zM2teYbNlflSeLdPWVZ4t09Zl1Zot08zM1iEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEIqLaNVgrkHRiRNxU7TrMGvN7s/X4DD8dJ1a7ALMm+L3ZShz4ZmaJcOCbmSXCgZ8O95HausrvzVbii7ZmZonwGb6ZWSIc+GZmiXDgrwckrZD0vKSpkqZI2v0jHOsySftUsj4zAEmHSApJg/LlLSUdXbJ9J0n7f4Tj/0NSr0rUmioH/vphcUTsFBE7AhcCV6ztgSJidEQ8XrnSzFYZDjwDHJUvbwkcXbJ9J2CtA98+Ogf++qcrML9hQdK5kiZJekHSpfm6LSW9IulmSS9LelRSx3zbzyUdnj/eX9I0Sc9IukbSb/L1l0j6H0kTJL0m6ZtVeJ22HpHUGfgsMJJ/B/73gD3yT6fnA5cBw/LlYZI+I+kPkv6c/zkwP1aNpKskvZi/r09v1FZHSeMlndCKL7FNqK12AVaWjpKeB
zoAmwF7A0jaF9gG+AwgYKykIcA/8/XDI+IESfcBhwF3NhxQUgfgRmBIRMyQdE+jNgcBewFdgL9Kuj4ilhf5Im29djAwPiL+JultSTsDFwDnRMQBAJLmALtGxGn5cley91993s14Odn79ESgPzA439ajpJ3OwC+A2yPi9lZ7dW2Ez/DXDw1dOoOA/YDbJQnYN//5MzCFLKS3yZ8zIyKezx8/R/bxutQg4LWImJEvNw78RyJiaUTMA94C+lTyBVmbM5wsiMn/HF7GczYG7pf0EvBjYPt8/T7ADRFRDxARb5c859fArQ77teMz/PVMRDybX7jqTXZWf0VE3Fi6j6QtgaUlq1YAHRsdSi001fj5fq/YGknqSfapcwdJAdQAAYxr4aljgCcj4pD8PTuh4ZD589dkIjBU0t3hLxF9aD7DX8/kIyBqgDrgd8DX8/5TJG0haZMyDzUNGJD/RwMYVuFSLR2Hk3Wx9IuILSPi48AMYCVZl2CDdxstbwy8kT8eUbL+UeBkSbUAjbp0RpO996+r6CtIhAN//dAxv9D1PHAv8LWIWBERjwJ3A89KehF4gNX/QzUpIhYDpwDjJT0DzAEWFlO+tXHDgYcarXuQ7OJtfT6c+EzgSWC7hou2wJXAFZImkp3ENLiF7DrUC5KmsvpIH4D/AjpIurKA19KmeWqFhEnqHBHv5dcDfgpMj4gfV7suMyuGz/DTdkL+qeFlso/XN7awv5mtx3yGb2aWCJ/hm5klwoFvZpYIB76ZWSIc+NZmlcwy+pKk+yV1+gjH2rNkrqGDJF3QzL7dJJ2yFm1cIumcta3RrCUOfGvLGqak2AFYBpxculGZD/1/ICLGRsT3mtmlG9l3HMzWKQ58S8XTwNYlM4leRzb/0Mcl7Svp2fxeA/eXfHN5v4bZRIFDGw4kaYSka/PHfSQ9lH+5aKqyexV8D9gq/3Txg3y/D8xqmq8fJemvkh4HBrba34YlyYFvbV7+Ff2hwIv5qoFkUwEMBhYBFwH7RMTOwGTgrHw20ZuBA4E9gE2bOPw1wO/zexXsTPadhguAV/NPF+c2mtV0J2AXSUMk7UL2bdTBZL9QPl3hl262Gk+IZW1Zw7TSkJ3h/wzYHJgZEX/M1+8GbAdMzL5wzIbAs2Szic6IiOkAku4km7a3sb2BYwEiYgWwUFL3RvuUzmoK2RS/25BNg/FQRLyftzH2I71asxY48K0tWxwRO5WuyEN9Uekq4LGIGN5ov51oesbGD6upWU3/q4JtmLXIXTqWuj8Cn5W0NYCkTpK2JZtNtL+krfL9mprf/QngG/lza/KbejSeFbKpWU2fAg7J7+DUhaz7yKwwDnxLWkTMJZua9x5JL5D9AhgUEUvIunAeyS/azmziEGcAe+WzlT4HbB8RdWRdRC9J+kFTs5pGxBSy2U+fJ5td8unCXqgZnkvHzCwZPsM3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRPx/QQOreDywqpcAAAAASUVORK5CYII=",
-            "text/plain": [
-              "<Figure size 432x288 with 1 Axes>"
-            ]
-          }
-        }
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data_23_cleaned = pd.read_csv(\"result23022018.csv\")\n",
+    "data_23_cleaned.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "cGXf6PmhYTNR",
+    "outputId": "3371cd8e-e8e3-4382-ad17-bfffd8e3d8ac"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Benign              1042301\n",
+       "Brute Force -Web        362\n",
+       "Brute Force -XSS        151\n",
+       "SQL Injection            53\n",
+       "Name: Label, dtype: int64"
       ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data_23_cleaned.Label.value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "bWUf-dk1ywpU"
+   },
+   "source": [
+    "### Load the Model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "97lIL236YTNR"
+   },
+   "source": [
+    "Here we load the pretrained model. The model was trained using the data from the same date."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Bq8_hM-RfEeR"
+   },
+   "source": [
+    "We have modified [the original model](https://github.com/rambasnet/DeepLearning-IDS/blob/master/keras_tensorflow_models/02-23-2018.csv_adam_10_10_multiclass_baseline_model_1561316601.model) slightly and changed the number of classes from four (Benign, BruteForce-Web, BruteForce-XSS, SQL-Injection) to two (Benign and Attack). In the step below we will download and unzip our modified model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "eRTvBzhoqPkR"
+   },
+   "outputs": [],
+   "source": [
+    "!wget -q -O it_threat_model.model.zip \"https://drive.google.com/uc?export=download&id=1VYMHOk_XMAc-QFJ_8CAPvWFfHnLpS2J_\" \n",
+    "!unzip -q it_threat_model.model.zip"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "WP44njScywpU",
+    "outputId": "2a9d2001-0328-4602-c595-372c5bf67aa4"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:SavedModel saved prior to TF 2.5 detected when loading Keras model. Please ensure that you are saving the model with model.save() or tf.keras.models.save_model(), *NOT* tf.saved_model.save(). To confirm, there should be a file named \"keras_metadata.pb\" in the SavedModel directory.\n",
+      "Model: \"sequential\"\n",
+      "_________________________________________________________________\n",
+      "Layer (type)                 Output Shape              Param #   \n",
+      "=================================================================\n",
+      "dense (Dense)                (None, 128)               10240     \n",
+      "_________________________________________________________________\n",
+      "dense_1 (Dense)              (None, 64)                8256      \n",
+      "_________________________________________________________________\n",
+      "dense_2 (Dense)              (None, 1)                 65        \n",
+      "=================================================================\n",
+      "Total params: 18,561\n",
+      "Trainable params: 18,561\n",
+      "Non-trainable params: 0\n",
+      "_________________________________________________________________\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = keras.models.load_model(\"it_threat_model.model\")\n",
+    "model.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "2rCdzSrfywpV"
+   },
+   "outputs": [],
+   "source": [
+    "# Select the first layer\n",
+    "layer_name = \"dense\"\n",
+    "intermediate_layer_model = Model(\n",
+    "    inputs=model.input, outputs=model.get_layer(layer_name).output\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "V3dx9XkPYTNV"
+   },
+   "source": [
+    "### Upload Data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "yO1eySIdjSOG"
+   },
+   "source": [
+    "\n",
+    "Let's define the item IDs in a way that reflects each event's label. Then, we index the events in Pinecone's vector index."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "nk4ZjGg-ywpX",
+    "outputId": "bce8f1fd-98e3-4313-d625-c404a4eca184"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1042867/1042867 [01:43<00:00, 10067.22it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from tqdm import tqdm\n",
+    "\n",
+    "items_to_upload = []\n",
+    "\n",
+    "model_res = intermediate_layer_model.predict(K.constant(data_23_cleaned.iloc[:, :-1]))\n",
+    "\n",
+    "for i, res in tqdm(zip(data_23_cleaned.iterrows(), model_res), total=len(model_res)):\n",
+    "    benign_or_attack = i[1][\"Label\"][:3]\n",
+    "    items_to_upload.append((benign_or_attack + \"_\" + str(i[0]), res.tolist()))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "cMzD8s3ps3k0"
+   },
+   "outputs": [],
+   "source": [
+    "import itertools\n",
+    "\n",
+    "\n",
+    "def chunks(iterable, batch_size=100):\n",
+    "    it = iter(iterable)\n",
+    "    chunk = tuple(itertools.islice(it, batch_size))\n",
+    "    while chunk:\n",
+    "        yield chunk\n",
+    "        chunk = tuple(itertools.islice(it, batch_size))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "6C7er-8Gl2Rg"
+   },
+   "source": [
+    "You can lower `NUMBER_OF_ITEMS` to limit the number of uploaded items."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "_Ti9p0P-ywpX",
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "NUMBER_OF_ITEMS = len(items_to_upload)\n",
+    "\n",
+    "for batch in chunks(items_to_upload[:NUMBER_OF_ITEMS], 50):\n",
+    "    index.upsert(vectors=batch)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "XgF8vW8PtaRX"
+   },
+   "outputs": [],
+   "source": [
+    "items_to_upload.clear()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "fglWJfAq_kw3"
+   },
+   "source": [
+    "Let's verify all items were inserted. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "xU172A4EywpY",
+    "outputId": "80008942-0000-40e5-92a1-66b767a489c6"
+   },
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "-H7rWFguYTNZ"
-      },
-      "source": [
-        "Now we can calculate overall accuracy and per class accuracy."
+     "data": {
+      "text/plain": [
+       "{'dimension': 128, 'namespaces': {'': {'vector_count': 1042867}}}"
       ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "uHTInLt2YTNZ",
-        "outputId": "e48f2726-5eba-4d17-ffe7-7b17c62d5ee8"
-      },
-      "source": [
-        "# Calculate accuracy\n",
-        "acc = accuracy_score(y_true, y_pred, normalize=True, sample_weight=None)\n",
-        "precision = precision_score(y_true, y_pred)\n",
-        "recall = recall_score(y_true, y_pred)\n",
-        "\n",
-        "print(f\"Accuracy: {acc:.3f}\")\n",
-        "print(f\"Precision: {precision:.3f}\")\n",
-        "print(f\"Recall: {recall:.3f}\")"
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "index.describe_index_stats()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "3CGzW3mVywpY"
+   },
+   "source": [
+    "## Query"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Ywuld4BylAIu"
+   },
+   "source": [
+    "First, we will randomly select a Benign/Attack event and query the vector index using the event embedding. Then, we will use data from a different day that contains the same set of attacks to query on a bigger sample."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "m5H4rMyOYTNX"
+   },
+   "source": [
+    "\n",
+    "### Evaluate the Rare Event Classification Model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "velMK_XlYTNX"
+   },
+   "source": [
+    "We will use the network intrusion dataset for 22-02-2018 for querying and testing the Pinecone index."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "VC2AVfWsj7em"
+   },
+   "source": [
+    "First, let's clean the data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "bW9mhYvOYTNX",
+    "outputId": "84a9d548-de90-40fe-c183-fc2ca48de2ec",
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "cleaning Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\n",
+      "total rows read = 1048576\n",
+      "all done writing 1042966 rows; dropped 5610 rows\n"
+     ]
+    }
+   ],
+   "source": [
+    "!python DeepLearning-IDS/data_cleanup.py \"Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv\" \"result22022018\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 278
+    },
+    "id": "xqMuz0jKYTNX",
+    "outputId": "d9f8c333-5e3f-4509-b84a-901bacf42487"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Dst Port</th>\n",
+       "      <th>Protocol</th>\n",
+       "      <th>Timestamp</th>\n",
+       "      <th>Flow Duration</th>\n",
+       "      <th>Tot Fwd Pkts</th>\n",
+       "      <th>Tot Bwd Pkts</th>\n",
+       "      <th>TotLen Fwd Pkts</th>\n",
+       "      <th>TotLen Bwd Pkts</th>\n",
+       "      <th>Fwd Pkt Len Max</th>\n",
+       "      <th>Fwd Pkt Len Min</th>\n",
+       "      <th>...</th>\n",
+       "      <th>Fwd Seg Size Min</th>\n",
+       "      <th>Active Mean</th>\n",
+       "      <th>Active Std</th>\n",
+       "      <th>Active Max</th>\n",
+       "      <th>Active Min</th>\n",
+       "      <th>Idle Mean</th>\n",
+       "      <th>Idle Std</th>\n",
+       "      <th>Idle Max</th>\n",
+       "      <th>Idle Min</th>\n",
+       "      <th>Label</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>22</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1.519288e+09</td>\n",
+       "      <td>20553406</td>\n",
+       "      <td>10</td>\n",
+       "      <td>7</td>\n",
+       "      <td>1063</td>\n",
+       "      <td>1297</td>\n",
+       "      <td>744</td>\n",
+       "      <td>0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>20</td>\n",
+       "      <td>1027304.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1027304</td>\n",
+       "      <td>1027304</td>\n",
+       "      <td>1.952608e+07</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>19526080</td>\n",
+       "      <td>19526080</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>34989</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1.519288e+09</td>\n",
+       "      <td>790</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>848</td>\n",
+       "      <td>0</td>\n",
+       "      <td>848</td>\n",
+       "      <td>0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>20</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>500</td>\n",
+       "      <td>17</td>\n",
+       "      <td>1.519288e+09</td>\n",
+       "      <td>99745913</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2500</td>\n",
+       "      <td>0</td>\n",
+       "      <td>500</td>\n",
+       "      <td>500</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>4000203.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>4000203</td>\n",
+       "      <td>4000203</td>\n",
+       "      <td>3.191524e+07</td>\n",
+       "      <td>3.792787e+07</td>\n",
+       "      <td>75584115</td>\n",
+       "      <td>7200679</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>500</td>\n",
+       "      <td>17</td>\n",
+       "      <td>1.519288e+09</td>\n",
+       "      <td>99745913</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2500</td>\n",
+       "      <td>0</td>\n",
+       "      <td>500</td>\n",
+       "      <td>500</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>4000189.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>4000189</td>\n",
+       "      <td>4000189</td>\n",
+       "      <td>3.191524e+07</td>\n",
+       "      <td>3.792788e+07</td>\n",
+       "      <td>75584130</td>\n",
+       "      <td>7200693</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>500</td>\n",
+       "      <td>17</td>\n",
+       "      <td>1.519288e+09</td>\n",
+       "      <td>89481361</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3000</td>\n",
+       "      <td>0</td>\n",
+       "      <td>500</td>\n",
+       "      <td>500</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>4000554.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>4000554</td>\n",
+       "      <td>4000554</td>\n",
+       "      <td>2.137020e+07</td>\n",
+       "      <td>1.528109e+07</td>\n",
+       "      <td>41990741</td>\n",
+       "      <td>7200848</td>\n",
+       "      <td>Benign</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 80 columns</p>\n",
+       "</div>"
       ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "Accuracy: 0.923\n",
-            "Precision: 0.995\n",
-            "Recall: 0.577\n"
-          ]
-        }
+      "text/plain": [
+       "   Dst Port  Protocol     Timestamp  Flow Duration  Tot Fwd Pkts  \\\n",
+       "0        22         6  1.519288e+09       20553406            10   \n",
+       "1     34989         6  1.519288e+09            790             2   \n",
+       "2       500        17  1.519288e+09       99745913             5   \n",
+       "3       500        17  1.519288e+09       99745913             5   \n",
+       "4       500        17  1.519288e+09       89481361             6   \n",
+       "\n",
+       "   Tot Bwd Pkts  TotLen Fwd Pkts  TotLen Bwd Pkts  Fwd Pkt Len Max  \\\n",
+       "0             7             1063             1297              744   \n",
+       "1             0              848                0              848   \n",
+       "2             0             2500                0              500   \n",
+       "3             0             2500                0              500   \n",
+       "4             0             3000                0              500   \n",
+       "\n",
+       "   Fwd Pkt Len Min  ...  Fwd Seg Size Min  Active Mean  Active Std  \\\n",
+       "0                0  ...                20    1027304.0         0.0   \n",
+       "1                0  ...                20          0.0         0.0   \n",
+       "2              500  ...                 8    4000203.0         0.0   \n",
+       "3              500  ...                 8    4000189.0         0.0   \n",
+       "4              500  ...                 8    4000554.0         0.0   \n",
+       "\n",
+       "   Active Max  Active Min     Idle Mean      Idle Std  Idle Max  Idle Min  \\\n",
+       "0     1027304     1027304  1.952608e+07  0.000000e+00  19526080  19526080   \n",
+       "1           0           0  0.000000e+00  0.000000e+00         0         0   \n",
+       "2     4000203     4000203  3.191524e+07  3.792787e+07  75584115   7200679   \n",
+       "3     4000189     4000189  3.191524e+07  3.792788e+07  75584130   7200693   \n",
+       "4     4000554     4000554  2.137020e+07  1.528109e+07  41990741   7200848   \n",
+       "\n",
+       "    Label  \n",
+       "0  Benign  \n",
+       "1  Benign  \n",
+       "2  Benign  \n",
+       "3  Benign  \n",
+       "4  Benign  \n",
+       "\n",
+       "[5 rows x 80 columns]"
       ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 112
-        },
-        "id": "ZNzyqAH9YTNZ",
-        "outputId": "7d2e35a1-df80-47bc-f5e7-fd425e79a7f4"
-      },
-      "source": [
-        "# Calculate per class accuracy\n",
-        "cmd = confusion_matrix(y_true, y_pred, normalize=\"true\").diagonal()\n",
-        "per_class_accuracy_df = pd.DataFrame(\n",
-        "    [(index, round(value, 4)) for index, value in zip([\"Benign\", \"Attack\"], cmd)],\n",
-        "    columns=[\"type\", \"accuracy\"],\n",
-        ")\n",
-        "per_class_accuracy_df = per_class_accuracy_df.round(2)\n",
-        "display(per_class_accuracy_df)"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/html": [
-              "<div>\n",
-              "<style scoped>\n",
-              "    .dataframe tbody tr th:only-of-type {\n",
-              "        vertical-align: middle;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe tbody tr th {\n",
-              "        vertical-align: top;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe thead th {\n",
-              "        text-align: right;\n",
-              "    }\n",
-              "</style>\n",
-              "<table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              "    <tr style=\"text-align: right;\">\n",
-              "      <th></th>\n",
-              "      <th>type</th>\n",
-              "      <th>accuracy</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <th>0</th>\n",
-              "      <td>Benign</td>\n",
-              "      <td>1.00</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>1</th>\n",
-              "      <td>Attack</td>\n",
-              "      <td>0.58</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table>\n",
-              "</div>"
-            ],
-            "text/plain": [
-              "     type  accuracy\n",
-              "0  Benign      1.00\n",
-              "1  Attack      0.58"
-            ]
-          }
-        }
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data_22_cleaned = pd.read_csv(\"result22022018.csv\")\n",
+    "data_22_cleaned.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "262mxbQDYTNY",
+    "outputId": "7540316e-2c26-49ad-a487-85a4fe0bd84c"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Benign              1042603\n",
+       "Brute Force -Web        249\n",
+       "Brute Force -XSS         79\n",
+       "SQL Injection            34\n",
+       "Name: Label, dtype: int64"
       ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Gfy_LW5zXIj6"
-      },
-      "source": [
-        "We got great results using Pinecone! Let's see what happens if we skip the similarity search step and predict values from the model directly. In other words, let's use the model that created the embeddings as a classifier. It would be interesting to compare its and the similarity search approach accuracy. "
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data_22_cleaned.Label.value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "P6nJdtveYTNY"
+   },
+   "source": [
+    "Let's define a sample that includes all the different types of web attacks recorded on this date."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "N7vwZk6HYTNY",
+    "outputId": "96c03fc3-c086-4398-b58a-2c5c57eb4ded"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Benign              1638\n",
+       "Brute Force -Web     249\n",
+       "Brute Force -XSS      79\n",
+       "SQL Injection         34\n",
+       "Name: Label, dtype: int64"
       ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data_sample = data_22_cleaned[-2000:]\n",
+    "data_sample.Label.value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "neSxNwYckGMK"
+   },
+   "source": [
+    "Now, we will query the test dataset and save predicted and expected results to create a confusion matrix."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 136,
+     "referenced_widgets": [
+      "567d04116b1b4d21bac2348535b750a0",
+      "8f92a228cbc54b30bcb22d0598e9577f",
+      "8fe529938a4f40ac905e145e423d856e",
+      "a4b449113a734e90825bc1ef87ca3d3c",
+      "cda25dc5f86344b3850c92e59085c06c",
+      "3586c81ff82048ed80d69b7a4b5bd6b3",
+      "c427de521f054c4997d69586251bed4f",
+      "90d8d1d9da814d90b0f0bf331102d4df",
+      "7f2cabefd9eb4674a63ef3d56a5be122",
+      "dda94f4a1ea946b7996a928374dda4a5",
+      "ea6f763369cd4b478998ea4d3e8f20e6",
+      "e752273786584dd4baa77ac3f4528849",
+      "aef6058200fa454c90f51760685e25db",
+      "87b4d4b12452401cb82285364dae3576",
+      "8a60896e0288471a91089a03a75b210b",
+      "fe75c2a93c6e4619a2731a4c01a71619",
+      "5c64d617d0d94a6a9797d20f0d1e80f3",
+      "25ac309f2f5d43169ed0bda88300c8d2",
+      "9ebe5f94bbb743058602c9af26cd4eaf",
+      "3db0b4717ac140b78bf7d75e7ebeaf39",
+      "fcc29e1c6b304044a53522c550c4b49d",
+      "cd9e8a060d05491f8c4d74871c9560fa"
+     ]
+    },
+    "id": "8u6cg_1tYTNY",
+    "outputId": "dbe81cb3-88fb-4cd9-91fe-773c960ca108",
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [10:48<00:00, 32.44s/it]\n"
+     ]
+    }
+   ],
+   "source": [
+    "y_true = []\n",
+    "y_pred = []\n",
+    "\n",
+    "BATCH_SIZE = 100\n",
+    "\n",
+    "for i in tqdm(range(0, len(data_sample), BATCH_SIZE)):\n",
+    "    test_data = data_sample.iloc[i : i + BATCH_SIZE, :]\n",
+    "\n",
+    "    # Create vector embedding using the model\n",
+    "    test_vector = intermediate_layer_model.predict(K.constant(test_data.iloc[:, :-1]))\n",
+    "    # Query using the vector embedding\n",
+    "    query_results = []\n",
+    "\n",
+    "    for xq in test_vector.tolist():\n",
+    "        query_res = index.query(vector=xq, top_k=50)\n",
+    "        query_results.append(query_res)\n",
+    "\n",
+    "    ids = [res.id for result in query_results for res in result.matches]\n",
+    "\n",
+    "    for label, res in zip(test_data.Label.values, query_results):\n",
+    "        # Add to the true list\n",
+    "        if label == \"Benign\":\n",
+    "            y_true.append(0)\n",
+    "        else:\n",
+    "            y_true.append(1)\n",
+    "\n",
+    "        counter = Counter(match.id.split(\"_\")[0] for match in res.matches)\n",
+    "\n",
+    "        # Add to the predicted list\n",
+    "        if counter[\"Bru\"] or counter[\"SQL\"]:\n",
+    "            y_pred.append(1)\n",
+    "        else:\n",
+    "            y_pred.append(0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 313
     },
+    "id": "HV3-gkdWYTNZ",
+    "outputId": "20103ea7-713b-4e2b-9590-ecc70ef9b76e"
+   },
+   "outputs": [
     {
-      "cell_type": "code",
-      "metadata": {
-        "id": "Oxya9-mMYh5A"
-      },
-      "source": [
-        "import numpy as np\n",
-        "from tensorflow.keras.utils import normalize\n",
-        "\n",
-        "data_sample = normalize(data_22_cleaned.iloc[:, :-1])[-2000:]\n",
-        "y_pred_model = model.predict(normalize(data_sample)).flatten()\n",
-        "y_pred_model = np.round(y_pred_model)"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 313
-        },
-        "id": "GWssFePDXEks",
-        "outputId": "9eee2c60-f1c1-4a34-c682-665389fe3aee"
-      },
-      "source": [
-        "# Create confusion matrix\n",
-        "conf_matrix = confusion_matrix(y_true, y_pred_model)\n",
-        "\n",
-        "# Show confusion matrix\n",
-        "ax = plt.subplot()\n",
-        "sns.heatmap(conf_matrix, annot=True, ax=ax, cmap=\"Blues\", fmt=\"g\", cbar=False)\n",
-        "\n",
-        "# Add labels, title and ticks\n",
-        "ax.set_xlabel(\"Predicted\")\n",
-        "ax.set_ylabel(\"Acctual\")\n",
-        "ax.set_title(\"Confusion Matrix\")\n",
-        "ax.xaxis.set_ticklabels([\"Benign\", \"Attack\"])\n",
-        "ax.yaxis.set_ticklabels([\"Benign\", \"Attack\"])"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "[Text(0, 0.5, 'Benign'), Text(0, 1.5, 'Attack')]"
-            ]
-          }
-        },
-        {
-          "output_type": "display_data",
-          "data": {
-            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEWCAYAAABliCz2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAbqElEQVR4nO3dd5xV9Z3G8c8DI02kI1ixy0Z3xRLjakBRg2JXVAQrKqyxxMTgWhcs0aBJTIwaxbI2jFGz9oKokYglu6JR7A2ChSJNlCbM8N0/zhlyGZmZK94zl5nzvF+vec099fe9w/DMub9zzu8oIjAzs6avWbkLMDOzhuHANzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgW5MhqbWkRyTNl3Tfd9jP0ZLGlbK2cpD0hKTjy12HrTkc+NbgJA2WNFHSAknT02D6YQl2fTjQDegcEUes7k4i4q6I6FeCelYiaQ9JIen+GvO3S+ePL3I/F0kaU996EdE/Im5fzXKtCXLgW4OSdBbwO+ByknDeGPgDcHAJdt8DeD8iKkuwr6zMAnaV1Llg3vHA+6VqQAn/37Zv8C+FNRhJ7YFLgNMi4v6IWBgRyyLikYg4O12npaTfSZqWfv1OUst02R6SPpX0c0mfp58OhqTLLgZGAAPTTw4n1TwSlrRJeiRdkU6fIGmypK8kTZF0dMH85wu221XSy2lX0cuSdi1YNl7SpZJeSPczTlKXOn4MS4EHgaPS7ZsDRwJ31fhZXS3pE0lfSnpFUu90/r7A+QXv8/WCOi6T9AKwCNgsnXdyuvx6SX8u2P8Vkp6RpKL/Aa3Rc+BbQ/p3oBXwQB3rXADsAvQCtgN2Bi4sWN4daA9sAJwEXCepY0SMJPnUcE9EtI2IW+oqRNLawO+B/hGxDrAr8Noq1usEPJau2xm4CnisxhH6YGAIsC7QAhheV9vAHcBx6et9gLeAaTXWeZnkZ9AJ+CNwn6RWETG2xvvcrmCbY4FhwDrA1Br7+znwb+kfs94kP7vjw2Or5IoD3xpSZ2B2PV0uRwOXRMTnETELuJgkyKotS5cvi4jHgQXA1qtZz3JgW0mtI2J6RLy1inX2Bz6IiDsjojIi7gbeBQ4sWOfWiHg/IhYD95IEda0i4kWgk6StSYL/jlWsMyYi5qRt/gZoSf3v87aIeCvdZlmN/S0CjiH5gzUGOCMiPq1nf9bEOPCtIc0BulR3qdRifVY+Op2azluxjxp/MBYBbb9tIRGxEBgInAJMl/SYpJ5F1FNd0wYF0zNWo547gdOBvqziE0/abfVO2o30Bcmnmrq6igA+qWthRPwfMBkQyR8myxkHvjWkl4AlwCF1rDON5ORrtY35ZndHsRYCbQqmuxcujIgnI+JHwHokR+03FVFPdU2frWZN1e4ETgUeT4++V0i7XM4h6dvvGBEdgPkkQQ1QWzdMnd0zkk4j+aQwDfjP1S/dGisHvjWYiJhPcmL1OkmHSGojaS1J/SVdma52N3ChpK7pyc8RJF0Qq+M1oI+kjdMTxudVL5DUTdJBaV/+1yRdQ1Wr2MfjwFbppaQVkgYC3wMeXc2aAIiIKcDuJOcsaloHqCS5oqdC0gigXcHymcAm3+ZKHElbAb8g6dY5FvhPSXV2PVnT48C3BhURVwFnkZyInUXSDXE6yZUrkITSRGAS8Abwajpvddp6Crgn3dcrrBzSzUhOZE4D5pKE76mr2Mcc4IB03TkkR8YHRMTs1ampxr6fj4hVfXp5EniC5FLNqSSfigq7a6pvKpsj6dX62km70MYAV0TE6xHxAcmVPndWXwFl+SCfpDczywcf4ZuZ5YQD38wsJxz4ZmY54cA3M8uJum6AKavW25/us8m2Rpr38rXlLsGsVq0qqHV8JB/hm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4Z
mY54cA3M8sJB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU448M3McsKBb2aWExVZNyCpOdCtsK2I+Djrds3MbGWZBr6kM4CRwExgeTo7gH/Lsl0zM/umrI/wzwS2jog5GbdjZmb1yLoP/xNgfsZtmJlZEbI+wp8MjJf0GPB19cyIuCrjds3MrIasA//j9KtF+mVmZmWSaeBHxMVZ7t/MzIqX9VU6j5BclVNoPjARGB0RS7Js38zM/inrk7aTgQXATenXlySXaG6VTpuZWQPJug9/+4joUzD9iKTnIqKPpLcybtvMzApkfYTfVdLG1RPp6y7p5NKM2zYzswJZH+H/HHhe0keAgE2BUyWtDdyecdtN2g0jj6Z/n22ZNfcrdjri8hXzf3zU7pwysA+VVcsZO+FNLrj6IXbapgfX/tcgACS47IbHefjZSQAcue+OnH3iPkQE02fN58QLb2fOFwvL8p4sf16Y8BxXjLqM5VXLOXTAEZw0dFi5S2rSFFHznGqJG5BaAj1JAv/dYk/Utt7+9GwLa+R222FzFi76mpsvPW5F4PfZaUvOOXkfDj3jBpYuq6Rrx7bMmreA1q3WYumyKqqqltO9Szv+957z2KzfBQBMHncZOwz4BXO+WMhlZx7MoiXLuGz04+V8a2u8eS9fW+4SmoSqqioO2n8fRt90K926dWPwwMMZ9aur2HyLLcpdWqPWqgLVtiyTLh1Je6bfDwP2BzYHNgP2S+fZd/TCqx8xd/6ileYNO6I3v771KZYuqwRg1rwFACxesoyqqmQoo5Yt1qL6j7yUfK3dOrlFYp22rZk+yzdGW8N4841JbLRRDzbcaCPWatGCfffbn/HPPlPuspq0rLp0dgf+Ahy4imUB3J9Ru7m2RY912W37zbn4tANZsnQZ5131AK+8nQxM+v1te3DDRcew8XqdOOnC21f8ATjz8nt4+d7zWbh4KR99Mouf/vKecr4Fy5HPZ86k+3rdV0yv260bb0yaVMaKmr5MjvAjYmT6fcgqvk6sbTtJwyRNlDSxcrYv4vm2Kpo3o2O7NvQ57tec/9sHGXPlP3/UL785lR0Pv4wfHnMlZ5/Yj5YtKqioaMbQw3uzy6Ar2KzfBbz5/mecfWK/Mr4Dy5P4xi06INXaG2ElkPWNVy2BAcAmrDwe/iWrWj8ibgRuBPfhr47PZn7Bg8+8DsDEt6ayfHnQpWNbZqddOwDvTZnJwsVL2WaL9Vd09E35dDYAf37qVYYPceBbw+jWrTszps9YMf35zJmsu+66Zayo6cv6ssyHgIOBSmBhwZdl4JHxk9hj560A2GLjdWmxVgWz5y2gx/qdad48+afeeL2ObLVJN6ZOm8O0WfPpuVl3unRsC8Beu/TkvSkzat2/WSlts+2/8vHH/+DTTz9h2dKljH38MXbvu2e5y2rSsr4sc8OI2DfjNnLp9l+eQO8dt6RLh7Z8OPZSLr3hcW5/8CVGX3Q0E+87n6XLqjh5xJ0A7Lr9Zgwf0o9llVUsXx6cefk9Ky69vPzGJ3jq5p+yrLKKj6fPZdjIMeV8W5YjFRUVnHfBCH487GSWL6/ikEMHsMUWW5a7rCYt08syJd0IXBMRb3zbbd2lY2sqX5Zpa7K6LsvM+gj/h8AJkqaQjIcvICLCjzg0M2tgWQd+/4z3b2ZmRcr0pG1ETAU2AvZMXy/Kuk0zM1u1TMNX0kjgHOC8dNZagM8KmpmVQdZH24cCB5FeihkR04B1Mm7TzMxWIevAXxrJZUABkI6SaWZmZZB14N8raTTQQdJQ4Gn8pCszs7LI+iHmv5b0I5JHG24NjIiIp7Js08zMVi3ryzJJA/4pSV2AOVm3Z2Zmq5bVePi7SBov6X5J2
0t6E3gTmCnJQy2YmZVBVkf41wLnA+1JxsXvHxF/k9QTuBsYm1G7ZmZWi6xO2lZExLiIuA+YERF/A4iIdzNqz8zM6pFV4C8veL24xjIPimZmVgZZdelsJ+lLksHSWqevSadbZdSmmZnVIZPAj4jmWezXzMxWnwcyMzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU7U+ohDSTvUtWFEvFr6cszMLCt1PdP2N3UsC2DPEtdiZmYZqjXwI6JvQxZiZmbZqusIfwVJ2wLfA1pVz4uIO7IqyszMSq/ewJc0EtiDJPAfB/oDzwMOfDOzRqSYq3QOB/YCZkTEEGA7oGWmVZmZWckVE/iLI2I5UCmpHfA5sFm2ZZmZWakV04c/UVIH4CbgFWAB8H+ZVmVmZiVXb+BHxKnpyxskjQXaRcSkbMsyM7NSK+akbZ9VzYuI57IpyczMslBMl87ZBa9bATuTdO34xiszs0akmC6dAwunJW0EXJlZRWZmlonVGTztU2DbUhdiZmbZKqYP/xqSsXMg+QPRC3g9y6LMzKz0iross+B1JXB3RLyQUT1mZpaRYgK/Q0RcXThD0pk155mZ2ZpNEVH3CtKrEbFDjXl/j4jtsyzsvRmL6i7MrEw6tW1R7hLMatW1bYVqW1bXA1AGAYOBTSU9XLBoHWBO6cozM7OGUFeXzovAdKALKz8M5SvAd9qamTUydT0AZSowVdLRwLSIWAIgqTWwIfCPBqnQzMxKopjr8O8FlhdMVwH3ZVOOmZllpZjAr4iIpdUT6WuftTIza2SKCfxZkg6qnpB0MDA7u5LMzCwLxVyHfwpwl6Rr0+lPgeOyK8nMzLJQzOBpHwG7SGpLct3+V9mXZWZmpVZvl46kyyV1iIgFEfGVpI6SftEQxZmZWekU04ffPyK+qJ6IiHnAftmVZGZmWSgm8JtLalk9kV6H37KO9c3MbA1UzEnbMcAzkm5Np4cAt2dXkpmZZaGYk7ZXSpoE7A0IGAv0yLowMzMrrWKfeDWD5G7bAcBewDuZVWRmZpmoa7TMrYCjgEEko2PeQ3JZZt8Gqs3MzEqori6dd4EJwIER8SGApJ81SFVmZlZydXXpDCDpynlW0k2S9iLpwzczs0ao1sCPiAciYiDQExgP/AzoJul6Sf0aqD4zMyuRek/aRsTCiLgrIg4gGQf/NeDczCszM7OSqveZtuXiZ9ramsrPtLU1WV3PtC32skwzM2vkHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOZBb4kk5axbxRWbVnZmZ1q8hw34dLWhIRdwFI+gPQMsP2zMysDlkG/mHAw5KWA/2BuRFxaobtmZlZHUoe+JI6FUyeDDwIvABcIqlTRMwtdZtmZlY/RURpdyhNAQJQwfdqERGbFbOf92YsKm1hZiXSqW2LcpdgVquubStU27KSH+FHxKal3qeZmX13WV6lc5qkDgXTHSW5D9/MrEyyvA5/aER8UT0REfOAoRm2Z2Zmdcgy8JtJWtGXJKk54M5PM7MyyfKyzCeBeyXdQHLy9hRgbIbt5dasz2fwu8v+i3lz56BmYp8DB3DQ4YP54603MO7R+2nfoSMAxw49nZ126U1l5TKuufISJr//LlVVVfTdZ3+OOOYb98mZlcTlF1/IixP+SsdOnbjz3ocA+HL+F4w4bzgzpn1G9/U34JJRv6Fdu/YrtpkxfRrHHnEQQ4adxuDjhpSr9CYny8A/B/gP4MckV+qMA27OsL3cat68OSeedhabb/UvLFq0kLOGDqbXTj8A4OAjjuHQo45baf0Xnn2aymVLuea2+/h6yWJOO
34AffbqT7f11i9H+dbE7XfgIQw4cjC/GHneinljbruZHb//A44dMpQ7b72JMbfdzKk/+fmK5ddcdQU/2LV3Ocpt0jLr0omI5RFxfUQcHhEDImJ0RFRl1V6ederclc23+hcA2rRZmw17bMqcWbNq30CwZPESqior+frrr6moWIs2a6/dQNVa3vTaYSfatW+/0rwJf32W/gccAkD/Aw5hwvi/rFj23LPPsP4GG7Hp5ls0aJ15kOVVOltK+rOktyVNrv7Kqj1LzJw+jckfvMfW39sWgMce+BNnDDmSq0ddxIKvvgRgtz32plXrVhx/2I846cj+HDLwONZp176u3ZqV1Lw5c+jStSsAXbp2Zd7c5H7MxYsXcdfttzBk2I/LWV6TleVJ21uB64FKoC9wB3BnXRtIGiZpoqSJ99z53xmW1jQtXrSIUSOGc/IZw2mzdlv6H3wEo//4CFff8ic6de7CLdddBcD777xFs2bNue3+cdz0p8d46N47mTHt0zJXbwa33HAdRw4+jjZt/IkzC1n24beOiGckKSKmAhdJmgCMrG2DiLgRuBF8p+23VVm5jFEjhrP73v3Ztc9eAHTs1HnF8n4HHMal5/0EgOeefoIddt6Vioq16NCxEz237cWH775N9/U3LEvtlj8dO3dm9qxZdOnaldmzZtGxUzIiy9tvTmL8M+O4/ve/YcFXX6FmomXLFgwYeHSZK24asgz8JZKaAR9IOh34DFg3w/ZyKyK45oqL2bDHphwy8NgV8+fOmUWnzsnH5r9N+As9Nt0cgK7dujPp1ZfZo9/+fL1kCe+/PYmDjhhcltotn37Ypy9PPPogxw4ZyhOPPkjv3fsC8Idb/tkJcMvo62jduo3DvoRKPpbOih1L3wfeAToAlwLtgCsj4n+L2d5H+MV7e9LfOfeME+mx2ZY0a5bc+nDs0NN57uknmfLheyDRrft6nDr8Qjp17sriRYu4etRIPpk6GSLYq//BHDbo+DK/i8bDY+l8OyPPH85rE1/miy++oFPnzpz0H6fRe4+9GHHuWcycMZ1u3dfj0iuuol37DittVx34vizz26lrLJ0sA/+IiLivvnm1ceDbmsqBb2uyugI/y5O25xU5z8zMGkAW4+H3B/YDNpD0+4JF7Uiu2DEzszLI4qTtNGAicBDwSsH8r4CfZdCemZkVIYvx8F8HXpfULSJuL1wm6Uzg6lK3aWZm9cuyD/+oVcw7IcP2zMysDln04Q8CBgObSnq4YFE7YHap2zMzs+Jk0Yf/IjAd6AL8pmB+AAMzaM/MzIqQRR/+VGAq8O+SepEc7R8JTAH+p9TtmZlZcbLo0tmKpP9+EDAHuIfkBq++pW7LzMyKl0WXzrvABODAiPgQQJIvxzQzK7MsrtIZAMwAnpV0k6S9SJ54ZWZmZVTywI+IByJiINATGE9ys1U3SddL6lfq9szMrDhZPuJwYUTcFREHABsCrwHnZtWemZnVLbPRMr8rj5ZpayqPlmlrsnKNlmlmZmsQB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOOPDNzHLCgW9mlhOKiHLXYA1A0rCIuLHcdZjV5N/NhuMj/PwYVu4CzGrh380G4sA3M8sJB76ZWU448PPDfaS2pvLvZgPxSVszs5zwEb6ZWU448M3McsKB3whIqpL0mqTXJb0qadfvsK9LJO1dyvrMACQdKikk9UynN5E0uGB5L0n7fYf9/0NSl1LUmlcO/MZhcUT0iojtgPOAX67ujiJiREQ8XbrSzFYYBDwPHJVObwIMLljeC1jtwLfvzoHf+LQD5lVPS
Dpb0suSJkm6OJ23iaR3JN0k6S1J4yS1TpfdJunw9PV+kt6V9Lyk30t6NJ1/kaT/ljRe0mRJPynD+7RGRFJbYDfgJP4Z+KOA3umn03OAS4CB6fRASTtLelHS39PvW6f7ai7p15LeSH+vz6jRVmtJYyUNbcC32CRUlLsAK0prSa8BrYD1gD0BJPUDtgR2BgQ8LKkP8HE6f1BEDJV0LzAAGFO9Q0mtgNFAn4iYIunuGm32BPoC6wDvSbo+IpZl+SatUTsEGBsR70uaK2kH4FxgeEQcACBpJrBTRJyeTrcj+f2rTLsZLyf5PR0GbApsny7rVNBOW+BPwB0RcUeDvbsmwkf4jUN1l05PYF/gDkkC+qVffwdeJQnpLdNtpkTEa+nrV0g+XhfqCUyOiCnpdM3Afywivo6I2cDnQLdSviFrcgaRBDHp90FFbNMeuE/Sm8BvgW3S+XsDN0REJUBEzC3Y5iHgVof96vERfiMTES+lJ666khzV/zIiRheuI2kT4OuCWVVA6xq7Uj1N1dzevyu2SpI6k3zq3FZSAM2BAB6vZ9NLgWcj4tD0d3Z89S7T7VflBaC/pD+GbyL61nyE38ikV0A0B+YATwInpv2nSNpA0rpF7updYLP0PxrAwBKXavlxOEkXS4+I2CQiNgKmAMtJugSrfVVjuj3wWfr6hIL544BTJFUA1OjSGUHyu/+Hkr6DnHDgNw6t0xNdrwH3AMdHRFVEjAP+CLwk6Q3gz6z8H6pWEbEYOBUYK+l5YCYwP5vyrYkbBDxQY97/kJy8rUwvJ/4Z8CzwveqTtsCVwC8lvUByEFPtZpLzUJMkvc7KV/oA/BRoJenKDN5Lk+ahFXJMUtuIWJCeD7gO+CAiflvuuswsGz7Cz7eh6aeGt0g+Xo+uZ30za8R8hG9mlhM+wjczywkHvplZTjjwzcxywoFvTVbBKKNvSrpPUpvvsK89CsYaOkjSuXWs20HSqavRxkWShq9ujWb1ceBbU1Y9JMW2wFLglMKFSnzr/wMR8XBEjKpjlQ4k9ziYrVEc+JYXE4AtCkYS/QPJ+EMbSeon6aX0WQP3Fdy5vG/1aKLAYdU7knSCpGvT190kPZDeXPS6kmcVjAI2Tz9d/Cpd7xujmqbzL5D0nqSnga0b7KdhueTAtyYvvUW/P/BGOmtrkqEAtgcWAhcCe0fEDsBE4Kx0NNGbgAOB3kD3Wnb/e+Cv6bMKdiC5p+Fc4KP008XZNUY17QXsKKmPpB1J7kbdnuQPyvdL/NbNVuIBsawpqx5WGpIj/FuA9YGpEfG3dP4uwPeAF5IbjmkBvEQymuiUiPgAQNIYkmF7a9oTOA4gIqqA+ZI61lincFRTSIb43ZJkGIwHImJR2sbD3+ndmtXDgW9N2eKI6FU4Iw31hYWzgKciYlCN9XpR+4iN31Zto5r+tIRtmNXLXTqWd38DdpO0BYCkNpK2IhlNdFNJm6fr1Ta++zPAj9Ntm6cP9ag5KmRto5o+BxyaPsFpHZLuI7PMOPAt1yJiFsnQvHdLmkTyB6BnRCwh6cJ5LD1pO7WWXZwJ9E1HK30F2CYi5pB0Eb0p6Ve1jWoaEa+SjH76GsnokhMye6NmeCwdM7Pc8BG+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnx/zT+z1Wh63oMAAAAAElFTkSuQmCC",
-            "text/plain": [
-              "<Figure size 432x288 with 1 Axes>"
-            ]
-          }
-        }
+     "data": {
+      "text/plain": [
+       "[Text(0, 0.5, 'Benign'), Text(0, 1.5, 'Attack')]"
       ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
     },
     {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "VJRwvXOvYtBL",
-        "outputId": "9d45426b-6bd8-4374-dcf8-9ffecedd7176"
-      },
-      "source": [
-        "# Calculate accuracy\n",
-        "acc = accuracy_score(y_true, y_pred_model, normalize=True, sample_weight=None)\n",
-        "precision = precision_score(y_true, y_pred_model)\n",
-        "recall = recall_score(y_true, y_pred_model)\n",
-        "\n",
-        "print(f\"Accuracy: {acc:.3f}\")\n",
-        "print(f\"Precision: {precision:.3f}\")\n",
-        "print(f\"Recall: {recall:.3f}\")"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "Accuracy: 0.871\n",
-            "Precision: 1.000\n",
-            "Recall: 0.287\n"
-          ]
-        }
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEWCAYAAABliCz2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAbKUlEQVR4nO3debxVdb3/8debc5RBQEZxKBAcILUUtX5eS1IzE6+aM6JlGE455HXWMBwoLbNBrzl3zTmnLEwihyupZDeQxCEpUqIUQTgCKjId+Pz+WOvQ5sg5Z4t7nQ3n+34+HufBXsNe38+Gzfus/V3f/V2KCMzMrO1rV+0CzMysdTjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cC3NkNSR0kPS1oo6f6PcJxjJD1aydqqQdJvJX2t2nXYusOBb61O0tGSJkt6T9KbeTB9rgKHPhzoA/SMiCPW9iARcVdE7FuBelYjaU9JIemXjdbvmK+fUOZxLpF0Z0v7RcTQiLhtLcu1NsiBb61K0lnAT4DLycK5L3Ad8OUKHL4f8LeIqK/AsYoyF9hdUs+SdV8D/lapBpTx/237AL8prNVI2hi4DDg1In4ZEYsiYnlEPBwR5+b7tJf0E0mz8p+fSGqfb9tT0uuSzpb0Vv7p4Lh826XAaGBY/slhZOMzYUlb5mfStfnyCEmvSXpX0gxJx5Ssf6bkebtLmpR3FU2StHvJtgmSxkiamB/nUUm9mvlrWAb8Cjgqf34NcCRwV6O/q6sl/UvSO5Kek7RHvn4/4Fslr3NqSR3flTQReB8YkK87Pt9+vaQHSo7/fUlPSFLZ/4C23nPgW2v6D6AD8FAz+4wCdgN2AnYEPgNcVLJ9U2BjYAtgJPBTSd0j4mKyTw33RkTniPhZc4VI2gi4BhgaEV2A3YHn17BfD+CRfN+ewI+ARxqdoR8NHAdsAmwInNNc28DtwLH54y8BLwOzGu0ziezvoAdwN3C/pA4RMb7R69yx5DlfBU4EugAzGx3vbOBT+S+zPcj+7r4WnlslKQ58a009gXktdLkcA1wWEW9FxFzgUrIga7A83748IsYB7wED17KelcAOkjpGxJsR8fIa9vlPYHpE3BER9RFxDzANOLBkn1sj4m8RsRi4jyyomxQRfwB6SBpIFvy3r2GfOyOiLm/zh0B7Wn6dP4+Il/PnLG90vPeBr5D9wroTOD0iXm/heNbGOPCtNdUBvRq6VJqwOaufnc7M1606RqNfGO8DnT9sIRGxCBgGnAy8KekRSYPKqKehpi1KlmevRT13AKcBe7GGTzx5t9UreTfSArJPNc11FQH8q7mNEfEn4DVAZL+YLDEOfGtNzwJLgIOb2WcW2cXXBn35YHdHuRYBnUqWNy3dGBG/i4gvApuRnbXfXEY9DTW9sZY1NbgDOAUYl599r5J3uZxP1rffPSK6AQvJghqgqW6YZrtnJJ1K9klhFnDe2pdu6ysHvrWaiFhIdmH1p5IOltRJ0gaShkq6Mt/tHuAiSb3zi5+jybog1sbzwBBJffMLxhc2bJDUR9JBeV/+UrKuoRVrOMY4YNt8KGmtpGHAdsBv1rImACJiBvB5smsWjXUB6slG9NRKGg10Ldk+B9jyw4zEkbQt8B2ybp2vAudJarbrydoeB761qoj4EXAW2YXYuWTdEKeRjVyBLJQmAy8ALwJT8nVr09ZjwL35sZ5j9ZBuR3YhcxbwNln4nrKGY9QBB+T71pGdGR8QEfPWpqZGx34mItb06eV3wG/JhmrOJPtUVNpd0/ClsjpJU1pqJ+9CuxP4fkRMjYjpZCN97mgYAWVpkC/Sm5mlwWf4ZmaJcOCbmSXCgW9mlggHvplZIpr7AkxVdRx8mq8m2zpp/qRrq12CWZM61NLk/Eg+wzczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ5
8M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NEOPDNzBLhwDczS4QD38wsEQ58M7NE1BbdgKQaoE9pWxHxz6LbNTOz1RUa+JJOBy4G5gAr89UBfKrIds3M7IOKPsM/AxgYEXUFt2NmZi0oug//X8DCgtswM7MyFH2G/xowQdIjwNKGlRHxo4LbNTOzRooO/H/mPxvmP2ZmViWFBn5EXFrk8c3MrHxFj9J5mGxUTqmFwGTgxohYUmT7Zmb2b0VftH0NeA+4Of95h2yI5rb5spmZtZKi+/AHR8SQkuWHJT0VEUMkvVxw22ZmVqLoM/zekvo2LOSPe+WLywpu28zMShR9hn828IykVwEB/YFTJG0E3FZw223aDRcfw9AhOzD37XfZ9YjLV63/xlGf5+RhQ6hfsZLxT7/EqKt/za7b9+Pabw8HQILv3jCOsU++QOdO7Xn8f85c9dwtNunGL8ZN4tyrHmz112PpGX3RhTz1+wn06NGTX/76N9UuJwmKaHxNtcINSO2BQWSBP63cC7UdB59WbGHruc/uvBWL3l/KLWOOXRX4Q3bdhvOP/xKHnH4Dy5bX07t7Z+bOf4+OHTZg2fIVrFixkk17deX/7r2QAfuOYsWKlasdc+Jd53HeDx9k4pRXq/GS1hvzJ11b7RLahOcmT6JTp06MuvB8B34FdahFTW0r5Axf0t4R8b+SDm20aYAkIuKXRbSbkolTXqXvZj1WW3fiEXtw1a2PsWx5PQBz578HwOIly1ft037DDVjTL/mt+vZmkx5dHPbWanbZ9dO88cbr1S4jKUV16Xwe+F/gwDVsC8CBX4Ct+23CZwdvxaWnHsiSZcu58EcP8dxfsolJP71DP2645Cv03awHIy+67QNn90futwsPPDqlGmWbWSspJPAj4uL8z+M+zPMknQicCFD7sT2p7bV9AdW1XbU17ejetRNDjr2KXbfvx51Xfp1PHHAJAJNemskuh3+Xgf37cMtlX+V3E//C0mX1q557xJd2YeRFt1epcjNrDUV/8ao9cBiwJavPh3/ZmvaPiJuAm8B9+GvjjTkL+NUTUwGY/PJMVq4MenXvzLy8awfgrzPmsGjxMrbfenOm5Gf/n9x2C2pravjzK/+qSt1m1jqKHpb5a+DLQD2wqOTHCvDwhBfY8zPbArB1303YcINa5s1/j36b96SmJvun7rtZd7bdsg8zZ/17xuoj99uF+8ZPrkrNZtZ6ih6W+bGI2K/gNpJ02xUj2GOXbejVrTN/Hz+GMTeM47ZfPcuNlxzD5Pu/xbLlKzh+9B0A7D54AOccty/L61ewcmVwxuX3Urfg3793D/vizhx8+vXVeimWqPPPOYvJk/7EggXz+eLeQ/jGqadz6GFHVLusNq3QYZmSbgL+OyJe/LDPdZeOras8LNPWZa0+LLPE54ARkmaQzYcvICLCtzg0M2tlRQf+0IKPb2ZmZSr0om1EzAQ+DuydP36/6DbNzGzNCg1fSRcD5wMX5qs2AO4ssk0zM1uzos+2DwEOIh+KGRGzgC4Ft2lmZmtQdOAvi2wYUADks2SamVkVFB3490m6Eegm6QTgcXynKzOzqij6JuZXSfoi2a0NBwKjI+KxIts0M7M1K3pYJnnAPyapF1DX0v5mZlaMQrp0JO0maYKkX0oaLOkl4CVgjiRPtWBmVgVFneFfC3wL2JhsXvyhEfFHSYOAe4DxBbVrZmZNKOqibW1EPBoR9wOzI+KPABExraD2zMysBUUFfuntlBY32uZJ0czMqqCoLp0dJb1DNllax/wx+XKHgto0M7NmFHWLw5oijmtmZmvPE5mZmSXCgW9mlggHvplZIhz4ZmaJcOCbmSXCgW9mlggHvplZIhz4ZmaJcOC
bmSXCgW9mlggHvplZIhz4ZmaJcOCbmSXCgW9mlggHvplZIhz4ZmaJcOCbmSXCgW9mlogmb3EoaefmnhgRUypfjpmZFaW5e9r+sJltAexd4VrMzKxATQZ+ROzVmoWYmVmxmjvDX0XSDsB2QIeGdRFxe1FFmZlZ5bUY+JIuBvYkC/xxwFDgGcCBb2a2HilnlM7hwBeA2RFxHLAj0L7QqszMrOLKCfzFEbESqJfUFXgLGFBsWWZmVmnl9OFPltQNuBl4DngP+FOhVZmZWcW1GPgRcUr+8AZJ44GuEfFCsWWZmVmllXPRdsia1kXEU8WUZGZmRSinS+fckscdgM+Qde34i1dmZuuRcrp0DixdlvRx4MrCKjIzs0KszeRprwM7VLoQMzMrVjl9+P9NNncOZL8gdgKmFlmUmZlVXlnDMkse1wP3RMTEguoxM7OClBP43SLi6tIVks5ovM7MzNZtiojmd5CmRMTOjdb9OSIGF1nYrAXLmi/MrEqW1q+sdglmTerfq4Oa2tbcDVCGA0cD/SWNLdnUBairXHlmZtYamuvS+QPwJtCL1W+G8i7gb9qama1nmrsBykxgpqRjgFkRsQRAUkfgY8A/WqVCMzOriHLG4d8HlHZargDuL6YcMzMrSjmBXxsRyxoW8scbFleSmZkVoZzAnyvpoIYFSV8G5hVXkpmZFaGccfgnA3dJujZffh04triSzMysCOVMnvYqsJukzmTj9t8tviwzM6u0Frt0JF0uqVtEvBcR70rqLuk7rVGcmZlVTjl9+EMjYkHDQkTMB/YvriQzMytCOYFfI6l9w0I+Dr99M/ubmdk6qJyLtncCT0i6NV8+DrituJLMzKwI5Vy0vVLSC8A+gIDxQL+iCzMzs8oq945Xs8m+bXsY8AXglcIqMjOzQjQ3W+a2wFHAcLLZMe8lG5a5VyvVZmZmFdRcl8404GngwIj4O4CkM1ulKjMzq7jmunQOI+vKeVLSzZK+QNaHb2Zm66EmAz8iHoqIYcAgYAJwJtBH0vWS9m2l+szMrEJavGgbEYsi4q6IOIBsHvzngQsKr8zMzCqqxXvaVovvaWvrKt/T1tZlzd3TttxhmWZmtp5z4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiHPhmZolw4JuZJcKBb2aWCAe+mVkiCgt8SSPXsO57RbVnZmbNqy3w2IdLWhIRdwFIug5oX2B7ZmbWjCID/1BgrKSVwFDg7Yg4pcD2zMysGRUPfEk9ShaPB34FTAQuk9QjIt6udJtmZtYyRURlDyjNAAJQyZ8NIiIGlHOcWQuWVbYwswpZWr+y2iWYNal/rw5qalvFz/Ajon+lj2lmZh9dkaN0TpXUrWS5uyT34ZuZVUmR4/BPiIgFDQsRMR84ocD2zMysGUUGfjtJq/qSJNUAGxbYnpmZNaPIYZm/A+6TdAPZxduTgfEFtpes74/5Nn+c+BTduvfg1nseAuDnN1/HI79+kI27dQfg+G98k90+O4RXXn6RH15xKQARwYgTTmGPPb9QtdqtbZs7ZzY/GDOK+W/XIYn9v3w4Bx95DO++s5DLv30ec2bPos+mm/OtMT+gS9euLF++nGuuvIzp0/6C2rXj5DPOY8edP13tl9FmVHyUzqoDS+2Ak4AvkI3UeRS4JSJWlPN8j9Ip39Q/T6Zjx05ccemo1QK/Y8dODPvKiNX2XbJkMRvUbkBNbS118+Zy/FcO54HfPEFNbZG/+9sWj9IpX928ubxdN49tBn6C9xct4vSRRzH6ip/w2LixdOnalWFfHcm9d/yM9959h5GnnMnYB3/B9Gkvc/aoMSyYX8dFZ5/KNbfcTbt2ngWmXM2N0insbzEiVkbE9RFxeEQcFhE3lhv29uHsOHhXunbduKx9O3TouCrcly1bSpPvDLMK6NmrN9sM/AQAnTbaiI/3G0Dd3Ld
49ukn2WfoQQDsM/Qg/vDUkwD88x+vsdOu/w+Abt170rlzF6ZPe7k6xbdBRY7S2UbSA5L+Ium1hp+i2rMPeuiBexh5zKF8f8y3efedhavW/+WlFxhx1MF8/ehDOfOC0T67t1Yx+803eHX6NAZu/0kWzH+bnr16A9kvhYULsu9jDth6W559egIr6uuZPet1pv/1FebOmVPNstuUIj8n3QpcD9QDewG3A3c09wRJJ0qaLGnynT+/pcDS2r6DDj2Sux4cx813PEDPXr257uqrVm3bbodP8fNf/Iobbv0Fd992C8uWLq1ipZaCxe+/z3dGnc1J3zyXjTbq3OR+X/rPg+nduw+njzyaG67+AdvtsCM1tTWtWGnbVuSpXceIeEKSImImcImkp4GLm3pCRNwE3ATuw/+oevTsterxAV8+jAvPPu0D+/TrP4AOHToy47W/M/AT27dmeZaQ+vrljBl1Fnvtuz+f23MfALp170HdvLn07NWbunlz2bhbNiNLTW0tJ51x7qrnnnnSsWz+sb5VqbstKvIMf0l+4Xa6pNMkHQJsUmB7VqJu3txVj5/+/RP0H7A1AG/Oep0V9fUAzH5zFv/65z/YdLPNq1KjtX0RwY+vuIS+/QZw2FHHrlq/2+f25PHfjgXg8d+O5T/22AvIBhUsWfw+AFP+9Cw1NTX0679V6xfeRhU5SufTwCtAN2AM0BW4MiL+r5zn+wy/fGMuOo/np0xi4YIFdO/RgxEnnsrU5ybx9+nTkMSmm23BWReMpmev3jw67mHuvv1n1NbW0q5dO44deRKf+7yHZX4YHqVTvpemTuGcU45jy622oZ2y88sRJ53OoO0/yeXfPpe35sxmkz6bMuo7V9Gl68bMfvMNRp35Ddq1a0fP3ptw5oWX0GdTn5B8GM2N0iky8I+IiPtbWtcUB76tqxz4ti6ryrBM4MIy15mZWSsoYj78ocD+wBaSrinZ1JVsxI6ZmVVBEaN0ZgGTgYOA50rWvwucWUB7ZmZWhiLmw58KTJXUJyJuK90m6Qzg6kq3aWZmLSuyD/+oNawbUWB7ZmbWjCL68IcDRwP9JY0t2dQVmFfp9szMrDxF9OH/AXgT6AX8sGR9AMMKaM/MzMpQRB/+TGAm8B+SdiI72z8SmAE8WOn2zMysPEV06WxL1n8/HKgD7iX7gtdelW7LzMzKV0SXzjTgaeDAiPg7gCQPxzQzq7IiRukcBswGnpR0s6SGO16ZmVkVVTzwI+KhiBgGDAImkH3Zqo+k6yXtW+n2zMysPEXe4nBRRNwVEQcAHwOeBy4oqj0zM2teYbNlflSeLdPWVZ4t09Zl1Zot08zM1iEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEIqLaNVgrkHRiRNxU7TrMGvN7s/X4DD8dJ1a7ALMm+L3ZShz4ZmaJcOCbmSXCgZ8O95HausrvzVbii7ZmZonwGb6ZWSIc+GZmiXDgrwckrZD0vKSpkqZI2v0jHOsySftUsj4zAEmHSApJg/LlLSUdXbJ9J0n7f4Tj/0NSr0rUmioH/vphcUTsFBE7AhcCV6ztgSJidEQ8XrnSzFYZDjwDHJUvbwkcXbJ9J2CtA98+Ogf++qcrML9hQdK5kiZJekHSpfm6LSW9IulmSS9LelRSx3zbzyUdnj/eX9I0Sc9IukbSb/L1l0j6H0kTJL0m6ZtVeJ22HpHUGfgsMJJ/B/73gD3yT6fnA5cBw/LlYZI+I+kPkv6c/zkwP1aNpKskvZi/r09v1FZHSeMlndCKL7FNqK12AVaWjpKeBzoAmwF
7A0jaF9gG+AwgYKykIcA/8/XDI+IESfcBhwF3NhxQUgfgRmBIRMyQdE+jNgcBewFdgL9Kuj4ilhf5Im29djAwPiL+JultSTsDFwDnRMQBAJLmALtGxGn5cley91993s14Odn79ESgPzA439ajpJ3OwC+A2yPi9lZ7dW2Ez/DXDw1dOoOA/YDbJQnYN//5MzCFLKS3yZ8zIyKezx8/R/bxutQg4LWImJEvNw78RyJiaUTMA94C+lTyBVmbM5wsiMn/HF7GczYG7pf0EvBjYPt8/T7ADRFRDxARb5c859fArQ77teMz/PVMRDybX7jqTXZWf0VE3Fi6j6QtgaUlq1YAHRsdSi001fj5fq/YGknqSfapcwdJAdQAAYxr4aljgCcj4pD8PTuh4ZD589dkIjBU0t3hLxF9aD7DX8/kIyBqgDrgd8DX8/5TJG0haZMyDzUNGJD/RwMYVuFSLR2Hk3Wx9IuILSPi48AMYCVZl2CDdxstbwy8kT8eUbL+UeBkSbUAjbp0RpO996+r6CtIhAN//dAxv9D1PHAv8LWIWBERjwJ3A89KehF4gNX/QzUpIhYDpwDjJT0DzAEWFlO+tXHDgYcarXuQ7OJtfT6c+EzgSWC7hou2wJXAFZImkp3ENLiF7DrUC5KmsvpIH4D/AjpIurKA19KmeWqFhEnqHBHv5dcDfgpMj4gfV7suMyuGz/DTdkL+qeFlso/XN7awv5mtx3yGb2aWCJ/hm5klwoFvZpYIB76ZWSIc+NZmlcwy+pKk+yV1+gjH2rNkrqGDJF3QzL7dJJ2yFm1cIumcta3RrCUOfGvLGqak2AFYBpxculGZD/1/ICLGRsT3mtmlG9l3HMzWKQ58S8XTwNYlM4leRzb/0Mcl7Svp2fxeA/eXfHN5v4bZRIFDGw4kaYSka/PHfSQ9lH+5aKqyexV8D9gq/3Txg3y/D8xqmq8fJemvkh4HBrba34YlyYFvbV7+Ff2hwIv5qoFkUwEMBhYBFwH7RMTOwGTgrHw20ZuBA4E9gE2bOPw1wO/zexXsTPadhguAV/NPF+c2mtV0J2AXSUMk7UL2bdTBZL9QPl3hl262Gk+IZW1Zw7TSkJ3h/wzYHJgZEX/M1+8GbAdMzL5wzIbAs2Szic6IiOkAku4km7a3sb2BYwEiYgWwUFL3RvuUzmoK2RS/25BNg/FQRLyftzH2I71asxY48K0tWxwRO5WuyEN9Uekq4LGIGN5ov51oesbGD6upWU3/q4JtmLXIXTqWuj8Cn5W0NYCkTpK2JZtNtL+krfL9mprf/QngG/lza/KbejSeFbKpWU2fAg7J7+DUhaz7yKwwDnxLWkTMJZua9x5JL5D9AhgUEUvIunAeyS/azmziEGcAe+WzlT4HbB8RdWRdRC9J+kFTs5pGxBSy2U+fJ5td8unCXqgZnkvHzCwZPsM3M0uEA9/MLBEOfDOzRDjwzcwS4cA3M0uEA9/MLBEOfDOzRPx/QQOreDywqpcAAAAASUVORK5CYII=",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
       ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 112
-        },
-        "id": "TFbfVi_-W6GT",
-        "outputId": "68facf43-ac41-44de-d59a-a489627cc893"
-      },
-      "source": [
-        "# Calculate per class accuracy\n",
-        "cmd = confusion_matrix(y_true, y_pred_model, normalize=\"true\").diagonal()\n",
-        "per_class_accuracy_df = pd.DataFrame(\n",
-        "    [(index, round(value, 4)) for index, value in zip([\"Benign\", \"Attack\"], cmd)],\n",
-        "    columns=[\"type\", \"accuracy\"],\n",
-        ")\n",
-        "per_class_accuracy_df = per_class_accuracy_df.round(2)\n",
-        "display(per_class_accuracy_df)"
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Create confusion matrix\n",
+    "conf_matrix = confusion_matrix(y_true, y_pred)\n",
+    "\n",
+    "# Show confusion matrix\n",
+    "ax = plt.subplot()\n",
+    "sns.heatmap(conf_matrix, annot=True, ax=ax, cmap=\"Blues\", fmt=\"g\", cbar=False)\n",
+    "\n",
+    "# Add labels, title and ticks\n",
+    "ax.set_xlabel(\"Predicted\")\n",
+    "ax.set_ylabel(\"Actual\")\n",
+    "ax.set_title(\"Confusion Matrix\")\n",
+    "ax.xaxis.set_ticklabels([\"Benign\", \"Attack\"])\n",
+    "ax.yaxis.set_ticklabels([\"Benign\", \"Attack\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "-H7rWFguYTNZ"
+   },
+   "source": [
+    "Now we can calculate overall accuracy and per class accuracy."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "uHTInLt2YTNZ",
+    "outputId": "e48f2726-5eba-4d17-ffe7-7b17c62d5ee8"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy: 0.923\n",
+      "Precision: 0.995\n",
+      "Recall: 0.577\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Calculate accuracy\n",
+    "acc = accuracy_score(y_true, y_pred, normalize=True, sample_weight=None)\n",
+    "precision = precision_score(y_true, y_pred)\n",
+    "recall = recall_score(y_true, y_pred)\n",
+    "\n",
+    "print(f\"Accuracy: {acc:.3f}\")\n",
+    "print(f\"Precision: {precision:.3f}\")\n",
+    "print(f\"Recall: {recall:.3f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 112
+    },
+    "id": "ZNzyqAH9YTNZ",
+    "outputId": "7d2e35a1-df80-47bc-f5e7-fd425e79a7f4"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>type</th>\n",
+       "      <th>accuracy</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Benign</td>\n",
+       "      <td>1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Attack</td>\n",
+       "      <td>0.58</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
       ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/html": [
-              "<div>\n",
-              "<style scoped>\n",
-              "    .dataframe tbody tr th:only-of-type {\n",
-              "        vertical-align: middle;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe tbody tr th {\n",
-              "        vertical-align: top;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe thead th {\n",
-              "        text-align: right;\n",
-              "    }\n",
-              "</style>\n",
-              "<table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              "    <tr style=\"text-align: right;\">\n",
-              "      <th></th>\n",
-              "      <th>type</th>\n",
-              "      <th>accuracy</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <th>0</th>\n",
-              "      <td>Benign</td>\n",
-              "      <td>1.00</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>1</th>\n",
-              "      <td>Attack</td>\n",
-              "      <td>0.29</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table>\n",
-              "</div>"
-            ],
-            "text/plain": [
-              "     type  accuracy\n",
-              "0  Benign      1.00\n",
-              "1  Attack      0.29"
-            ]
-          }
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "_E5rnFehXSls"
-      },
-      "source": [
-        "As we can see, the direct application of our model produced much worse results. Pinecone's similarity search over the same model's embeddings improved our threat detection (i.e., \"Attack\") accuracy by over 50%!"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "OiT0JgQxktOC"
-      },
-      "source": [
-        "### Result summary"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "UBBv6-dnfEeX"
-      },
-      "source": [
-        "Using standard vector embeddings with Pinecone's similarity search service, we detected 85% of the attacks while keeping a low 3% false-positive rate. We also showed that our similarity search approach outperforms the direct classification approach that utilizes the classifier's embedding model. Similarity search-based detection gained 50% higher accuracy compared to the direct detector.\n",
-        "\n",
-        "[Original published results](https://github.com/rambasnet/DeepLearning-IDS/blob/master/graphics/confusion_matrices/) for 02-22-2018 show that the model was able to correctly detect 208520 benign cases out of 208520 benign cases, and 24 (18+1+5) attacks out of 70 attacks in the test set making this model **34.3% accurate in predicting attacks**. For testing purposes, 20% of the data for 02-22-2018 was used. \n",
-        "\n",
-        "![02-22-2018--6-15%281%29.png](https://raw.githubusercontent.com/rambasnet/DeepLearning-IDS/master/graphics/confusion_matrices/02-22-2018--6-15(1).png)\n",
-        "\n",
-        "As you can see, the model's performance for creating embeddings for Pinecone was much higher. \n",
-        "\n",
-        "The model we have created follows the academic paper ([model for the same date](https://github.com/rambasnet/DeepLearning-IDS/blob/master/keras_tensorflow_models/) (02-23-2018)) and is slightly modified, but still a straightforward, sequential, shallow model. We have changed the number of classes from four (Benign, BruteForce-Web, BruteForce-XSS, SQL-Injection) to two (Benign and Attack), only interested in whether we are detecting an attack or not. We have also changed validation metrics to precision and recall. These changes improved our results. Yet, there is still room for further improvements, for example, by adding more data covering multiple days and different types of attacks."
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "JdZcEl1pfEeX"
-      },
-      "source": [
-        "## Delete the Index"
+      "text/plain": [
+       "     type  accuracy\n",
+       "0  Benign      1.00\n",
+       "1  Attack      0.58"
       ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Calculate per class accuracy\n",
+    "cmd = confusion_matrix(y_true, y_pred, normalize=\"true\").diagonal()\n",
+    "per_class_accuracy_df = pd.DataFrame(\n",
+    "    [(index, round(value, 4)) for index, value in zip([\"Benign\", \"Attack\"], cmd)],\n",
+    "    columns=[\"type\", \"accuracy\"],\n",
+    ")\n",
+    "per_class_accuracy_df = per_class_accuracy_df.round(2)\n",
+    "display(per_class_accuracy_df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Gfy_LW5zXIj6"
+   },
+   "source": [
+    "We got great results using Pinecone! Let's see what happens if we skip the similarity search step and predict values from the model directly. In other words, let's use the model that created the embeddings as a classifier. It will be interesting to compare its accuracy with that of the similarity search approach."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "Oxya9-mMYh5A"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from tensorflow.keras.utils import normalize\n",
+    "\n",
+    "data_sample = normalize(data_22_cleaned.iloc[:, :-1])[-2000:]\n",
+    "y_pred_model = model.predict(normalize(data_sample)).flatten()\n",
+    "y_pred_model = np.round(y_pred_model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 313
     },
+    "id": "GWssFePDXEks",
+    "outputId": "9eee2c60-f1c1-4a34-c682-665389fe3aee"
+   },
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "hyxLeOnSfEeX"
-      },
-      "source": [
-        "Delete the index once you are sure that you do not want to use it anymore. Once it is deleted, you cannot reuse it."
+     "data": {
+      "text/plain": [
+       "[Text(0, 0.5, 'Benign'), Text(0, 1.5, 'Attack')]"
       ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
     },
     {
-      "cell_type": "code",
-      "metadata": {
-        "id": "ywvaJrVkfEeX"
-      },
-      "source": [
-        "pc.delete_index(index_name)"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Hu2mIbHms3k7"
-      },
-      "source": [
-        "---"
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEWCAYAAABliCz2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAbqElEQVR4nO3dd5xV9Z3G8c8DI02kI1ixy0Z3xRLjakBRg2JXVAQrKqyxxMTgWhcs0aBJTIwaxbI2jFGz9oKokYglu6JR7A2ChSJNlCbM8N0/zhlyGZmZK94zl5nzvF+vec099fe9w/DMub9zzu8oIjAzs6avWbkLMDOzhuHANzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgW5MhqbWkRyTNl3Tfd9jP0ZLGlbK2cpD0hKTjy12HrTkc+NbgJA2WNFHSAknT02D6YQl2fTjQDegcEUes7k4i4q6I6FeCelYiaQ9JIen+GvO3S+ePL3I/F0kaU996EdE/Im5fzXKtCXLgW4OSdBbwO+ByknDeGPgDcHAJdt8DeD8iKkuwr6zMAnaV1Llg3vHA+6VqQAn/37Zv8C+FNRhJ7YFLgNMi4v6IWBgRyyLikYg4O12npaTfSZqWfv1OUst02R6SPpX0c0mfp58OhqTLLgZGAAPTTw4n1TwSlrRJeiRdkU6fIGmypK8kTZF0dMH85wu221XSy2lX0cuSdi1YNl7SpZJeSPczTlKXOn4MS4EHgaPS7ZsDRwJ31fhZXS3pE0lfSnpFUu90/r7A+QXv8/WCOi6T9AKwCNgsnXdyuvx6SX8u2P8Vkp6RpKL/Aa3Rc+BbQ/p3oBXwQB3rXADsAvQCtgN2Bi4sWN4daA9sAJwEXCepY0SMJPnUcE9EtI2IW+oqRNLawO+B/hGxDrAr8Noq1usEPJau2xm4CnisxhH6YGAIsC7QAhheV9vAHcBx6et9gLeAaTXWeZnkZ9AJ+CNwn6RWETG2xvvcrmCbY4FhwDrA1Br7+znwb+kfs94kP7vjw2Or5IoD3xpSZ2B2PV0uRwOXRMTnETELuJgkyKotS5cvi4jHgQXA1qtZz3JgW0mtI2J6RLy1inX2Bz6IiDsjojIi7gbeBQ4sWOfWiHg/IhYD95IEda0i4kWgk6StSYL/jlWsMyYi5qRt/gZoSf3v87aIeCvdZlmN/S0CjiH5gzUGOCMiPq1nf9bEOPCtIc0BulR3qdRifVY+Op2azluxjxp/MBYBbb9tIRGxEBgInAJMl/SYpJ5F1FNd0wYF0zNWo547gdOBvqziE0/abfVO2o30Bcmnmrq6igA+qWthRPwfMBkQyR8myxkHvjWkl4AlwCF1rDON5ORrtY35ZndHsRYCbQqmuxcujIgnI+JHwHokR+03FVFPdU2frWZN1e4ETgUeT4++V0i7XM4h6dvvGBEdgPkkQQ1QWzdMnd0zkk4j+aQwDfjP1S/dGisHvjWYiJhPcmL1OkmHSGojaS1J/SVdma52N3ChpK7pyc8RJF0Qq+M1oI+kjdMTxudVL5DUTdJBaV/+1yRdQ1Wr2MfjwFbppaQVkgYC3wMeXc2aAIiIKcDuJOcsaloHqCS5oqdC0gigXcHymcAm3+ZKHElbAb8g6dY5FvhPSXV2PVnT48C3BhURVwFnkZyInUXSDXE6yZUrkITSRGAS8Abwajpvddp6Crgn3dcrrBzSzUhOZE4D5pKE76mr2Mcc4IB03TkkR8YHRMTs1ampxr6fj4hVfXp5EniC5FLNqSSfigq7a6pvKpsj6dX62km70MYAV0TE6xHxAcmVPndWXwFl+SCfpDczywcf4ZuZ5YQD38wsJxz4ZmY54cA3M8uJum6AKavW25/us8m2Rpr38rXlLsGsVq0qqHV8JB/hm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA
3M8sJB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU448M3McsKBb2aWExVZNyCpOdCtsK2I+Djrds3MbGWZBr6kM4CRwExgeTo7gH/Lsl0zM/umrI/wzwS2jog5GbdjZmb1yLoP/xNgfsZtmJlZEbI+wp8MjJf0GPB19cyIuCrjds3MrIasA//j9KtF+mVmZmWSaeBHxMVZ7t/MzIqX9VU6j5BclVNoPjARGB0RS7Js38zM/inrk7aTgQXATenXlySXaG6VTpuZWQPJug9/+4joUzD9iKTnIqKPpLcybtvMzApkfYTfVdLG1RPp6y7p5NKM2zYzswJZH+H/HHhe0keAgE2BUyWtDdyecdtN2g0jj6Z/n22ZNfcrdjri8hXzf3zU7pwysA+VVcsZO+FNLrj6IXbapgfX/tcgACS47IbHefjZSQAcue+OnH3iPkQE02fN58QLb2fOFwvL8p4sf16Y8BxXjLqM5VXLOXTAEZw0dFi5S2rSFFHznGqJG5BaAj1JAv/dYk/Utt7+9GwLa+R222FzFi76mpsvPW5F4PfZaUvOOXkfDj3jBpYuq6Rrx7bMmreA1q3WYumyKqqqltO9Szv+957z2KzfBQBMHncZOwz4BXO+WMhlZx7MoiXLuGz04+V8a2u8eS9fW+4SmoSqqioO2n8fRt90K926dWPwwMMZ9aur2HyLLcpdWqPWqgLVtiyTLh1Je6bfDwP2BzYHNgP2S+fZd/TCqx8xd/6ileYNO6I3v771KZYuqwRg1rwFACxesoyqqmQoo5Yt1qL6j7yUfK3dOrlFYp22rZk+yzdGW8N4841JbLRRDzbcaCPWatGCfffbn/HPPlPuspq0rLp0dgf+Ahy4imUB3J9Ru7m2RY912W37zbn4tANZsnQZ5131AK+8nQxM+v1te3DDRcew8XqdOOnC21f8ATjz8nt4+d7zWbh4KR99Mouf/vKecr4Fy5HPZ86k+3rdV0yv260bb0yaVMaKmr5MjvAjYmT6fcgqvk6sbTtJwyRNlDSxcrYv4vm2Kpo3o2O7NvQ57tec/9sHGXPlP3/UL785lR0Pv4wfHnMlZ5/Yj5YtKqioaMbQw3uzy6Ar2KzfBbz5/mecfWK/Mr4Dy5P4xi06INXaG2ElkPWNVy2BAcAmrDwe/iWrWj8ibgRuBPfhr47PZn7Bg8+8DsDEt6ayfHnQpWNbZqddOwDvTZnJwsVL2WaL9Vd09E35dDYAf37qVYYPceBbw+jWrTszps9YMf35zJmsu+66Zayo6cv6ssyHgIOBSmBhwZdl4JHxk9hj560A2GLjdWmxVgWz5y2gx/qdad48+afeeL2ObLVJN6ZOm8O0WfPpuVl3unRsC8Beu/TkvSkzat2/WSlts+2/8vHH/+DTTz9h2dKljH38MXbvu2e5y2rSsr4sc8OI2DfjNnLp9l+eQO8dt6RLh7Z8OPZSLr3hcW5/8CVGX3Q0E+87n6XLqjh5xJ0A7Lr9Zgwf0o9llVUsXx6cefk9Ky69vPzGJ3jq5p+yrLKKj6fPZdjIMeV8W5YjFRUVnHfBCH487GSWL6/ikEMHsMUWW5a7rCYt08syJd0IXBMRb3zbbd2lY2sqX5Zpa7K6LsvM+gj/h8AJkqaQjIcvICLCjzg0M2tgWQd+/4z3b2ZmRcr0pG1ETAU2AvZMXy/Kuk0zM1u1TMNX0kjgHOC8dNZagM8KmpmVQdZH24cCB5FeihkR04B1Mm7TzMxWIevAXxrJZUABkI6SaWZmZZB14N8raTTQQdJQ4Gn8pCszs7LI+iHmv5b0I5JHG24NjIiIp7Js08zMVi3ryzJJA/4pSV2AOVm3Z2Zmq5bVePi7SBov6X5J20t6E3g
TmCnJQy2YmZVBVkf41wLnA+1JxsXvHxF/k9QTuBsYm1G7ZmZWi6xO2lZExLiIuA+YERF/A4iIdzNqz8zM6pFV4C8veL24xjIPimZmVgZZdelsJ+lLksHSWqevSadbZdSmmZnVIZPAj4jmWezXzMxWnwcyMzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU7U+ohDSTvUtWFEvFr6cszMLCt1PdP2N3UsC2DPEtdiZmYZqjXwI6JvQxZiZmbZqusIfwVJ2wLfA1pVz4uIO7IqyszMSq/ewJc0EtiDJPAfB/oDzwMOfDOzRqSYq3QOB/YCZkTEEGA7oGWmVZmZWckVE/iLI2I5UCmpHfA5sFm2ZZmZWakV04c/UVIH4CbgFWAB8H+ZVmVmZiVXb+BHxKnpyxskjQXaRcSkbMsyM7NSK+akbZ9VzYuI57IpyczMslBMl87ZBa9bATuTdO34xiszs0akmC6dAwunJW0EXJlZRWZmlonVGTztU2DbUhdiZmbZKqYP/xqSsXMg+QPRC3g9y6LMzKz0iross+B1JXB3RLyQUT1mZpaRYgK/Q0RcXThD0pk155mZ2ZpNEVH3CtKrEbFDjXl/j4jtsyzsvRmL6i7MrEw6tW1R7hLMatW1bYVqW1bXA1AGAYOBTSU9XLBoHWBO6cozM7OGUFeXzovAdKALKz8M5SvAd9qamTUydT0AZSowVdLRwLSIWAIgqTWwIfCPBqnQzMxKopjr8O8FlhdMVwH3ZVOOmZllpZjAr4iIpdUT6WuftTIza2SKCfxZkg6qnpB0MDA7u5LMzCwLxVyHfwpwl6Rr0+lPgeOyK8nMzLJQzOBpHwG7SGpLct3+V9mXZWZmpVZvl46kyyV1iIgFEfGVpI6SftEQxZmZWekU04ffPyK+qJ6IiHnAftmVZGZmWSgm8JtLalk9kV6H37KO9c3MbA1UzEnbMcAzkm5Np4cAt2dXkpmZZaGYk7ZXSpoE7A0IGAv0yLowMzMrrWKfeDWD5G7bAcBewDuZVWRmZpmoa7TMrYCjgEEko2PeQ3JZZt8Gqs3MzEqori6dd4EJwIER8SGApJ81SFVmZlZydXXpDCDpynlW0k2S9iLpwzczs0ao1sCPiAciYiDQExgP/AzoJul6Sf0aqD4zMyuRek/aRsTCiLgrIg4gGQf/NeDczCszM7OSqveZtuXiZ9ramsrPtLU1WV3PtC32skwzM2vkHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOZBb4kk5axbxRWbVnZmZ1q8hw34dLWhIRdwFI+gPQMsP2zMysDlkG/mHAw5KWA/2BuRFxaobtmZlZHUoe+JI6FUyeDDwIvABcIqlTRMwtdZtmZlY/RURpdyhNAQJQwfdqERGbFbOf92YsKm1hZiXSqW2LcpdgVquubStU27KSH+FHxKal3qeZmX13WV6lc5qkDgXTHSW5D9/MrEyyvA5/aER8UT0REfOAoRm2Z2Zmdcgy8JtJWtGXJKk54M5PM7MyyfKyzCeBeyXdQHLy9hRgbIbt5dasz2fwu8v+i3lz56BmYp8DB3DQ4YP54603MO7R+2nfoSMAxw49nZ126U1l5TKuufISJr//LlVVVfTdZ3+OOOYb98mZlcTlF1/IixP+SsdOnbjz3ocA+HL+F4w4bzgzpn1G9/U34JJRv6Fdu/YrtpkxfRrHHnEQQ4adxuDjhpSr9CYny8A/B/gP4MckV+qMA27OsL3cat68OSeedhabb/UvLFq0kLOGDqbXTj8A4OAjjuHQo45baf0Xnn2aymVLuea2+/h6yWJOO34Affb
qT7f11i9H+dbE7XfgIQw4cjC/GHneinljbruZHb//A44dMpQ7b72JMbfdzKk/+fmK5ddcdQU/2LV3Ocpt0jLr0omI5RFxfUQcHhEDImJ0RFRl1V6ederclc23+hcA2rRZmw17bMqcWbNq30CwZPESqior+frrr6moWIs2a6/dQNVa3vTaYSfatW+/0rwJf32W/gccAkD/Aw5hwvi/rFj23LPPsP4GG7Hp5ls0aJ15kOVVOltK+rOktyVNrv7Kqj1LzJw+jckfvMfW39sWgMce+BNnDDmSq0ddxIKvvgRgtz32plXrVhx/2I846cj+HDLwONZp176u3ZqV1Lw5c+jStSsAXbp2Zd7c5H7MxYsXcdfttzBk2I/LWV6TleVJ21uB64FKoC9wB3BnXRtIGiZpoqSJ99z53xmW1jQtXrSIUSOGc/IZw2mzdlv6H3wEo//4CFff8ic6de7CLdddBcD777xFs2bNue3+cdz0p8d46N47mTHt0zJXbwa33HAdRw4+jjZt/IkzC1n24beOiGckKSKmAhdJmgCMrG2DiLgRuBF8p+23VVm5jFEjhrP73v3Ztc9eAHTs1HnF8n4HHMal5/0EgOeefoIddt6Vioq16NCxEz237cWH775N9/U3LEvtlj8dO3dm9qxZdOnaldmzZtGxUzIiy9tvTmL8M+O4/ve/YcFXX6FmomXLFgwYeHSZK24asgz8JZKaAR9IOh34DFg3w/ZyKyK45oqL2bDHphwy8NgV8+fOmUWnzsnH5r9N+As9Nt0cgK7dujPp1ZfZo9/+fL1kCe+/PYmDjhhcltotn37Ypy9PPPogxw4ZyhOPPkjv3fsC8Idb/tkJcMvo62jduo3DvoRKPpbOih1L3wfeAToAlwLtgCsj4n+L2d5H+MV7e9LfOfeME+mx2ZY0a5bc+nDs0NN57uknmfLheyDRrft6nDr8Qjp17sriRYu4etRIPpk6GSLYq//BHDbo+DK/i8bDY+l8OyPPH85rE1/miy++oFPnzpz0H6fRe4+9GHHuWcycMZ1u3dfj0iuuol37DittVx34vizz26lrLJ0sA/+IiLivvnm1ceDbmsqBb2uyugI/y5O25xU5z8zMGkAW4+H3B/YDNpD0+4JF7Uiu2DEzszLI4qTtNGAicBDwSsH8r4CfZdCemZkVIYvx8F8HXpfULSJuL1wm6Uzg6lK3aWZm9cuyD/+oVcw7IcP2zMysDln04Q8CBgObSnq4YFE7YHap2zMzs+Jk0Yf/IjAd6AL8pmB+AAMzaM/MzIqQRR/+VGAq8O+SepEc7R8JTAH+p9TtmZlZcbLo0tmKpP9+EDAHuIfkBq++pW7LzMyKl0WXzrvABODAiPgQQJIvxzQzK7MsrtIZAMwAnpV0k6S9SJ54ZWZmZVTywI+IByJiINATGE9ys1U3SddL6lfq9szMrDhZPuJwYUTcFREHABsCrwHnZtWemZnVLbPRMr8rj5ZpayqPlmlrsnKNlmlmZmsQB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnhwDczywkHvplZTjjwzcxywoFvZpYTDnwzs5xw4JuZ5YQD38wsJxz4ZmY54cA3M8sJB76ZWU448M3McsKBb2aWEw58M7OccOCbmeWEA9/MLCcc+GZmOeHANzPLCQe+mVlOOPDNzHLCgW9mlhOKiHLXYA1A0rCIuLHcdZjV5N/NhuMj/PwYVu4CzGrh380G4sA3M8sJB76ZWU448PPDfaS2pvLvZgPxSVszs5zwEb6ZWU448M3McsKB3whIqpL0mqTXJb0qadfvsK9LJO1dyvrMACQdKikk9UynN5E0uGB5L0n7fYf9/0NSl1LUmlcO/MZhcUT0iojtgPOAX67ujiJiREQ8XbrSzFYYBDwPHJVObwIMLljeC1jtwLfvzoHf+LQD5lVPSDpb0su
SJkm6OJ23iaR3JN0k6S1J4yS1TpfdJunw9PV+kt6V9Lyk30t6NJ1/kaT/ljRe0mRJPynD+7RGRFJbYDfgJP4Z+KOA3umn03OAS4CB6fRASTtLelHS39PvW6f7ai7p15LeSH+vz6jRVmtJYyUNbcC32CRUlLsAK0prSa8BrYD1gD0BJPUDtgR2BgQ8LKkP8HE6f1BEDJV0LzAAGFO9Q0mtgNFAn4iYIunuGm32BPoC6wDvSbo+IpZl+SatUTsEGBsR70uaK2kH4FxgeEQcACBpJrBTRJyeTrcj+f2rTLsZLyf5PR0GbApsny7rVNBOW+BPwB0RcUeDvbsmwkf4jUN1l05PYF/gDkkC+qVffwdeJQnpLdNtpkTEa+nrV0g+XhfqCUyOiCnpdM3Afywivo6I2cDnQLdSviFrcgaRBDHp90FFbNMeuE/Sm8BvgW3S+XsDN0REJUBEzC3Y5iHgVof96vERfiMTES+lJ666khzV/zIiRheuI2kT4OuCWVVA6xq7Uj1N1dzevyu2SpI6k3zq3FZSAM2BAB6vZ9NLgWcj4tD0d3Z89S7T7VflBaC/pD+GbyL61nyE38ikV0A0B+YATwInpv2nSNpA0rpF7updYLP0PxrAwBKXavlxOEkXS4+I2CQiNgKmAMtJugSrfVVjuj3wWfr6hIL544BTJFUA1OjSGUHyu/+Hkr6DnHDgNw6t0xNdrwH3AMdHRFVEjAP+CLwk6Q3gz6z8H6pWEbEYOBUYK+l5YCYwP5vyrYkbBDxQY97/kJy8rUwvJ/4Z8CzwveqTtsCVwC8lvUByEFPtZpLzUJMkvc7KV/oA/BRoJenKDN5Lk+ahFXJMUtuIWJCeD7gO+CAiflvuuswsGz7Cz7eh6aeGt0g+Xo+uZ30za8R8hG9mlhM+wjczywkHvplZTjjwzcxywoFvTVbBKKNvSrpPUpvvsK89CsYaOkjSuXWs20HSqavRxkWShq9ujWb1ceBbU1Y9JMW2wFLglMKFSnzr/wMR8XBEjKpjlQ4k9ziYrVEc+JYXE4AtCkYS/QPJ+EMbSeon6aX0WQP3Fdy5vG/1aKLAYdU7knSCpGvT190kPZDeXPS6kmcVjAI2Tz9d/Cpd7xujmqbzL5D0nqSnga0b7KdhueTAtyYvvUW/P/BGOmtrkqEAtgcWAhcCe0fEDsBE4Kx0NNGbgAOB3kD3Wnb/e+Cv6bMKdiC5p+Fc4KP008XZNUY17QXsKKmPpB1J7kbdnuQPyvdL/NbNVuIBsawpqx5WGpIj/FuA9YGpEfG3dP4uwPeAF5IbjmkBvEQymuiUiPgAQNIYkmF7a9oTOA4gIqqA+ZI61lincFRTSIb43ZJkGIwHImJR2sbD3+ndmtXDgW9N2eKI6FU4Iw31hYWzgKciYlCN9XpR+4iN31Zto5r+tIRtmNXLXTqWd38DdpO0BYCkNpK2IhlNdFNJm6fr1Ta++zPAj9Ntm6cP9ag5KmRto5o+BxyaPsFpHZLuI7PMOPAt1yJiFsnQvHdLmkTyB6BnRCwh6cJ5LD1pO7WWXZwJ9E1HK30F2CYi5pB0Eb0p6Ve1jWoaEa+SjH76GsnokhMye6NmeCwdM7Pc8BG+mVlOOPDNzHLCgW9mlhMOfDOznHDgm5nlhAPfzCwnHPhmZjnx/zT+z1Wh63oMAAAAAElFTkSuQmCC",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
       ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
     }
-  ],
-  "metadata": {
-    "accelerator": "TPU",
+   ],
+   "source": [
+    "# Create confusion matrix\n",
+    "conf_matrix = confusion_matrix(y_true, y_pred_model)\n",
+    "\n",
+    "# Show confusion matrix\n",
+    "ax = plt.subplot()\n",
+    "sns.heatmap(conf_matrix, annot=True, ax=ax, cmap=\"Blues\", fmt=\"g\", cbar=False)\n",
+    "\n",
+    "# Add labels, title and ticks\n",
+    "ax.set_xlabel(\"Predicted\")\n",
+    "ax.set_ylabel(\"Actual\")\n",
+    "ax.set_title(\"Confusion Matrix\")\n",
+    "ax.xaxis.set_ticklabels([\"Benign\", \"Attack\"])\n",
+    "ax.yaxis.set_ticklabels([\"Benign\", \"Attack\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
     "colab": {
-      "collapsed_sections": [],
-      "machine_shape": "hm",
-      "name": "it_threat_detection.ipynb",
-      "provenance": []
-    },
-    "environment": {
-      "name": "tf2-gpu.2-3.m65",
-      "type": "gcloud",
-      "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-3:m65"
-    },
-    "kernelspec": {
-      "display_name": "Python 3",
-      "language": "python",
-      "name": "python3"
-    },
-    "language_info": {
-      "codemirror_mode": {
-        "name": "ipython",
-        "version": 3
-      },
-      "file_extension": ".py",
-      "mimetype": "text/x-python",
-      "name": "python",
-      "nbconvert_exporter": "python",
-      "pygments_lexer": "ipython3",
-      "version": "3.10.7 (main, Sep 14 2022, 22:38:23) [Clang 14.0.0 (clang-1400.0.29.102)]"
-    },
-    "vscode": {
-      "interpreter": {
-        "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
-      }
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "VJRwvXOvYtBL",
+    "outputId": "9d45426b-6bd8-4374-dcf8-9ffecedd7176"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy: 0.871\n",
+      "Precision: 1.000\n",
+      "Recall: 0.287\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Calculate accuracy\n",
+    "acc = accuracy_score(y_true, y_pred_model, normalize=True, sample_weight=None)\n",
+    "precision = precision_score(y_true, y_pred_model)\n",
+    "recall = recall_score(y_true, y_pred_model)\n",
+    "\n",
+    "print(f\"Accuracy: {acc:.3f}\")\n",
+    "print(f\"Precision: {precision:.3f}\")\n",
+    "print(f\"Recall: {recall:.3f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 112
+    },
+    "id": "TFbfVi_-W6GT",
+    "outputId": "68facf43-ac41-44de-d59a-a489627cc893"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>type</th>\n",
+       "      <th>accuracy</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Benign</td>\n",
+       "      <td>1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Attack</td>\n",
+       "      <td>0.29</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     type  accuracy\n",
+       "0  Benign      1.00\n",
+       "1  Attack      0.29"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
     }
+   ],
+   "source": [
+    "# Calculate per class accuracy\n",
+    "cmd = confusion_matrix(y_true, y_pred_model, normalize=\"true\").diagonal()\n",
+    "per_class_accuracy_df = pd.DataFrame(\n",
+    "    [(index, round(value, 4)) for index, value in zip([\"Benign\", \"Attack\"], cmd)],\n",
+    "    columns=[\"type\", \"accuracy\"],\n",
+    ")\n",
+    "per_class_accuracy_df = per_class_accuracy_df.round(2)\n",
+    "display(per_class_accuracy_df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "_E5rnFehXSls"
+   },
+   "source": [
+    "As we can see, the direct application of our model produced much worse results. Pinecone's similarity search over the same model's embeddings improved our threat detection (i.e., \"Attack\") accuracy by over 50%!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "OiT0JgQxktOC"
+   },
+   "source": [
+    "### Result summary"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "UBBv6-dnfEeX"
+   },
+   "source": [
+    "Using standard vector embeddings with Pinecone's similarity search service, we detected 85% of the attacks while keeping a low 3% false-positive rate. We also showed that our similarity search approach outperforms the direct classification approach that utilizes the classifier's embedding model. Similarity search-based detection gained 50% higher accuracy compared to the direct detector.\n",
+    "\n",
+    "[Original published results](https://github.com/rambasnet/DeepLearning-IDS/blob/master/graphics/confusion_matrices/) for 02-22-2018 show that the model was able to correctly detect 208520 benign cases out of 208520 benign cases, and 24 (18+1+5) attacks out of 70 attacks in the test set making this model **34.3% accurate in predicting attacks**. For testing purposes, 20% of the data for 02-22-2018 was used. \n",
+    "\n",
+    "![02-22-2018--6-15%281%29.png](https://raw.githubusercontent.com/rambasnet/DeepLearning-IDS/master/graphics/confusion_matrices/02-22-2018--6-15(1).png)\n",
+    "\n",
+    "As you can see, the model's performance for creating embeddings for Pinecone was much higher. \n",
+    "\n",
+    "The model we have created follows the academic paper ([model for the same date](https://github.com/rambasnet/DeepLearning-IDS/blob/master/keras_tensorflow_models/) (02-23-2018)) and is slightly modified, but still a straightforward, sequential, shallow model. We have changed the number of classes from four (Benign, BruteForce-Web, BruteForce-XSS, SQL-Injection) to two (Benign and Attack), only interested in whether we are detecting an attack or not. We have also changed validation metrics to precision and recall. These changes improved our results. Yet, there is still room for further improvements, for example, by adding more data covering multiple days and different types of attacks."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "JdZcEl1pfEeX"
+   },
+   "source": [
+    "## Delete the Index"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "hyxLeOnSfEeX"
+   },
+   "source": [
+    "Delete the index once you are sure that you do not want to use it anymore. Once it is deleted, you cannot reuse it."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "ywvaJrVkfEeX"
+   },
+   "outputs": [],
+   "source": [
+    "pc.delete_index(index_name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Hu2mIbHms3k7"
+   },
+   "source": [
+    "---"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "TPU",
+  "colab": {
+   "collapsed_sections": [],
+   "machine_shape": "hm",
+   "name": "it_threat_detection.ipynb",
+   "provenance": []
+  },
+  "environment": {
+   "name": "tf2-gpu.2-3.m65",
+   "type": "gcloud",
+   "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-3:m65"
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.7 (main, Sep 14 2022, 22:38:23) [Clang 14.0.0 (clang-1400.0.29.102)]"
   },
-  "nbformat": 4,
-  "nbformat_minor": 1
-}
\ No newline at end of file
+  "vscode": {
+   "interpreter": {
+    "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/scripts/ticketbot.py b/scripts/ticketbot.py
new file mode 100644
index 00000000..ee28c266
--- /dev/null
+++ b/scripts/ticketbot.py
@@ -0,0 +1,371 @@
+#!/usr/bin/env python3
+"""
+Ticketbot - Automated ticket processing with independent worker pools.
+
+This script manages ticket lifecycle automation by spawning Cursor agents
+to handle different stages: picking new work, iterating on PRs, and merging.
+"""
+
+import logging
+import os
+import signal
+import subprocess
+import sys
+import time
+from datetime import datetime
+from pathlib import Path
+
+import click
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s %(levelname)s %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+logger = logging.getLogger(__name__)
+
+# Global flag for graceful shutdown
+_draining = False
+
+
+def _handle_drain_signal(signum, frame):
+    """Handle SIGTERM by setting drain flag."""
+    global _draining
+    _draining = True
+    logger.info("Received shutdown signal, will exit after current iteration completes...")
+
+WORKSPACE = Path(os.environ.get("TICKETBOT_WORKSPACE", os.getcwd()))
+WORKTREE_DIR = WORKSPACE.parent / f"{WORKSPACE.name}-worktrees"
+LOG_DIR = WORKSPACE.parent / f"{WORKSPACE.name}-logs"
+
+
+def get_worktree(job: str, worker_index: int) -> Path:
+    """Return this worker's worktree path, adding a detached one at HEAD if absent."""
+    worktree_path = WORKTREE_DIR / f"{job}-{worker_index}"
+
+    if not worktree_path.exists():
+        WORKTREE_DIR.mkdir(exist_ok=True)
+        # Use --detach to avoid conflicts with branches checked out elsewhere
+        # The Cursor commands will handle checking out the appropriate branch
+        subprocess.run(
+            ["git", "worktree", "add", "--detach", str(worktree_path), "HEAD"],
+            cwd=WORKSPACE,
+            check=True,  # a failed `git worktree add` raises CalledProcessError
+        )
+        logger.info(f"Created worktree: {worktree_path}")
+
+    return worktree_path
+
+
+MAX_RETRIES = 3
+INITIAL_BACKOFF = 10  # seconds
+
+
+def invoke_cursor(prompt: str, worktree: Path, worker_id: str) -> subprocess.CompletedProcess:
+    """Invoke Cursor CLI with the given prompt in the specified worktree.
+    
+    Retries with exponential backoff on transient failures.
+    """
+    logger.info(f"[{worker_id}] Invoking Cursor in {worktree}")
+    
+    # Ensure log directory exists
+    LOG_DIR.mkdir(exist_ok=True)
+    
+    last_error = None
+    
+    for attempt in range(MAX_RETRIES):
+        # Create timestamped log file for this run
+        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+        log_file = LOG_DIR / f"{worker_id}-{timestamp}.log"
+        
+        result = subprocess.run(
+            [
+                "agent",
+                "--print",
+                "--force",
+                "--approve-mcps",
+                "--workspace", str(worktree),
+                prompt,
+            ],
+            capture_output=True,
+            text=True,
+        )
+        
+        # Write output to log file
+        with open(log_file, "w") as f:
+            f.write(f"=== Ticketbot Agent Log ===\n")
+            f.write(f"Worker: {worker_id}\n")
+            f.write(f"Timestamp: {timestamp}\n")
+            f.write(f"Attempt: {attempt + 1}/{MAX_RETRIES}\n")
+            f.write(f"Worktree: {worktree}\n")
+            f.write(f"Prompt: {prompt}\n")
+            f.write(f"Exit Code: {result.returncode}\n")
+            f.write(f"\n=== STDOUT ===\n")
+            f.write(result.stdout or "(empty)")
+            f.write(f"\n\n=== STDERR ===\n")
+            f.write(result.stderr or "(empty)")
+        
+        logger.info(f"[{worker_id}] Agent output logged to: {log_file}")
+        
+        # Success
+        if result.returncode == 0:
+            return result
+        
+        # Check for transient errors that should be retried
+        stderr_lower = (result.stderr or "").lower()
+        is_transient = any(err in stderr_lower for err in [
+            "connection stalled",
+            "connection reset",
+            "connection refused",
+            "timeout",
+            "rate limit",
+            "503",
+            "502",
+            "504",
+        ])
+        
+        if not is_transient:
+            # Non-transient error, don't retry
+            raise subprocess.CalledProcessError(
+                result.returncode, result.args, result.stdout, result.stderr
+            )
+        
+        # Transient error, retry with backoff
+        last_error = subprocess.CalledProcessError(
+            result.returncode, result.args, result.stdout, result.stderr
+        )
+        
+        if attempt < MAX_RETRIES - 1:
+            backoff = INITIAL_BACKOFF * (2 ** attempt)
+            logger.warning(
+                f"[{worker_id}] Transient error (attempt {attempt + 1}/{MAX_RETRIES}), "
+                f"retrying in {backoff}s: {result.stderr[:100] if result.stderr else 'unknown error'}"
+            )
+            time.sleep(backoff)
+    
+    # All retries exhausted
+    logger.error(f"[{worker_id}] All {MAX_RETRIES} attempts failed")
+    raise last_error
+
+
+def pick_work(worker_index: int) -> None:
+    """Run one /tb-pick-next-ticket agent pass; log and re-raise CalledProcessError."""
+    worker_id = f"tb-pick-work-{worker_index}"
+    logger.info(f"[{worker_id}] Starting pick_work iteration")
+
+    worktree = get_worktree("tb-pick-work", worker_index)
+    prompt = "Run /tb-pick-next-ticket"
+
+    try:
+        invoke_cursor(prompt, worktree, worker_id)
+        logger.info(f"[{worker_id}] Completed pick_work iteration")
+    except subprocess.CalledProcessError as e:
+        logger.error(f"[{worker_id}] Cursor failed: {e.stderr}")
+        raise  # propagate so the caller sees the failure
+
+
+def iterate_prs(worker_index: int, total_workers: int) -> None:
+    """Iterate on PRs in review, filtered by shard."""
+    worker_id = f"tb-iterate-prs-{worker_index}"
+    logger.info(f"[{worker_id}] Starting iterate_prs iteration (shard {worker_index}/{total_workers})")
+
+    worktree = get_worktree("tb-iterate-prs", worker_index)
+    prompt = f"""Run /tb-iterate-review-tickets
+Worker shard: {worker_index} of {total_workers}
+Only process tickets where (ticket_number % {total_workers}) == {worker_index}"""
+
+    try:
+        invoke_cursor(prompt, worktree, worker_id)
+        logger.info(f"[{worker_id}] Completed iterate_prs iteration")
+    except subprocess.CalledProcessError as e:
+        logger.error(f"[{worker_id}] Cursor failed: {e.stderr}")
+        raise
+
+
+def merge_ready(worker_index: int) -> None:
+    """Run one /tb-merge-ready-prs agent pass; log and re-raise CalledProcessError."""
+    worker_id = f"tb-merge-ready-{worker_index}"
+    logger.info(f"[{worker_id}] Starting merge_ready iteration")
+
+    worktree = get_worktree("tb-merge-ready", worker_index)
+    prompt = "Run /tb-merge-ready-prs"
+
+    try:
+        invoke_cursor(prompt, worktree, worker_id)
+        logger.info(f"[{worker_id}] Completed merge_ready iteration")
+    except subprocess.CalledProcessError as e:
+        logger.error(f"[{worker_id}] Cursor failed: {e.stderr}")
+        raise  # propagate so the caller sees the failure
+
+
+def spawn_worker(
+    job: str,
+    interval: int,
+    worker_index: int = 0,
+    total_workers: int = 1,
+) -> subprocess.Popen:
+    """Spawn a worker subprocess for the given job type."""
+    cmd = [
+        sys.executable,
+        __file__,
+        "run",
+        "--job", job,
+        "--interval", str(interval),
+        "--worker-index", str(worker_index),
+        "--total-workers", str(total_workers),
+    ]
+    logger.info(f"Spawning worker: {' '.join(cmd)}")
+    return subprocess.Popen(cmd)
+
+
+@click.group()
+def cli():
+    """Ticketbot - Automated ticket processing with independent worker pools."""
+    pass
+
+
+@cli.command()
+@click.option("--pick-workers", default=3, help="Number of pick-work workers")
+@click.option("--iterate-workers", default=3, help="Number of iterate-prs workers")
+@click.option("--merge-workers", default=1, help="Number of merge-ready workers")
+@click.option("--interval", default=45, help="Seconds between job iterations")
+@click.option("--stagger", default=30, help="Seconds between worker starts within a pool")
+def start_all(
+    pick_workers: int,
+    iterate_workers: int,
+    merge_workers: int,
+    interval: int,
+    stagger: int,
+):
+    """Start all worker pools."""
+    processes = []
+
+    # Spawn pick workers
+    for i in range(pick_workers):
+        if i > 0:
+            time.sleep(stagger)
+        processes.append(spawn_worker("tb-pick-work", interval, worker_index=i))
+
+    # Spawn iterate workers (sharded by index)
+    for i in range(iterate_workers):
+        if i > 0:
+            time.sleep(stagger)
+        processes.append(
+            spawn_worker(
+                "tb-iterate-prs",
+                interval,
+                worker_index=i,
+                total_workers=iterate_workers,
+            )
+        )
+
+    # Spawn merge workers
+    for i in range(merge_workers):
+        if i > 0:
+            time.sleep(stagger)
+        processes.append(spawn_worker("tb-merge-ready", interval, worker_index=i))
+
+    logger.info(f"Started {len(processes)} workers. Press Ctrl+C to drain and stop.")
+
+    try:
+        # Wait for all processes (they run forever, so this blocks until interrupt)
+        for p in processes:
+            p.wait()
+    except KeyboardInterrupt:
+        logger.info("Draining workers (waiting for current iterations to complete)...")
+        
+        # Send SIGTERM to all workers to trigger drain mode
+        for p in processes:
+            if p.poll() is None:  # Still running
+                p.send_signal(signal.SIGTERM)
+        
+        # Wait for workers to finish gracefully (with timeout)
+        drain_timeout = 600  # 10 minutes max wait
+        start_time = time.time()
+        
+        while any(p.poll() is None for p in processes):
+            elapsed = time.time() - start_time
+            if elapsed > drain_timeout:
+                logger.warning(f"Drain timeout ({drain_timeout}s) exceeded, force killing...")
+                for p in processes:
+                    if p.poll() is None:
+                        p.kill()
+                break
+            
+            remaining = sum(1 for p in processes if p.poll() is None)
+            logger.info(f"Waiting for {remaining} workers to finish (elapsed: {int(elapsed)}s)...")
+            time.sleep(5)
+        
+        logger.info("All workers stopped.")
+
+
+@cli.command()
+@click.option(
+    "--job",
+    type=click.Choice(["tb-pick-work", "tb-iterate-prs", "tb-merge-ready"]),
+    required=True,
+    help="Job type to run",
+)
+@click.option("--interval", default=45, help="Seconds between iterations")
+@click.option("--worker-index", default=0, help="This worker's index (for sharding)")
+@click.option("--total-workers", default=1, help="Total workers of this job type (for sharding)")
+def run(job: str, interval: int, worker_index: int, total_workers: int):
+    """Run a single job type in a loop."""
+    global _draining
+    
+    # Register signal handler for graceful shutdown
+    signal.signal(signal.SIGTERM, _handle_drain_signal)
+    
+    worker_id = f"{job}-{worker_index}"
+    logger.info(f"[{worker_id}] Starting worker loop (interval={interval}s)")
+
+    while not _draining:  # flag is flipped by _handle_drain_signal on SIGTERM
+        try:
+            if job == "tb-pick-work":
+                pick_work(worker_index)
+            elif job == "tb-iterate-prs":
+                iterate_prs(worker_index, total_workers)
+            elif job == "tb-merge-ready":
+                merge_ready(worker_index)
+        except Exception as e:  # boundary: one failed iteration must not kill the loop
+            logger.exception(f"[{worker_id}] Error: {e}")  # same message, plus traceback
+
+        # Check drain flag before sleeping
+        if _draining:
+            break
+            
+        logger.info(f"[{worker_id}] Sleeping {interval}s...")
+        
+        # Sleep in small increments to respond to drain signal faster
+        for _ in range(interval):
+            if _draining:
+                break
+            time.sleep(1)
+    
+    logger.info(f"[{worker_id}] Draining complete, exiting gracefully.")
+
+
+@cli.command("tb-pick-work")
+@click.option("--worker-index", default=0, help="Worker index for worktree")
+def tb_pick_work_cmd(worker_index: int):
+    """Run pick-work once (for testing)."""
+    pick_work(worker_index)
+
+
+@cli.command("tb-iterate-prs")
+@click.option("--worker-index", default=0, help="Worker index for sharding and worktree")
+@click.option("--total-workers", default=1, help="Total workers for sharding")
+def tb_iterate_prs_cmd(worker_index: int, total_workers: int):
+    """Run iterate-prs once (for testing)."""
+    iterate_prs(worker_index, total_workers)
+
+
+@cli.command("tb-merge-ready")
+@click.option("--worker-index", default=0, help="Worker index for worktree")
+def tb_merge_ready_cmd(worker_index: int):
+    """Run merge-ready once (for testing)."""
+    merge_ready(worker_index)
+
+
+if __name__ == "__main__":
+    cli()

From 6bcbb96f8c06fff2d3eca89d2f8e21fa010ec934 Mon Sep 17 00:00:00 2001
From: Jen Hamon <jhamon@pinecone.io>
Date: Thu, 29 Jan 2026 11:10:04 -0500
Subject: [PATCH 5/5] fix: ruff lint and format for ticketbot.py (f-strings,
 format)

---
 scripts/ticketbot.py | 125 ++++++++++++++++++++++++++-----------------
 1 file changed, 75 insertions(+), 50 deletions(-)

diff --git a/scripts/ticketbot.py b/scripts/ticketbot.py
index ee28c266..ffbcabc6 100644
--- a/scripts/ticketbot.py
+++ b/scripts/ticketbot.py
@@ -32,7 +32,10 @@ def _handle_drain_signal(signum, frame):
     """Handle SIGTERM by setting drain flag."""
     global _draining
     _draining = True
-    logger.info("Received shutdown signal, will exit after current iteration completes...")
+    logger.info(
+        "Received shutdown signal, will exit after current iteration completes..."
+    )
+
 
 WORKSPACE = Path(os.environ.get("TICKETBOT_WORKSPACE", os.getcwd()))
 WORKTREE_DIR = WORKSPACE.parent / f"{WORKSPACE.name}-worktrees"
@@ -61,88 +64,94 @@ def get_worktree(job: str, worker_index: int) -> Path:
 INITIAL_BACKOFF = 10  # seconds
 
 
-def invoke_cursor(prompt: str, worktree: Path, worker_id: str) -> subprocess.CompletedProcess:
+def invoke_cursor(
+    prompt: str, worktree: Path, worker_id: str
+) -> subprocess.CompletedProcess:
     """Invoke Cursor CLI with the given prompt in the specified worktree.
-    
+
     Retries with exponential backoff on transient failures.
     """
     logger.info(f"[{worker_id}] Invoking Cursor in {worktree}")
-    
+
     # Ensure log directory exists
     LOG_DIR.mkdir(exist_ok=True)
-    
+
     last_error = None
-    
+
     for attempt in range(MAX_RETRIES):
         # Create timestamped log file for this run
         timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
         log_file = LOG_DIR / f"{worker_id}-{timestamp}.log"
-        
+
         result = subprocess.run(
             [
                 "agent",
                 "--print",
                 "--force",
                 "--approve-mcps",
-                "--workspace", str(worktree),
+                "--workspace",
+                str(worktree),
                 prompt,
             ],
             capture_output=True,
             text=True,
         )
-        
+
         # Write output to log file
         with open(log_file, "w") as f:
-            f.write(f"=== Ticketbot Agent Log ===\n")
+            f.write("=== Ticketbot Agent Log ===\n")
             f.write(f"Worker: {worker_id}\n")
             f.write(f"Timestamp: {timestamp}\n")
             f.write(f"Attempt: {attempt + 1}/{MAX_RETRIES}\n")
             f.write(f"Worktree: {worktree}\n")
             f.write(f"Prompt: {prompt}\n")
             f.write(f"Exit Code: {result.returncode}\n")
-            f.write(f"\n=== STDOUT ===\n")
+            f.write("\n=== STDOUT ===\n")
             f.write(result.stdout or "(empty)")
-            f.write(f"\n\n=== STDERR ===\n")
+            f.write("\n\n=== STDERR ===\n")
             f.write(result.stderr or "(empty)")
-        
+
         logger.info(f"[{worker_id}] Agent output logged to: {log_file}")
-        
+
         # Success
         if result.returncode == 0:
             return result
-        
+
         # Check for transient errors that should be retried
         stderr_lower = (result.stderr or "").lower()
-        is_transient = any(err in stderr_lower for err in [
-            "connection stalled",
-            "connection reset",
-            "connection refused",
-            "timeout",
-            "rate limit",
-            "503",
-            "502",
-            "504",
-        ])
-        
+        is_transient = any(
+            err in stderr_lower
+            for err in [
+                "connection stalled",
+                "connection reset",
+                "connection refused",
+                "timeout",
+                "rate limit",
+                "503",
+                "502",
+                "504",
+            ]
+        )
+
         if not is_transient:
             # Non-transient error, don't retry
             raise subprocess.CalledProcessError(
                 result.returncode, result.args, result.stdout, result.stderr
             )
-        
+
         # Transient error, retry with backoff
         last_error = subprocess.CalledProcessError(
             result.returncode, result.args, result.stdout, result.stderr
         )
-        
+
         if attempt < MAX_RETRIES - 1:
-            backoff = INITIAL_BACKOFF * (2 ** attempt)
+            backoff = INITIAL_BACKOFF * (2**attempt)
             logger.warning(
                 f"[{worker_id}] Transient error (attempt {attempt + 1}/{MAX_RETRIES}), "
                 f"retrying in {backoff}s: {result.stderr[:100] if result.stderr else 'unknown error'}"
             )
             time.sleep(backoff)
-    
+
     # All retries exhausted
     logger.error(f"[{worker_id}] All {MAX_RETRIES} attempts failed")
     raise last_error
@@ -167,7 +176,9 @@ def pick_work(worker_index: int) -> None:
 def iterate_prs(worker_index: int, total_workers: int) -> None:
     """Iterate on PRs in review, filtered by shard."""
     worker_id = f"tb-iterate-prs-{worker_index}"
-    logger.info(f"[{worker_id}] Starting iterate_prs iteration (shard {worker_index}/{total_workers})")
+    logger.info(
+        f"[{worker_id}] Starting iterate_prs iteration (shard {worker_index}/{total_workers})"
+    )
 
     worktree = get_worktree("tb-iterate-prs", worker_index)
     prompt = f"""Run /tb-iterate-review-tickets
@@ -209,10 +220,14 @@ def spawn_worker(
         sys.executable,
         __file__,
         "run",
-        "--job", job,
-        "--interval", str(interval),
-        "--worker-index", str(worker_index),
-        "--total-workers", str(total_workers),
+        "--job",
+        job,
+        "--interval",
+        str(interval),
+        "--worker-index",
+        str(worker_index),
+        "--total-workers",
+        str(total_workers),
     ]
     logger.info(f"Spawning worker: {' '.join(cmd)}")
     return subprocess.Popen(cmd)
@@ -229,7 +244,9 @@ def cli():
 @click.option("--iterate-workers", default=3, help="Number of iterate-prs workers")
 @click.option("--merge-workers", default=1, help="Number of merge-ready workers")
 @click.option("--interval", default=45, help="Seconds between job iterations")
-@click.option("--stagger", default=30, help="Seconds between worker starts within a pool")
+@click.option(
+    "--stagger", default=30, help="Seconds between worker starts within a pool"
+)
 def start_all(
     pick_workers: int,
     iterate_workers: int,
@@ -273,29 +290,33 @@ def start_all(
             p.wait()
     except KeyboardInterrupt:
         logger.info("Draining workers (waiting for current iterations to complete)...")
-        
+
         # Send SIGTERM to all workers to trigger drain mode
         for p in processes:
             if p.poll() is None:  # Still running
                 p.send_signal(signal.SIGTERM)
-        
+
         # Wait for workers to finish gracefully (with timeout)
         drain_timeout = 600  # 10 minutes max wait
         start_time = time.time()
-        
+
         while any(p.poll() is None for p in processes):
             elapsed = time.time() - start_time
             if elapsed > drain_timeout:
-                logger.warning(f"Drain timeout ({drain_timeout}s) exceeded, force killing...")
+                logger.warning(
+                    f"Drain timeout ({drain_timeout}s) exceeded, force killing..."
+                )
                 for p in processes:
                     if p.poll() is None:
                         p.kill()
                 break
-            
+
             remaining = sum(1 for p in processes if p.poll() is None)
-            logger.info(f"Waiting for {remaining} workers to finish (elapsed: {int(elapsed)}s)...")
+            logger.info(
+                f"Waiting for {remaining} workers to finish (elapsed: {int(elapsed)}s)..."
+            )
             time.sleep(5)
-        
+
         logger.info("All workers stopped.")
 
 
@@ -308,14 +329,16 @@ def start_all(
 )
 @click.option("--interval", default=45, help="Seconds between iterations")
 @click.option("--worker-index", default=0, help="This worker's index (for sharding)")
-@click.option("--total-workers", default=1, help="Total workers of this job type (for sharding)")
+@click.option(
+    "--total-workers", default=1, help="Total workers of this job type (for sharding)"
+)
 def run(job: str, interval: int, worker_index: int, total_workers: int):
     """Run a single job type in a loop."""
     global _draining
-    
+
     # Register signal handler for graceful shutdown
     signal.signal(signal.SIGTERM, _handle_drain_signal)
-    
+
     worker_id = f"{job}-{worker_index}"
     logger.info(f"[{worker_id}] Starting worker loop (interval={interval}s)")
 
@@ -333,15 +356,15 @@ def run(job: str, interval: int, worker_index: int, total_workers: int):
         # Check drain flag before sleeping
         if _draining:
             break
-            
+
         logger.info(f"[{worker_id}] Sleeping {interval}s...")
-        
+
         # Sleep in small increments to respond to drain signal faster
         for _ in range(interval):
             if _draining:
                 break
             time.sleep(1)
-    
+
     logger.info(f"[{worker_id}] Draining complete, exiting gracefully.")
 
 
@@ -353,7 +376,9 @@ def tb_pick_work_cmd(worker_index: int):
 
 
 @cli.command("tb-iterate-prs")
-@click.option("--worker-index", default=0, help="Worker index for sharding and worktree")
+@click.option(
+    "--worker-index", default=0, help="Worker index for sharding and worktree"
+)
 @click.option("--total-workers", default=1, help="Total workers for sharding")
 def tb_iterate_prs_cmd(worker_index: int, total_workers: int):
     """Run iterate-prs once (for testing)."""