From 2e3f0cf16a33f9f2df2d369931aeba9171d0b0e1 Mon Sep 17 00:00:00 2001
From: vmisirlis010 <83094238+vmisirlis010@users.noreply.github.com>
Date: Sat, 11 Mar 2023 12:59:38 +0200
Subject: [PATCH] added sklearn linearRegression to be able to run the
experiment code
---
MLFLow/Sklearn-Regression-MLFlow.ipynb | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/MLFLow/Sklearn-Regression-MLFlow.ipynb b/MLFLow/Sklearn-Regression-MLFlow.ipynb
index 08a4dfa..7901d82 100644
--- a/MLFLow/Sklearn-Regression-MLFlow.ipynb
+++ b/MLFLow/Sklearn-Regression-MLFlow.ipynb
@@ -1 +1 @@
-{"cells":[{"cell_type":"markdown","source":["# Data Science with Python \n## Regression \n\nIn this demo we are going to look at a regression algorthm. \nRegression is typicaly a supervised Machine Learning technique (refer back to the slides for a definition). \n\nIn this demo, we will explore Regression with linear regression. We will use a series of modules:\n\n**matplotlib** - This module will allow us to visualise the output of our model. We will want to examine the data in 2 dimensions, we could do more but that will do for now. Interested in more dimensions? Ask me about PCA. \n**numpy** - Statistical package for working with numbers. \n**sklearn** -sklearn is one of the most used modules for general machine learning. Shallow learning. We can talk more about deep learning another time. \n\nOk. Lets begin by looking at importing those modules.\n\n[Original example](http://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html)"],"metadata":{"deletable":true,"editable":true}},{"cell_type":"code","source":["import matplotlib.pyplot as plt\nimport numpy as np\nfrom sklearn import datasets, linear_model\nfrom sklearn.metrics import mean_squared_error, r2_score\nimport pandas as pd\nimport os\nimport mlflow\nfrom math import sqrt\n\n# Set the experiment name to an experiment in the shared experiments folder\n\nmlflow.set_experiment(\"/mlflow/regressionDiabetes\")\n"],"metadata":{"collapsed":false,"deletable":true,"editable":true},"outputs":[{"metadata":{},"output_type":"display_data","data":{"text/html":["\n
INFO: '/mlflow/regressionDiabetes' does not exist. Creating a new experiment\n
"]}}],"execution_count":2},{"cell_type":"markdown","source":["Load a sample dataset. This will use the diabete dataset."],"metadata":{"deletable":true,"editable":true}},{"cell_type":"code","source":["# Load the diabetes dataset\ndiabetes = datasets.load_diabetes()\n"],"metadata":{"collapsed":true,"deletable":true,"editable":true},"outputs":[{"metadata":{},"output_type":"display_data","data":{"text/html":["\n"]}}],"execution_count":4},{"cell_type":"markdown","source":["For ease, lets load this in to a Padnas DataFrame and look at the top few rows."],"metadata":{"deletable":true,"editable":true}},{"cell_type":"code","source":["diabetespd = pd.DataFrame(data=diabetes.data)\ndiabetespd.to_csv('diabetes.txt', encoding='utf-8', index=False)\n"],"metadata":{"collapsed":false,"deletable":true,"editable":true},"outputs":[{"metadata":{},"output_type":"display_data","data":{"text/html":["\n"]}}],"execution_count":6},{"cell_type":"code","source":["diabetespd.head()"],"metadata":{"collapsed":false,"deletable":true,"editable":true},"outputs":[{"metadata":{},"output_type":"display_data","data":{"text/html":["\nOut[4]: \n 0 1 2 3 4 5 6 \\\n0 0.038076 0.050680 0.061696 0.021872 -0.044223 -0.034821 -0.043401 \n1 -0.001882 -0.044642 -0.051474 -0.026328 -0.008449 -0.019163 0.074412 \n2 0.085299 0.050680 0.044451 -0.005671 -0.045599 -0.034194 -0.032356 \n3 -0.089063 -0.044642 -0.011595 -0.036656 0.012191 0.024991 -0.036038 \n4 0.005383 -0.044642 -0.036385 0.021872 0.003935 0.015596 0.008142 \n\n 7 8 9 \n0 -0.002592 0.019908 -0.017646 \n1 -0.039493 -0.068330 -0.092204 \n2 -0.002592 0.002864 -0.025930 \n3 0.034309 0.022692 -0.009362 \n4 -0.002592 -0.031991 -0.046641 \n
"]}}],"execution_count":7},{"cell_type":"code","source":["# Use only one feature\ndiabetes_X = diabetes.data[:, np.newaxis, 2]"],"metadata":{"collapsed":true,"deletable":true,"editable":true},"outputs":[{"metadata":{},"output_type":"display_data","data":{"text/html":["\n"]}}],"execution_count":8},{"cell_type":"code","source":["diabetes_X[0:5]"],"metadata":{"collapsed":false,"deletable":true,"editable":true},"outputs":[{"metadata":{},"output_type":"display_data","data":{"text/html":["\nOut[6]: \narray([[ 0.06169621],\n [-0.05147406],\n [ 0.04445121],\n [-0.01159501],\n [-0.03638469]])\n
"]}}],"execution_count":9},{"cell_type":"markdown","source":["Lets split the data into training/testing sets.\nWe will do an 80/20 split."],"metadata":{"deletable":true,"editable":true}},{"cell_type":"code","source":["with mlflow.start_run():\n # 1st idea\n diabetes_X = diabetes.data[:, np.newaxis, 2]\n \n # 2nd idea\n #diabetes_X = diabetes.data\n \n diabetes_X_train = diabetes_X[:-20]\n diabetes_X_test = diabetes_X[-20:]\n\n diabetes_y_train = diabetes.target[:-20]\n diabetes_y_test = diabetes.target[-20:]\n\n regr = linear_model\n\n #regr = linear_model.Lasso(alpha=0.1)\n #mlflow.log_param(\"alpha\", 0.1)\n \n #regr = linear_model.LassoLars(alpha=0.1)\n #mlflow.log_param(\"alpha\", 0.1)\n\n #regr = linear_model.BayesianRidge() \n\n regr.fit(diabetes_X_train, diabetes_y_train)\n\n diabetes_y_pred = regr.predict(diabetes_X_test)\n\n mlflow.log_metric(\"mse\", mean_squared_error(diabetes_y_test, diabetes_y_pred))\n mlflow.log_metric(\"rmse\", sqrt(mean_squared_error(diabetes_y_test, diabetes_y_pred)))\n mlflow.log_metric(\"r2\", r2_score(diabetes_y_test, diabetes_y_pred))\n \n mlflow.log_artifact(\"diabetes.txt\")\n"],"metadata":{"collapsed":true,"deletable":true,"editable":true},"outputs":[{"metadata":{},"output_type":"display_data","data":{"text/html":["\n"]}}],"execution_count":11}],"metadata":{"language_info":{"mimetype":"text/x-python","name":"python","pygments_lexer":"ipython3","codemirror_mode":{"name":"ipython","version":3},"version":"3.5.2","nbconvert_exporter":"python","file_extension":".py"},"name":"Sklearn-Regression-MLFlow","notebookId":1962496471401116,"kernelspec":{"display_name":"Python [conda root]","language":"python","name":"conda-root-py"},"anaconda-cloud":{}},"nbformat":4,"nbformat_minor":0}
+{"cells":[{"cell_type":"markdown","metadata":{"deletable":true,"editable":true},"source":["# Data Science with Python \n","## Regression \n","\n","In this demo we are going to look at a regression algorithm. \n","Regression is typically a supervised Machine Learning technique (refer back to the slides for a definition). \n","\n","In this demo, we will explore Regression with linear regression. We will use a series of modules:\n","\n","**matplotlib** - This module will allow us to visualise the output of our model. We will want to examine the data in 2 dimensions, we could do more but that will do for now. Interested in more dimensions? Ask me about PCA. \n","**numpy** - Numerical package for working with arrays of numbers. \n","**sklearn** - sklearn is one of the most used modules for general machine learning. Shallow learning. We can talk more about deep learning another time. \n","\n","Ok. Let's begin by looking at importing those modules.\n","\n","[Original example](http://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html)"]},{"cell_type":"code","execution_count":2,"metadata":{"collapsed":false,"deletable":true,"editable":true},"outputs":[{"data":{"text/html":["\n","INFO: '/mlflow/regressionDiabetes' does not exist. Creating a new experiment\n","
"]},"metadata":{},"output_type":"display_data"}],"source":["import matplotlib.pyplot as plt\n","from sklearn.linear_model import LinearRegression #added this lib\n","import numpy as np\n","from sklearn import datasets, linear_model\n","from sklearn.metrics import mean_squared_error, r2_score\n","import pandas as pd\n","import os\n","import mlflow\n","from math import sqrt\n","\n","# Set the experiment name to an experiment in the shared experiments folder\n","\n","mlflow.set_experiment(\"/mlflow/regressionDiabetes\")\n"]},{"cell_type":"markdown","metadata":{"deletable":true,"editable":true},"source":["Load a sample dataset. This will use the diabetes dataset."]},{"cell_type":"code","execution_count":4,"metadata":{"collapsed":true,"deletable":true,"editable":true},"outputs":[{"data":{"text/html":["\n",""]},"metadata":{},"output_type":"display_data"}],"source":["# Load the diabetes dataset\n","diabetes = datasets.load_diabetes()\n"]},{"cell_type":"markdown","metadata":{"deletable":true,"editable":true},"source":["For ease, let's load this into a pandas DataFrame and look at the top few rows."]},{"cell_type":"code","execution_count":6,"metadata":{"collapsed":false,"deletable":true,"editable":true},"outputs":[{"data":{"text/html":["\n",""]},"metadata":{},"output_type":"display_data"}],"source":["diabetespd = pd.DataFrame(data=diabetes.data)\n","diabetespd.to_csv('diabetes.txt', encoding='utf-8', index=False)\n"]},{"cell_type":"code","execution_count":7,"metadata":{"collapsed":false,"deletable":true,"editable":true},"outputs":[{"data":{"text/html":["\n","Out[4]: \n"," 0 1 2 3 4 5 6 \\\n","0 0.038076 0.050680 0.061696 0.021872 -0.044223 -0.034821 -0.043401 \n","1 -0.001882 -0.044642 -0.051474 -0.026328 -0.008449 -0.019163 0.074412 \n","2 0.085299 0.050680 0.044451 -0.005671 -0.045599 -0.034194 -0.032356 \n","3 -0.089063 -0.044642 -0.011595 -0.036656 0.012191 0.024991 -0.036038 \n","4 0.005383 -0.044642 -0.036385 0.021872 0.003935 0.015596 0.008142 \n","\n"," 7 8 9 \n","0 
-0.002592 0.019908 -0.017646 \n","1 -0.039493 -0.068330 -0.092204 \n","2 -0.002592 0.002864 -0.025930 \n","3 0.034309 0.022692 -0.009362 \n","4 -0.002592 -0.031991 -0.046641 \n","
"]},"metadata":{},"output_type":"display_data"}],"source":["diabetespd.head()"]},{"cell_type":"code","execution_count":8,"metadata":{"collapsed":true,"deletable":true,"editable":true},"outputs":[{"data":{"text/html":["\n",""]},"metadata":{},"output_type":"display_data"}],"source":["# Use only one feature\n","diabetes_X = diabetes.data[:, np.newaxis, 2]"]},{"cell_type":"code","execution_count":9,"metadata":{"collapsed":false,"deletable":true,"editable":true},"outputs":[{"data":{"text/html":["\n","Out[6]: \n","array([[ 0.06169621],\n"," [-0.05147406],\n"," [ 0.04445121],\n"," [-0.01159501],\n"," [-0.03638469]])\n","
"]},"metadata":{},"output_type":"display_data"}],"source":["diabetes_X[0:5]"]},{"cell_type":"markdown","metadata":{"deletable":true,"editable":true},"source":["Let's split the data into training/testing sets.\n","We will hold out the last 20 samples for testing and train on the rest."]},{"cell_type":"code","execution_count":11,"metadata":{"collapsed":true,"deletable":true,"editable":true},"outputs":[{"data":{"text/html":["\n",""]},"metadata":{},"output_type":"display_data"}],"source":["with mlflow.start_run():\n"," # 1st idea\n"," diabetes_X = diabetes.data[:, np.newaxis, 2]\n"," \n"," # 2nd idea\n"," #diabetes_X = diabetes.data\n"," \n"," diabetes_X_train = diabetes_X[:-20]\n"," diabetes_X_test = diabetes_X[-20:]\n","\n"," diabetes_y_train = diabetes.target[:-20]\n"," diabetes_y_test = diabetes.target[-20:]\n","\n"," regr = LinearRegression()\n"," #regr = linear_model\n","\n"," #regr = linear_model.Lasso(alpha=0.1)\n"," #mlflow.log_param(\"alpha\", 0.1)\n"," \n"," #regr = linear_model.LassoLars(alpha=0.1)\n"," #mlflow.log_param(\"alpha\", 0.1)\n","\n"," #regr = linear_model.BayesianRidge() \n","\n"," regr.fit(diabetes_X_train, diabetes_y_train)\n","\n"," diabetes_y_pred = regr.predict(diabetes_X_test)\n","\n"," mlflow.log_metric(\"mse\", mean_squared_error(diabetes_y_test, diabetes_y_pred))\n"," mlflow.log_metric(\"rmse\", sqrt(mean_squared_error(diabetes_y_test, diabetes_y_pred)))\n"," mlflow.log_metric(\"r2\", r2_score(diabetes_y_test, diabetes_y_pred))\n"," \n"," mlflow.log_artifact(\"diabetes.txt\")\n"]}],"metadata":{"anaconda-cloud":{},"kernelspec":{"display_name":"Python [conda root]","language":"python","name":"conda-root-py"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.5.2"},"name":"Sklearn-Regression-MLFlow","notebookId":1962496471401116},"nbformat":4,"nbformat_minor":0}