def load_data(path, test_s=0.33, Random_state=9):
    """Read a CSV file and split it into train and test partitions.

    The last column of the file is treated as the target; all preceding
    columns are treated as features.

    Args:
        path: Location of the CSV file, handed to ``pandas.read_csv``.
        test_s: Forwarded to ``train_test_split`` as ``test_size``.
        Random_state: Forwarded to ``train_test_split`` as ``random_state``.

    Returns:
        A 5-tuple ``(df, X_train, X_test, y_train, y_test)`` where ``df`` is
        the complete frame read from ``path``.
    """
    frame = pd.read_csv(path)
    features, target = frame.iloc[:, :-1], frame.iloc[:, -1]
    splits = train_test_split(
        features,
        target,
        test_size=test_s,
        random_state=Random_state,
    )
    # train_test_split yields X_train, X_test, y_train, y_test in that order.
    return (frame, *splits)
def Max_important_feature(path, test_s=0.33, Random_state=4):
    """Load a CSV file and return it together with a train/test split.

    NOTE(review): the name suggests feature ranking, but the body only loads
    the file and splits it -- confirm the intended behaviour.

    Args:
        path: Location of the CSV file, handed to ``pandas.read_csv``.
        test_s: Forwarded to ``train_test_split`` as ``test_size``.
        Random_state: Forwarded to ``train_test_split`` as ``random_state``.

    Returns:
        A 5-tuple ``(df, X_train, X_test, y_train, y_test)``; the last column
        of the file is the target, every other column is a feature.
    """
    data = pd.read_csv(path)
    split_options = {"test_size": test_s, "random_state": Random_state}
    # Positional order is (features, target): all-but-last column, last column.
    parts = train_test_split(data.iloc[:, :-1], data.iloc[:, -1], **split_options)
    return (data, *parts)
def polynomial(power=5, Random_state=9):
    """Fit a polynomial regression on the top four SalePrice-correlated features.

    The four columns of the module-level ``data_set`` with the highest signed
    correlation to ``SalePrice`` (as computed by ``DataFrame.corr()``) are
    selected, split 67/33 into train and test partitions, and a
    ``PolynomialFeatures`` + ``LinearRegression`` pipeline is fitted on the
    training partition.

    Args:
        power: Degree passed to ``PolynomialFeatures``.
        Random_state: Seed passed to ``train_test_split`` as ``random_state``.

    Returns:
        The fitted scikit-learn pipeline.
    """
    model = make_pipeline(
        PolynomialFeatures(degree=power, include_bias=False),
        LinearRegression(),
    )

    # Rank every non-target column by its correlation with the target and
    # keep the four highest.
    correlations = data_set.corr()['SalePrice'].drop('SalePrice')
    top_features = correlations.sort_values(ascending=False).index[:4]

    features = data_set[top_features]
    target = data_set['SalePrice']
    # Only the training partition is used here; the held-out parts are
    # discarded so the module-level X_test / y_test are not shadowed.
    train_X, _, train_y, _ = train_test_split(
        features, target, random_state=Random_state, test_size=0.33
    )

    model.fit(train_X, train_y)
    # Prints the prediction for one arbitrary four-feature row.
    print(model.predict(np.array([4, 5, 6, 7]).reshape(1, -1)))
    return model


polynomial(power=5, Random_state=9)