-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathalgorithm.py
More file actions
133 lines (120 loc) · 7.1 KB
/
algorithm.py
File metadata and controls
133 lines (120 loc) · 7.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import pandas as pd
from model import Investment
# Global constants
# Metric columns that arrive as text in the CSV and are converted to integers below.
numeric_columns = [
    'Total Environmental Cost',
    'Working Capacity',
    'Fish Production Capacity',
    'Crop Production Capacity',
    'Meat Production Capacity',
    'Biodiversity',
    'Abiotic Resources',
    'Water production capacity (Drinking water & Irrigation Water)',
    'Wood Production Capacity',
]

# Read and process data
# Load the raw sample, then remove every comma from every string cell
# (regex=True makes replace() act on substrings rather than whole-cell matches).
investment_df = pd.read_csv("data/final_raw_sample_0_percent.csv")
investment_df = investment_df.replace(',', '', regex=True)

for col in numeric_columns:
    # Drop the first and last character of each cell.
    # NOTE(review): presumably these are wrapper/padding characters in the raw
    # export -- confirm against the CSV.
    investment_df[col] = investment_df[col].map(lambda cell: cell[1:-1])
    # Discard rows whose trimmed cell is empty or a single space; they cannot
    # be converted to int.
    has_value = ~investment_df[col].isin(["", " "])
    investment_df = investment_df[has_value]
    investment_df[col] = investment_df[col].astype('int')
# Main algorithm
def computeRank(i):
    """Bin every ranking column into quartiles and refresh the global 'mean' score.

    Each ranking column of ``investment_df`` is replaced, in a deep copy, by its
    quartile index (0-3). 'Total Environmental Cost' is scaled by 4 and
    'Biodiversity' by 2; every other column keeps its plain quartile index.
    The row-wise mean of positional columns 6..12 of that binned copy is then
    written to ``investment_df['mean']``.

    NOTE(review): the positional slice 6:13 depends on the CSV column order --
    confirm it covers the intended ranking columns.

    Args:
        i: truthy to get the binned copy back, falsy to only refresh 'mean'.

    Returns:
        The binned DataFrame when ``i`` is truthy, otherwise None.
    """
    global investment_df
    ranking_columns = [
        'Total Environmental Cost',
        'Working Capacity',
        'Fish Production Capacity',
        'Crop Production Capacity',
        'Meat Production Capacity',
        'Biodiversity',
        'Abiotic Resources',
        'Water production capacity (Drinking water & Irrigation Water)',
        'Wood Production Capacity',
    ]
    # Multipliers applied to the quartile index; unlisted columns use 1.
    quartile_weights = {"Total Environmental Cost": 4, "Biodiversity": 2}

    bin_investment_df = investment_df.copy(deep=True)
    for column in ranking_columns:
        # rank(method='first') numbers ties in order of appearance, so qcut
        # receives distinct values to cut into four equal-sized bins.
        ordinal = investment_df[column].rank(method='first')
        quartile = pd.qcut(ordinal, 4, labels=False)
        bin_investment_df[column] = quartile_weights.get(column, 1) * quartile

    investment_df['mean'] = bin_investment_df.iloc[:, 6:13].mean(axis=1)
    return bin_investment_df if i else None
def addInvestment(investment_descriptor):
    """Append *investment_descriptor* as a new row of ``investment_df`` and re-rank.

    The descriptor's attributes are mapped onto the table's column names, the
    row is appended with a fresh 0..n-1 index, and ``computeRank(False)`` is
    called so the 'mean' column includes the new company.

    Args:
        investment_descriptor: object exposing ``company_name``, ``country``,
            ``total_env_cost``, ``work_capacity``, ``fish_prod_capacity``,
            ``crop_prod_capacity``, ``meat_prod_capacity``, ``biodiversity``,
            ``abio_rescs``, ``water_prod_capacity`` and ``wood_prod_capacity``.

    Returns:
        None. The module-level ``investment_df`` is rebound to the extended frame.
    """
    global investment_df
    new_row = {
        'Company Name': investment_descriptor.company_name,
        'Country': investment_descriptor.country,
        'Total Environmental Cost': investment_descriptor.total_env_cost,
        'Working Capacity': investment_descriptor.work_capacity,
        'Fish Production Capacity': investment_descriptor.fish_prod_capacity,
        'Crop Production Capacity': investment_descriptor.crop_prod_capacity,
        'Meat Production Capacity': investment_descriptor.meat_prod_capacity,
        'Biodiversity': investment_descriptor.biodiversity,
        'Abiotic Resources': investment_descriptor.abio_rescs,
        'Water production capacity (Drinking water & Irrigation Water)': investment_descriptor.water_prod_capacity,
        'Wood Production Capacity': investment_descriptor.wood_prod_capacity,
    }
    # DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
    # is the supported equivalent. Columns missing from the new row become NaN.
    investment_df = pd.concat(
        [investment_df, pd.DataFrame([new_row])],
        ignore_index=True,
    )
    # compute rank with new company
    computeRank(False)
def investmentClassifier(investment_descriptor):
    """Return the sustainability rank (1-4) of the described company.

    A company whose name is not yet in ``investment_df`` is first added via
    ``addInvestment`` (which also recomputes the 'mean' column). The rank is
    the average of the 'mean' values of all rows carrying that company name,
    rounded and clamped to the range 1..4.

    The earlier version additionally required the descriptor's *country* to be
    absent from the 'Year' column. That comparison can never match a real
    year, so it did not influence the result, but it raised KeyError on tables
    without a 'Year' column; it has been dropped.

    Args:
        investment_descriptor: object exposing at least ``company_name`` (and
            the attributes ``addInvestment`` reads when the company is new).

    Returns:
        An integer rank between 1 and 4 inclusive.
    """
    company = investment_descriptor.company_name
    is_company = investment_df['Company Name'] == company

    # add unknown company
    if not is_company.any():
        addInvestment(investment_descriptor)
    elif 'mean' not in investment_df.columns:
        # Known company but ranks were never computed: make sure 'mean' exists
        # instead of failing with KeyError below.
        computeRank(False)

    # find rank of input company (re-read the global: addInvestment rebinds it)
    company_rows = investment_df[investment_df['Company Name'] == company]
    company_rank = company_rows['mean'].mean()
    return max(1, min(4, round(company_rank)))
# Calculate initial means for topFive.
# Classifying a company that is not in the table makes investmentClassifier add
# it and recompute ranks, which creates the 'mean' column topFive reads.
# Note that the placeholder row stays in investment_df afterwards.
dummy_run = Investment(
    'dude test company',
    'Switzerland',
    55143243,
    0,
    11456,
    646758,
    0,
    2061,
    3661,
    5828063,
    4,
)
investmentClassifier(dummy_run)
# Get most sustainable firms
def topFive():
    """Return (company name, country) pairs for up to five top-scored rows.

    A row qualifies when its 'mean' value equals exactly 1; the first five
    such rows, in table order, are used.

    Returns:
        A zip iterator over (Company Name, Country) tuples. It can be consumed
        only once.
    """
    global investment_df
    scored_one = investment_df["mean"] == 1
    leaders = investment_df[scored_one].head()
    names = leaders["Company Name"]
    countries = leaders["Country"]
    return zip(names, countries)
# Get suggestions for better firms
def getSuggestions(curr_rank):
    """Return up to three randomly chosen company names ranked better than *curr_rank*.

    Companies are compared by their rounded 'mean' score. If none scores
    strictly below ``curr_rank``, companies on the same rounded level are
    offered instead.

    Fixes over the previous version:
      * ``sample(n=3)`` raised ValueError whenever fewer than three candidates
        existed (including an empty fallback set); the sample size is now
        capped at the number of candidates.
      * the fallback compared the *unrounded* mean with ``curr_rank`` while the
        main branch used the rounded mean; both now use the rounded value.

    Args:
        curr_rank: rank of the current investment, as returned by
            ``investmentClassifier``.

    Returns:
        A list of zero to three company names (duplicates are possible when a
        company occupies several rows).
    """
    rounded_mean = investment_df['mean'].round()

    candidates = investment_df[rounded_mean < curr_rank]
    if candidates.empty:
        # smallest possible bin - so best we can do is same level
        candidates = investment_df[rounded_mean == curr_rank]
    if candidates.empty:
        return []

    picked = candidates.sample(n=min(3, len(candidates)))
    return picked['Company Name'].tolist()
# Get summarizing descriptions
def good_parameters(investment_descriptor):
    """Return the first three ranking columns where the company is in the lowest bin.

    The binned table from ``computeRank(True)`` holds quartile indices 0-3,
    scaled by 4 for 'Total Environmental Cost' and by 2 for 'Biodiversity'.
    A column qualifies when the company's value is below the column's scale
    factor, i.e. its quartile index is 0.

    The company is looked up once and its first matching row is used. The
    previous version re-filtered the whole table for every row and column and
    called ``int()`` on the filtered Series, which is deprecated in recent
    pandas and raises TypeError when the company occupies several rows.

    Args:
        investment_descriptor: object exposing ``company_name``.

    Returns:
        A list of exactly three column names (in ranking-column order), or
        None when the company is not in the table or fewer than three columns
        qualify. NOTE(review): None instead of a shorter list is kept for
        backward compatibility with existing callers.
    """
    ranking_columns = [
        'Total Environmental Cost',
        'Working Capacity',
        'Fish Production Capacity',
        'Crop Production Capacity',
        'Meat Production Capacity',
        'Biodiversity',
        'Abiotic Resources',
        'Water production capacity (Drinking water & Irrigation Water)',
        'Wood Production Capacity',
    ]
    # Exclusive upper bound of the lowest bin per column; unlisted columns use 1.
    lowest_bin_limit = {"Total Environmental Cost": 4, "Biodiversity": 2}

    bin_investment_df = computeRank(True)
    name_matches = bin_investment_df["Company Name"] == investment_descriptor.company_name
    company_rows = bin_investment_df[name_matches]
    if company_rows.empty:
        return None
    company_bins = company_rows.iloc[0]

    parameters = []
    for col in ranking_columns:
        if company_bins[col] < lowest_bin_limit.get(col, 1):
            parameters.append(col)
            if len(parameters) == 3:
                return parameters
    return None
def bad_parameters(investment_descriptor):
    """Return the first three ranking columns where the company is in the highest bin.

    The binned table from ``computeRank(True)`` holds quartile indices 0-3,
    scaled by 4 for 'Total Environmental Cost' and by 2 for 'Biodiversity'.
    A column qualifies when the company's value reaches three times the
    column's scale factor, i.e. its quartile index is 3.

    Fixes over the previous version:
      * the if/elif chain fell through to the unweighted ``>= 3`` test, so
        'Total Environmental Cost' values 4 and 8 and 'Biodiversity' value 4
        (quartile indices 1 and 2) were reported as bad; each column is now
        compared against its own threshold only.
      * the company is looked up once (first matching row) instead of
        re-filtering the table for every row and column and calling ``int()``
        on the filtered Series, which raises TypeError when the company
        occupies several rows.

    Args:
        investment_descriptor: object exposing ``company_name``.

    Returns:
        A list of exactly three column names (in ranking-column order), or
        None when the company is not in the table or fewer than three columns
        qualify. NOTE(review): None instead of a shorter list is kept for
        backward compatibility with existing callers.
    """
    ranking_columns = [
        'Total Environmental Cost',
        'Working Capacity',
        'Fish Production Capacity',
        'Crop Production Capacity',
        'Meat Production Capacity',
        'Biodiversity',
        'Abiotic Resources',
        'Water production capacity (Drinking water & Irrigation Water)',
        'Wood Production Capacity',
    ]
    # Inclusive lower bound of the highest bin per column; unlisted columns use 3.
    highest_bin_start = {"Total Environmental Cost": 12, "Biodiversity": 6}

    bin_investment_df = computeRank(True)
    name_matches = bin_investment_df["Company Name"] == investment_descriptor.company_name
    company_rows = bin_investment_df[name_matches]
    if company_rows.empty:
        return None
    company_bins = company_rows.iloc[0]

    parameters = []
    for col in ranking_columns:
        if company_bins[col] >= highest_bin_start.get(col, 3):
            parameters.append(col)
            if len(parameters) == 3:
                return parameters
    return None