forked from amanirmk/AMISTAD-intention-exp1
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhelpData.py
More file actions
188 lines (170 loc) · 7.69 KB
/
helpData.py
File metadata and controls
188 lines (170 loc) · 7.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
import matplotlib.pyplot as plt
import seaborn as sb
import csv
import pandas as pd
import numpy as np
import statistics as stats
import math as m
import copy
import ast
from KDEpy import FFTKDE
def filterDataFrame(data, filterlist):
    """Return a copy of data restricted to rows matching every filter.

    Inputs:
        data: the DataFrame to filter; it is deep-copied first, so the
            caller's object is never the one returned
        filterlist: iterable of (column name, required value) pairs; a row
            is kept only if it equals the required value in every column
    Returns:
        the filtered copy (a full copy when filterlist is empty)"""
    filtered = copy.deepcopy(data)
    for column, wanted in filterlist:
        # narrow the running result one condition at a time
        filtered = filtered[filtered[column] == wanted]
    return filtered
def combineCSVs(fileNameToCreate, csvFileNameList, numColumns=15):
    """Combines the specified csv files into one big csv file.

    Inputs:
        fileNameToCreate: the file name for the combined csv (written
            without an index column)
        csvFileNameList: list of csv file names to append, in order; the
            first row of each file is treated as its header
        numColumns: how many leading columns to read from every file
            (default 15, i.e. columns 0 through 14); pass None to read
            every column
    Returns:
        (resultDf, dfList): the concatenated DataFrame and the list of
        per-file DataFrames it was built from
    Raises:
        ValueError: from pd.concat when csvFileNameList is empty"""
    # usecols=None is the pandas default and means "all columns"
    columnsToRead = None if numColumns is None else range(numColumns)
    dfList = [
        pd.read_csv(fileName, header=0, index_col=None, usecols=columnsToRead)
        for fileName in csvFileNameList
    ]
    resultDf = pd.concat(dfList)  # row indices of the inputs are kept as-is
    resultDf.to_csv(fileNameToCreate, index=False)
    return resultDf, dfList
def allDataToCSV(allData, filename):
    """takes in a data list obtained from simulateManySetups and writes all of the run information into a csv

    Inputs:
        allData: list of batch dicts; each batch has a "runsData" list of
            per-run dicts. The keys of the very first run define the csv
            columns, so every run is expected to share them.
        filename: csv file to create (an existing file is overwritten)

    Each row is prefixed with "BATCH #" and "RUN #" columns holding the
    batch's position in allData and the run's position within its batch.
    The run dicts in allData are left unmodified.

    Raises:
        IndexError: if allData, or the first batch's "runsData", is empty"""
    indexColumns = ["BATCH #", "RUN #"]
    # Derive the header before opening the file so that bad input cannot
    # truncate an existing csv. Skip the index columns if a run dict already
    # carries them, otherwise the header would list them twice.
    runKeys = [key for key in allData[0]["runsData"][0] if key not in indexColumns]
    fieldnames = indexColumns + runKeys
    with open(filename, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for batchIndex, batchData in enumerate(allData):
            for runIndex, runDict in enumerate(batchData["runsData"]):
                # write a fresh row dict instead of adding keys to the caller's data
                writer.writerow({**runDict, "BATCH #": batchIndex, "RUN #": runIndex})
def appendDataToCSV(allData, filename):
    """takes in a data list shaped like the one given to allDataToCSV and appends its runs to an existing csv

    Inputs:
        allData: list of batch dicts; each batch has a "runsData" list of
            per-run dicts. The keys of the very first run define the
            column order, so every run is expected to share them.
        filename: csv file to append to (created if it does not exist)

    No header row is written. Each row is prefixed with "BATCH #" and
    "RUN #" values holding the batch's position in allData and the run's
    position within its batch. The run dicts in allData are left unmodified.

    Raises:
        IndexError: if allData, or the first batch's "runsData", is empty"""
    indexColumns = ["BATCH #", "RUN #"]
    # Skip the index columns if a run dict already carries them (for example
    # from an earlier export), otherwise every row would repeat them.
    runKeys = [key for key in allData[0]["runsData"][0] if key not in indexColumns]
    fieldnames = indexColumns + runKeys
    with open(filename, 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        for batchIndex, batchData in enumerate(allData):
            for runIndex, runDict in enumerate(batchData["runsData"]):
                # write a fresh row dict instead of adding keys to the caller's data
                writer.writerow({**runDict, "BATCH #": batchIndex, "RUN #": runIndex})
def histFromCSV(filename, param):
    """Shows a histogram of one csv column.

    Inputs:
        filename: name of csv to read from
        param: String, name of column in csv file; every value in the
            column is converted with int() before plotting"""
    column = pd.read_csv(filename)[param]
    counts = list(map(int, column))
    plt.hist(counts, histtype='bar', orientation='vertical')
    plt.title(param + " distribution")
    plt.xlabel(param)
    plt.ylabel("Number of trials")
    plt.show()
def lifeTimeStatsFromCSV(filename, groupParam):
    """Plots prey lifespans from a csv of runs, grouped by the specified parameter, and returns per-group statistics.

    Inputs:
        filename: csv of runs; must contain the groupParam column and a
            "preyCountOverTime" column of stringified count lists
        groupParam: String, name of the column to group runs by
    Returns:
        a list with one entry per group:
        [group label, average lifespan, standard deviation, 95% ci, list of lifespans]"""
    runs = pd.read_csv(filename)
    lifespanColumn = "lifespan (time steps)"

    groupLabels = []
    lifespans = []
    extraData = []
    for groupValue, countStrings in runs.groupby(groupParam)["preyCountOverTime"]:
        # pool the lifespans of every run that belongs to this group
        groupLifeTimes = []
        for countStr in countStrings:
            groupLifeTimes.extend(lifeTimes(strToNumList(countStr)))
        avg, std, ci = listStats(groupLifeTimes)
        # the x tick label carries the group's value plus its truncated ci
        ciText = str((int(ci[0]), int(ci[1])))
        tickLabel = str(groupValue) + "\nCI: " + ciText
        groupLabels.extend([tickLabel] * len(groupLifeTimes))
        lifespans.extend(groupLifeTimes)
        extraData.append([str(groupParam) + " " + str(groupValue), avg, std, ci, groupLifeTimes])

    data_df = pd.DataFrame(
        {groupParam: groupLabels, lifespanColumn: lifespans},
        columns=[groupParam, lifespanColumn],
    )
    # one jet colour per distinct value of the grouping column
    numGroups = len(runs[groupParam].unique())
    colorList = list(plt.cm.jet(np.linspace(0, 0.9, numGroups)))
    ax = sb.pointplot(x=groupParam, y=lifespanColumn, data=data_df, estimator=np.mean, ci=95, palette=colorList, errwidth=2, capsize=0.05)
    sb.catplot(x=groupParam, y=lifespanColumn, data=data_df, ax=ax, palette=colorList)
    # NOTE(review): presumably this close() discards a separate figure opened
    # by catplot so only the point plot's figure is shown - confirm against
    # the seaborn version in use.
    plt.close()
    plt.show()
    return extraData
def lifeTimes(countList):
    """takes in a prey count over time list and returns lifetimes of prey

    Inputs:
        countList: list of prey counts, one per time step
    Returns:
        a list with one lifetime per prey. Whenever the count falls below
        the last recorded level, each prey lost is given the index of that
        time step as its lifetime. Prey still counted in the final entry
        are given len(countList). An empty countList gives an empty list."""
    if len(countList) == 0:
        # nothing was recorded, so there are no prey to report on
        return []
    preyLifeTimes = []
    currentNumPrey = countList[0]
    for step, count in enumerate(countList):
        if count < currentNumPrey:
            # one entry per prey lost since the last recorded level
            preyLifeTimes.extend([step] * (currentNumPrey - count))
            currentNumPrey = count
    # survivors outlived the whole recording
    preyLifeTimes.extend([len(countList)] * countList[-1])
    return preyLifeTimes
def listStats(numList):
    """takes in a list of numbers and returns [average, standard deviation, 95% confidence interval]

    The standard deviation is the sample standard deviation, and the
    interval is average +/- 1.96 standard errors, returned as a
    (lower, upper) tuple."""
    zScore = 1.96  # 95% ci
    mean = stats.mean(numList)
    spread = stats.stdev(numList)
    margin = zScore * (spread / m.sqrt(len(numList)))
    return [mean, spread, (mean - margin, mean + margin)]
def survivalGraphFromCSV(filename, groupParam):
    """takes in a csv file of runs and plots prey count over time, organized by color with respect to the indicated grouping parameter (ie targetedAware)

    Inputs:
        filename: csv of runs; must contain the groupParam column and a
            "preyCountOverTime" column of stringified count lists
        groupParam: String, name of the column to group runs by"""
    runs = pd.read_csv(filename)
    # one jet colour per distinct value of the grouping column
    palette = plt.cm.jet(np.linspace(0, 0.9, len(runs[groupParam].unique())))
    grouped = runs.groupby(groupParam)["preyCountOverTime"]
    for (groupValue, countStrings), groupColor in zip(grouped, palette):
        for position, countStr in enumerate(countStrings):
            counts = strToNumList(countStr)
            lineOptions = {"color": groupColor}
            if position == 0:
                # label only the first line of a group so the legend has one entry per group
                lineOptions["label"] = groupParam + " " + str(groupValue)
            plt.plot(counts, **lineOptions)
    plt.title("Prey Population Over Time")
    plt.xlabel("time (# of steps)")
    plt.ylabel("population (# of prey)")
    plt.legend()
    plt.show()
def strToNumList(listStr):
    """takes in the string form of a list of integers (ie "[3, 2, 2]") and returns the list of ints

    Spaces and the surrounding square brackets are ignored. The string form
    of an empty list ("[]" or "") gives an empty list.

    Raises:
        ValueError: if any comma-separated item is not an integer"""
    inner = listStr.replace(" ", "").strip("[]")
    if not inner:
        # "".split(",") would give [""], and int("") raises
        return []
    return [int(num) for num in inner.split(",")]
def heatMapFromCSV(fileName, groupParam1, groupParam2):
    """Shows an annotated heat map of the 'stepCount' column, pivoted on two parameters.

    Inputs:
        fileName: string, the csv file name
        groupParam1: string, the first variable (one heat map row per value)
        groupParam2: string, the second variable to consider (one heat map
            column per value)"""
    # runs sharing both parameter values are combined by pivot_table's
    # default aggregation
    table = pd.read_csv(fileName).pivot_table(
        values='stepCount', index=groupParam1, columns=groupParam2
    )
    sb.heatmap(table, annot=True)
    plt.show()
def avgTargetInfo(targetInfo, graph=False):
    """Summarizes targeting probabilities and targeting durations.

    Inputs:
        targetInfo: a pair (probList, durationList) of number lists; each
            is summarized with listStats
        graph: when True, also shows a histogram of durationList
    Returns:
        (s_avg, s_ci, d_avg, d_ci): the average and confidence interval of
        probList, then the average and confidence interval of durationList"""
    probList, durationList = targetInfo
    # the standard deviations returned by listStats are not needed here
    s_avg, _, s_ci = listStats(probList)
    d_avg, _, d_ci = listStats(durationList)
    if graph:
        plt.hist(durationList)
        plt.title("Distribution for Duration of Targeting")
        plt.xlabel("Duration of Targeting (timesteps)")
        plt.ylabel("Count")
        plt.show()
    return s_avg, s_ci, d_avg, d_ci
def loadCautiousDict(filename):
    """Loads a csv with a 'keys' column into a dict keyed by that column.

    Inputs:
        filename: csv to read. The first column after 'keys' must hold the
            string form of a Python (prob, length) pair, where length is a
            sequence.
    Returns:
        a dict mapping each key to (prob, bandwidth, length) when length
        has more than 200 items, where bandwidth is the bw attribute of a
        gaussian FFTKDE fitted to length with the 'ISJ' rule. Keys with 200
        or fewer items keep the list of their raw csv row values."""
    table = pd.read_csv(filename)
    paramDict = table.set_index('keys').T.to_dict('list')
    # only existing keys are reassigned, so the dict keeps its size while iterating
    for name in paramDict:
        prob, length = ast.literal_eval(paramDict[name][0])
        if len(length) <= 200:
            continue
        kde = FFTKDE(kernel='gaussian', bw='ISJ').fit(length)
        kde.evaluate()
        # NOTE(review): this assignment is treated as part of the
        # more-than-200-items branch, the only placement where kde is always
        # defined - confirm that shorter entries are meant to stay unparsed.
        paramDict[name] = prob, kde.bw, length
    return paramDict