-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathassignment.py
More file actions
63 lines (59 loc) · 4.52 KB
/
assignment.py
File metadata and controls
63 lines (59 loc) · 4.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import numpy as np
import pandas as pd
def _supportedItems(allItems, itemType, scoreMtx, selection, context, temp, allTemplates,
                    support_minScore, assign_minPctSupport, unassign_maxPctSupport):
    """Build the candidate rows for one (context, template) pair.

    Returns a DataFrame with one row per supported item (columns: "index",
    <itemType>, "context", "template", avgScore_<t>..., pctSupport_<t>...),
    or None when no item qualifies.

    Raises:
        ValueError: if itemType is not "feature", "edge" or "sample".
    """
    # Axis 0 of scoreMtx follows allTemplates. For "feature"/"edge" the items
    # live on axis 1 and the labeled selection picks entries on axis 2; for
    # "sample" the roles of axes 1 and 2 are swapped.
    if itemType in ("feature", "edge"):
        score = scoreMtx[:, :, selection]
        reduceAxis = 2
    elif itemType == "sample":
        score = scoreMtx[:, selection, :]
        reduceAxis = 1
    else:
        raise ValueError(f"itemType must be 'feature', 'edge' or 'sample', got {itemType!r}")

    # An empty selection can never support an item; bail out before numpy
    # emits "mean of empty slice" warnings and fills the table with NaN.
    if score.shape[reduceAxis] == 0:
        return None

    # Fraction of selected entries whose score exceeds the threshold,
    # laid out as items (rows) x templates (columns).
    pctSupport = pd.DataFrame((score > support_minScore).mean(axis=reduceAxis).T, columns=allTemplates)
    ownSupport = pctSupport[temp] > assign_minPctSupport
    othersWeak = (pctSupport.drop(temp, axis=1) < unassign_maxPctSupport).all(axis=1)
    supported = pctSupport.loc[ownSupport & othersWeak]
    if supported.empty:
        return None

    # Average score of the supported items only, same layout as pctSupport.
    rows = supported.index
    if reduceAxis == 2:
        selectedScore = score[:, rows, :]
    else:
        selectedScore = score[:, :, rows]
    avgScore = pd.DataFrame(selectedScore.mean(axis=reduceAxis).T, index=rows, columns=allTemplates)

    candidates = pd.concat(
        [
            pd.DataFrame({itemType: [allItems[x] for x in rows]}, index=rows),
            pd.DataFrame({"context": context, "template": temp}, index=rows),
            avgScore.rename(columns={t: f"avgScore_{t}" for t in allTemplates}).round(3),
            supported.rename(columns={t: f"pctSupport_{t}" for t in allTemplates}).round(3),
        ],
        axis=1,
    )
    # reset_index keeps the item position as an explicit "index" column.
    return candidates.reset_index()


def _resolveContext(candidates, allTemplates, uniqueTemplateAssignment, minScoreRatio):
    """Merge the per-template candidates of one context and drop inconsistent or ambiguous rows."""
    contextAssignment = pd.concat(candidates, axis=0, ignore_index=True)

    # Map score columns back to the original template labels. Parsing the
    # label out of the column name would always yield a string (so non-string
    # labels could never match) and would break on labels that contain the
    # column prefix.
    avgCols = {f"avgScore_{t}": t for t in allTemplates}
    pctCols = {f"pctSupport_{t}": t for t in allTemplates}
    byAvgScore = contextAssignment[list(avgCols)].idxmax(axis=1).map(avgCols)
    byPctSupport = contextAssignment[list(pctCols)].idxmax(axis=1).map(pctCols)

    # Keep a row only if its template also wins on both average score and
    # percent support within that row.
    template = contextAssignment["template"]
    contextAssignment = contextAssignment.loc[(template == byAvgScore) & (template == byPctSupport)]

    if not uniqueTemplateAssignment:
        return contextAssignment

    # Items that are still assigned to more than one template.
    isAmbiguous = contextAssignment["index"].duplicated(keep=False)
    winners = []
    for _, rows in contextAssignment.loc[isAmbiguous].groupby("index"):
        # Each row is judged by the average score of its own template.
        ownScore = pd.Series(
            [rows.at[i, f"avgScore_{rows.at[i, 'template']}"] for i in rows.index],
            index=rows.index,
        ).sort_values(ascending=False)
        # A zero runner-up gives inf (kept) or nan (dropped); that outcome is
        # intentional, so only the numpy warning is silenced.
        with np.errstate(divide="ignore", invalid="ignore"):
            dominant = ownScore.iloc[0] / ownScore.iloc[1] > minScoreRatio
        if dominant:
            winners.append(ownScore.index[0])

    unambiguous = contextAssignment.loc[~isAmbiguous]
    if not winners:
        # Avoid concatenating with an empty frame (dtype inference pitfalls).
        return unambiguous
    return pd.concat([unambiguous, contextAssignment.loc[winners]], axis=0)


def assign(allItems, itemType, labeling, scoreMtx, support_minScore, assign_minPctSupport, unassign_maxPctSupport,
           allContexts, allTemplates, uniqueTemplateAssignment=True, minScoreRatio=2):
    """Assign items to templates, context by context, based on score support.

    For every context and every template that has a labeled selection, an item
    becomes a candidate when the fraction of selected entries scoring above
    ``support_minScore`` exceeds ``assign_minPctSupport`` for that template and
    stays below ``unassign_maxPctSupport`` for every other template. A
    candidate is kept only if its template also has the highest average score
    and the highest percent support in its row.

    Args:
        allItems: sequence of item identifiers, indexed by item position.
        itemType: "feature", "edge" or "sample"; also used as the name of the
            item column in the result.
        labeling: mapping ``labeling[template][context]`` -> selection used to
            index scoreMtx (axis 2 for "feature"/"edge", axis 1 for "sample").
        scoreMtx: 3-D array indexed as [template, axis 1, axis 2]; axis 0 must
            follow the order of allTemplates.
        support_minScore: score above which an entry counts as supporting.
        assign_minPctSupport: minimum support fraction for the own template.
        unassign_maxPctSupport: support fraction every other template must
            stay below.
        allContexts: contexts to process.
        allTemplates: template labels (any hashable type; labels are assumed
            to format to distinct strings).
        uniqueTemplateAssignment: if True, an item assigned to several
            templates within a context is kept only for the best one, and only
            when its score is more than ``minScoreRatio`` times the runner-up;
            otherwise the item is dropped for that context.
        minScoreRatio: dominance ratio used by uniqueTemplateAssignment.

    Returns:
        DataFrame with columns "index", <itemType>, "context", "template",
        avgScore_<template>..., pctSupport_<template>..., sorted by
        ("index", "context", "template"). Empty (with the same columns) when
        nothing is assigned.

    Raises:
        ValueError: if itemType is unknown (raised when the first labeled
            (template, context) pair is processed).
    """
    assignment = []
    for context in allContexts:
        candidates = []
        for temp in allTemplates:
            if temp not in labeling or context not in labeling[temp]:
                continue
            rows = _supportedItems(allItems, itemType, scoreMtx, labeling[temp][context], context, temp,
                                   allTemplates, support_minScore, assign_minPctSupport, unassign_maxPctSupport)
            if rows is not None:
                candidates.append(rows)
        if not candidates:
            continue
        contextAssignment = _resolveContext(candidates, allTemplates, uniqueTemplateAssignment, minScoreRatio)
        if not contextAssignment.empty:
            assignment.append(contextAssignment)

    if not assignment:
        # Typed empty frame so callers always see the same columns.
        return pd.concat(
            [
                pd.DataFrame({"index": pd.Series(dtype=int)}),
                pd.DataFrame(columns=[itemType, "context", "template"], dtype=str),
                pd.DataFrame(columns=[f"avgScore_{temp}" for temp in allTemplates], dtype=float),
                pd.DataFrame(columns=[f"pctSupport_{temp}" for temp in allTemplates], dtype=float),
            ],
            axis=1,
        )
    return pd.concat(assignment, axis=0).sort_values(["index", "context", "template"]).reset_index(drop=True)