-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathteamdiff.py
More file actions
243 lines (162 loc) · 6.24 KB
/
teamdiff.py
File metadata and controls
243 lines (162 loc) · 6.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# -*- coding: utf8 -*-
'''
teamdiff.py
Determine the uniqueness of a team given a control team.
---
Joe Nudell, 2013
'''
# local
import eplstats
import optimize_roster as optr
# 3rd party
import numpy as np
# stdlib
import os
import re
import codecs
import argparse
from sys import stderr, exit
def _get_line_slices(formatstr):
'''Get the fixed-width line break points from a format string'''
ms = re.findall(r'\{.+?(\d+?)\}', formatstr)
return [int(b) for b in ms]
def _slice_line(line, points):
'''Slice a line into the pieces as specified by break points in `points`'''
slices = []
i = 0
for point in points:
slices.append(line[i:i+point].strip())
i += point
return slices
def read_team_file(fh):
    '''Read the team roster from the given file object. Should be in the
    format that is outputted by optimize_roster.

    The column widths are taken from `optr.roster_line_format`. The first
    line is the header (its titles are lower-cased and whitespace runs are
    replaced by underscores to form the dict keys), the second line is a
    delimiter row and is ignored, and every following non-blank line
    describes one player.

    Returns a list of dicts, one per player, keyed by the header names.
    The last column is converted to float and the third-from-last column
    to a bool (True when the cell is non-empty).

    Raises ValueError if the last column of a content line is not a number.
    '''
    slice_points = _get_line_slices(optr.roster_line_format)
    keys = []
    roster = []

    for i, line in enumerate(fh):
        if i == 0:
            # Header row: read as keys, made easier to work with
            keys = [re.sub(r'\s+', '_', k.lower())
                    for k in _slice_line(line, slice_points)]
        elif i == 1:
            # These are just delimiters
            continue
        elif not line.strip():
            # Blank line (e.g. trailing newline at end of file): there is
            # nothing to parse, and float('') below would raise.
            continue
        else:
            # Interpret content line
            slices = _slice_line(line, slice_points)

            # Adjust types
            slices[-1] = float(slices[-1])
            slices[-3] = bool(slices[-3])

            # Make dict using keys from header
            roster.append(dict(zip(keys, slices)))

    return roster
def team_similarity(roster1, roster2, players, freqfield='ownership'):
    '''Compare the rosters of two teams using statistics provided in the list
    `players`. Comparison is cosine similarity of TF-IDF vectors created from
    these rosters. Return value is in [0, 1]. A value of 1 means that the
    teams are identical. A value of 0 means that teams share no players in
    common.

    roster1, roster2 -- rosters, passed to `optr.player_in_roster`
    players          -- list of player objects; each one is a vector axis
    freqfield        -- name of the player attribute holding the selection
                        frequency used for the inverse-frequency weight

    If either roster contains none of the given players its vector is all
    zeros and the cosine is undefined; 0.0 is returned in that case (no
    players in common) instead of NaN.

    Raises ZeroDivisionError if a selected player's `freqfield` is 0.
    '''
    v1 = np.zeros(len(players))
    v2 = np.zeros(len(players))

    for i, player in enumerate(players):
        # Calculate inverse frequency of player selection for all players
        # selected in either team. Same idea as TF-IDF in document similarity.
        in_team_one = optr.player_in_roster(player, roster1) is not None
        in_team_two = optr.player_in_roster(player, roster2) is not None

        if not (in_team_one or in_team_two):
            continue

        freq = float(getattr(player, freqfield))
        ipf = np.log(1./freq)

        if in_team_one:
            v1[i] = ipf
        if in_team_two:
            v2[i] = ipf

    # TODO - verify that there are 15 elements in both teams?

    # Calculate cos(theta) between vectors, guarding against a zero-length
    # vector which would otherwise yield 0/0 = NaN.
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    if norm_product == 0:
        return 0.

    return np.dot(v1, v2) / norm_product
def get_player_stats(source, username=None, password=None, season=None):
    '''Execute downloading of all player stats from provided source.

    source   -- stats source name, handed to `eplstats.Downloader`
    username -- optional login name for the source
    password -- optional password for the source
    season   -- season to request. When None, the module-level `season`
                is used if one is defined (the command-line entry point
                sets it); otherwise 2014 is used.

    Returns one flat list with the players of every position, in the order
    forwards, midfielders, defenders, keepers.
    '''
    if season is None:
        # Previously `season` was read straight from module globals, which
        # raised NameError whenever this module was imported rather than
        # run as a script. Keep honouring the global, but with a fallback.
        season = globals().get('season', 2014)

    positions = ['forwards', 'midfielders', 'defenders', 'keepers']

    downloader = eplstats.Downloader(source=source,
        username=username, password=password)

    all_players = []
    for position in positions:
        print >>stderr, "  Getting stats about %s ..." % position
        _partial_players = downloader.get(position,
            source=source, season=season, adjustments=None)
        all_players += _partial_players

    return all_players
def score_team(roster, players, field="total_points"):
    '''Sum the `field` attribute over every player found in `roster`.

    Membership is decided by `optr.player_in_roster`; players for which it
    returns None do not count. The total starts from 0.0, so an empty
    selection yields a float zero.'''
    selected = (candidate for candidate in players
                if optr.player_in_roster(candidate, roster) is not None)
    return sum((getattr(member, field) for member in selected), 0.)
if __name__=='__main__':
'''Find the uniqueness of two teams.'''
# defaults
season = 2014
username = ''
password = ''
source = 'espn'
score = 'total_points'
parser = argparse.ArgumentParser(description=__doc__)
# Required positionals:
parser.add_argument('path_to_team1', type=str, nargs=1,
help="Path to a team roster")
parser.add_argument('path_to_team2', type=str, nargs=1,
help="Path to another team roster")
# Optional arguments
parser.add_argument('-y', '--season', type=int, default=season,
help="ESPN endpoint only currently supports 2014 season")
parser.add_argument('-u', '--username', type=str, default=username,
help="Username (for official EPL site)")
parser.add_argument('-p', '--password', type=str, default=password,
help="Password (for official EPL site)")
parser.add_argument('-w', '--source', type=str, default=source,
help="Stats source website. ESPN and EPL are supported.")
parser.add_argument('-s', '--score', type=str, default=score,
help="Attribute to calculate expected team score from")
cli = parser.parse_args()
fn1 = cli.path_to_team1[0]
fn2 = cli.path_to_team2[0]
# Make sure paths to files provided exist
for fn in [fn1, fn2]:
if not os.path.exists(fn):
raise IOError("Can't find %s" % fn)
# Run stats downloader
print >>stderr, "Fetching stats from %s ..." % cli.source
players = get_player_stats(cli.source,
username=cli.username, password=cli.password)
print >>stderr, "Done."
# Get team rosters
print >>stderr, "Processing team files ...",
with codecs.open(fn1, 'r', 'utf8') as fh:
roster1 = read_team_file(fh)
with codecs.open(fn2, 'r', 'utf8') as fh:
roster2 = read_team_file(fh)
print >>stderr, "done."
# Calculate similarity
print >>stderr, "Calculating similarity ...",
similarity = team_similarity(roster1, roster2, players)
print >>stderr, "done."
# Calculate fantasy score expected by teams
print >>stderr, "Calculating expected scores ...",
score1 = score_team(roster1, players, field=cli.score)
score2 = score_team(roster2, players, field=cli.score)
print >>stderr, "done."
# Display result
print
print "Team similarity:", similarity
print
print "Expected season fantasy scores:"
print " Team 1", "\t", score1
print " Team 2", "\t", score2
print