forked from ljtheminister/MMML2014
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_clean.py
More file actions
59 lines (39 loc) · 1.4 KB
/
data_clean.py
File metadata and controls
59 lines (39 loc) · 1.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import pandas as pd
import numpy as np
from numpy import ndarray
from collections import defaultdict
# Load every input table in one pass. Each CSV is read with its first row
# as the column header, and the files are read in the order listed, so the
# names on the left line up one-to-one with the file names below.
reg_results, tourney_results, seasons, teams, seeds, slots = (
    pd.read_csv(csv_path, header=0)
    for csv_path in (
        'regular_season_results.csv',
        'tourney_results.csv',
        'seasons.csv',
        'teams.csv',
        'tourney_seeds.csv',
        'tourney_slots.csv',
    )
)
# Per-season, per-team regular-season records.
# Layout: data[season][team] -> {'W': wins, 'L': losses, 'opponents': [...]}
# A team with no wins (or no losses) has no 'W' (or 'L') key at all, so
# readers should use .get('W', 0) / .get('L', 0).
data = dict()
for season in seasons['season']:
    # defaultdict(dict) creates a team's record on first access. With a plain
    # dict the first data[season][team] lookup in the loop below raises
    # KeyError, because no team entry is ever created beforehand.
    # NOTE: reading data[season][unknown_team] later will silently insert an
    # empty record; use `team in data[season]` to test for membership.
    data[season] = defaultdict(dict)

# Walk the three needed columns in lockstep instead of indexing every row
# through DataFrame.ix (removed from pandas) inside an xrange loop
# (Python 2 only). A season missing from seasons.csv still raises KeyError.
for season, wteam, lteam in zip(reg_results['season'],
                                reg_results['wteam'],
                                reg_results['lteam']):
    winner = data[season][wteam]
    loser = data[season][lteam]

    # Tally the result for both sides of the game.
    winner['W'] = winner.get('W', 0) + 1
    loser['L'] = loser.get('L', 0) + 1

    # Record each team as the other's opponent (one entry per game played,
    # so repeated match-ups appear multiple times).
    winner.setdefault('opponents', []).append(lteam)
    loser.setdefault('opponents', []).append(wteam)
# calculate SOS for each season
# Start with one empty dict per season listed in the seasons table; the
# entries are filled in by the code that follows.
SOS = {}
for season in seasons['season']:
    SOS[season] = {}
for row_idx in xrange(len(reg_results)):
game = reg_results.ix[row_idx, :]
season = game['season']
wteam = game['wteam']
lteam = game['lteam']