22
33import datetime as dt
44
5- from pandas import read_csv , compat
5+ import pandas as pd
66from pandas .compat import StringIO
77
88from pandas_datareader .base import _DailyBaseReader
9+ from pandas_datareader .compat import is_list_like
910
1011
1112class MoexReader (_DailyBaseReader ):
@@ -43,21 +44,26 @@ def __init__(self, *args, **kwargs):
4344 self .start = self .start .date ()
4445 self .end_dt = self .end
4546 self .end = self .end .date ()
46- if not isinstance (self .symbols , compat .string_types ):
47- raise ValueError ("Support for multiple symbols is not yet implemented." )
47+ if not is_list_like (self .symbols ):
48+ self .symbols = [self .symbols ]
49+ self .__engines , self .__markets = {}, {} # dicts for engines and markets
4850
4951 __url_metadata = "https://iss.moex.com/iss/securities/{symbol}.csv"
5052 __url_data = "https://iss.moex.com/iss/history/engines/{engine}/" \
5153 "markets/{market}/securities/{symbol}.csv"
5254
@property
def url(self):
    """Return the list of history API URLs, one per requested symbol.

    Each URL is built from the ``__url_data`` template using the engine
    and market stored for that symbol; the list follows the order of
    ``self.symbols``.

    Raises
    ------
    Exception
        If the engine or market mapping is still empty, i.e. the
        property is read before ``read()`` has stored the metadata.
    """
    # Only read() assigns the two mappings; _get_metadata() merely returns
    # them, so the message points callers at read().
    if not self.__engines or not self.__markets:
        raise Exception("url property accessed before read() populated "
                        "the engines and markets metadata")

    return [
        self.__url_data.format(
            engine=self.__engines[s],
            market=self.__markets[s],
            symbol=s,
        )
        for s in self.symbols
    ]
6167
6268 def _get_params (self , start ):
6369 params = {
@@ -78,96 +84,107 @@ def _get_params(self, start):
7884 return params
7985
8086 def _get_metadata (self ):
81- """ get a market and an engine for a given symbol """
82- response = self ._get_response (
83- self .__url_metadata .format (symbol = self .symbols )
84- )
85- text = self ._sanitize_response (response )
86- if len (text ) == 0 :
87- service = self .__class__ .__name__
88- raise IOError ("{} request returned no data; check URL for invalid "
89- "inputs: {}" .format (service , self .__url_metadata ))
90- if isinstance (text , compat .binary_type ):
91- text = text .decode ('windows-1251' )
92- else :
93- text = text
94-
95- header_str = 'secid;boardid;'
96- get_data = False
97- for s in text .splitlines ():
98- if s .startswith (header_str ):
99- get_data = True
100- continue
101- if get_data and s != '' :
102- fields = s .split (';' )
103- return fields [5 ], fields [7 ]
104- service = self .__class__ .__name__
105- raise IOError ("{} request returned no metadata: {}\n "
106- "Typo in security symbol `{}`?" .format (
107- service ,
108- self .__url_metadata .format (symbol = self .symbols ),
109- self .symbols
110- )
111- )
87+ """Get markets and engines for the given symbols"""
88+
89+ markets , engines = {}, {}
90+
91+ for symbol in self .symbols :
92+ response = self ._get_response (
93+ self .__url_metadata .format (symbol = symbol )
94+ )
95+ text = self ._sanitize_response (response )
96+ if len (text ) == 0 :
97+ service = self .__class__ .__name__
98+ raise IOError ("{} request returned no data; check URL for invalid "
99+ "inputs: {}" .format (service , self .__url_metadata ))
100+ if isinstance (text , pd .compat .binary_type ):
101+ text = text .decode ('windows-1251' )
102+
103+ header_str = 'secid;boardid;'
104+ get_data = False
105+ for s in text .splitlines ():
106+ if s .startswith (header_str ):
107+ get_data = True
108+ continue
109+ if get_data and s != '' :
110+ fields = s .split (';' )
111+ markets [symbol ], engines [symbol ] = fields [5 ], fields [7 ]
112+ break
113+ if symbol not in markets or symbol not in engines :
114+ raise IOError ("{} request returned no metadata: {}\n "
115+ "Typo in the security symbol `{}`?" .format (
116+ self .__class__ .__name__ ,
117+ self .__url_metadata .format (symbol = symbol ),
118+ symbol ))
119+ return markets , engines
112120
def read(self):
    """Read history for every requested symbol.

    Metadata is fetched first, then each symbol's history URL is
    downloaded page by page: every follow-up request starts at the last
    TRADEDATE already received. The connection is closed via
    ``self.close()`` whether or not the download succeeds.

    Returns
    -------
    The object produced by ``_read_lines`` for a single symbol, or the
    row-wise ``pd.concat`` of those objects when there are several.
    """
    try:
        self.__markets, self.__engines = self._get_metadata()
        dfs = []  # one parsed result per symbol, concatenated at the end

        # self.url yields one URL per symbol, in self.symbols order.
        for url in self.url:
            out_list = []
            date_column = None
            prev_start_str = None

            while True:  # read in a loop with small date intervals
                if out_list:
                    if date_column is None:
                        date_column = out_list[0].split(';').index('TRADEDATE')

                    # Resume from the last downloaded date. Split the whole
                    # row: a capped split would return the wrong field
                    # whenever TRADEDATE is not among the first columns.
                    start_str = out_list[-1].split(';')[date_column]
                    start = dt.datetime.strptime(start_str, '%Y-%m-%d').date()
                else:
                    start_str = self.start.strftime('%Y-%m-%d')
                    start = self.start

                if start >= self.end or start >= dt.date.today():
                    break
                # Repeating the previous request would return the same page
                # again, so stop instead of looping forever.
                if start_str == prev_start_str:
                    break
                prev_start_str = start_str

                params = self._get_params(start_str)
                # The first two lines of each response are dropped; blank
                # lines are filtered out as well.
                strings_out = self._read_url_as_String(url, params) \
                    .splitlines()[2:]
                strings_out = [line for line in strings_out if line.strip()]

                # NOTE(review): the 101/100 thresholds presumably reflect a
                # 100-row page plus a header line - confirm against the API.
                if not out_list:
                    out_list = strings_out
                    if len(strings_out) < 101:  # all data received
                        break
                else:
                    out_list += strings_out[1:]  # remove the CSV head line
                    if len(strings_out) < 100:  # all data received
                        break

            str_io = StringIO('\r\n'.join(out_list))
            dfs.append(self._read_lines(str_io))
    finally:
        self.close()

    if len(dfs) > 1:
        return pd.concat(dfs, axis=0, join='outer', sort=True)
    return dfs[0]
170+
154171 def _read_url_as_String (self , url , params = None ):
155- """ Open url (and retry) """
172+ """ Open an url (and retry) """
173+
156174 response = self ._get_response (url , params = params )
157175 text = self ._sanitize_response (response )
158176 if len (text ) == 0 :
159177 service = self .__class__ .__name__
160178 raise IOError ("{} request returned no data; check URL for invalid "
161179 "inputs: {}" .format (service , self .url ))
162- if isinstance (text , compat .binary_type ):
163- out = text .decode ('windows-1251' )
164- else :
165- out = text
166- return out
180+ if isinstance (text , pd .compat .binary_type ):
181+ text = text .decode ('windows-1251' )
182+ return text
167183
168184 def _read_lines (self , input ):
169- """ return pandas DataFrame from input """
170- rs = read_csv (input , index_col = 'TRADEDATE' , parse_dates = True , sep = ';' ,
185+ """ Return a pandas DataFrame from input """
186+
187+ rs = pd .read_csv (input , index_col = 'TRADEDATE' , parse_dates = True , sep = ';' ,
171188 na_values = ('-' , 'null' ))
172189 # Get rid of unicode characters in index name.
173190 try :
0 commit comments