From 6e86a08d3f9062e014521cae7bedb4a9f36f2cc2 Mon Sep 17 00:00:00 2001 From: elmotec Date: Sun, 18 Oct 2015 09:42:58 -0400 Subject: [PATCH 1/6] Fixed test_search Removed usage of sys.stderr in mock exception which caused the stream to be closed. This in turn failed the next test. Simplified test_search to avoid the zip and the loop. --- fredapi/tests/test_fred.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/fredapi/tests/test_fred.py b/fredapi/tests/test_fred.py index a88c4e8..1f616d2 100644 --- a/fredapi/tests/test_fred.py +++ b/fredapi/tests/test_fred.py @@ -79,26 +79,32 @@ def __init__(self, rel_url, response=None, side_effect=None): - - - @@ -150,7 +156,6 @@ def setUp(self): self.fake_fred_call = fake_fred_call self.__original_urlopen = fredapi.fred.urlopen - def tearDown(self): """Cleanup.""" pass @@ -230,9 +235,9 @@ def test_invalid_kwarg_in_get_series(self, urlopen): """Test invalid keyword argument in call to get_series.""" url = '{}/series?series_id=invalid&api_key={}'.format(self.root_url, fred_api_key) - side_effect = fredapi.fred.HTTPError(url, 400, '', '', sys.stderr) + side_effect = fredapi.fred.HTTPError(url, 400, '', '', io.StringIO()) self.prepare_urlopen(urlopen, side_effect=side_effect) - with self.assertRaises(ValueError) as context: + with self.assertRaises(ValueError): self.fred.get_series('SP500', observation_start='invalid-datetime-str') self.assertFalse(urlopen.called) @@ -249,12 +254,11 @@ def test_search(self, urlopen): 'seasonal_adjustment_short']]) expected = textwrap.dedent('''\ popularity observation_start seasonal_adjustment_short - series id + series id PCPI01001 0 1969-01-01 NSA PCPI01003 0 1969-01-01 NSA PCPI01005 0 1969-01-01 NSA''') - for aline, eline in zip(actual.split('\n'), expected.split('\n')): - self.assertEqual(aline.strip(), eline.strip()) + self.assertEqual(actual.split('\n'), expected.split('\n')) if __name__ == '__main__': From 
095b9f66691e0a9ec01d3c0f13c5b79f7ba482c2 Mon Sep 17 00:00:00 2001 From: elmotec Date: Sat, 25 Jul 2015 22:58:18 -0400 Subject: [PATCH 2/6] Improved handling of realtime parameters in get_series(). Arguments realtime_start and realtime_end in get_series() now cause a pandas.DataFrame to be returned with pandas.MultiIndex for realtime data. Added simple test for the new feature and documentation. Added __init__.py in fredapi.tests so it's correctly interpreted as a package. Now we could revert to python setup.py test in .travis.yml. Fixed test_invalid_kwarg_in_get_series() as we sometimes get a TypeError and sometimes a ValueError. Seems that pandas passes through whatever exception it gets, might be a good reason for this so we follow the same policy. Simplified comparison of dataframe output in tests. --- README.md | 13 ++++++++ fredapi/fred.py | 51 ++++++++++++++++++++++++------ fredapi/tests/test_fred.py | 65 +++++++++++++++++++++++++++++++++++--- 3 files changed, 114 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index d3324f7..e24b548 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,18 @@ For instance, there has been three observations (data points) for the GDP of 201 This means the GDP value for Q1 2014 has been released three times. First release was on 4/30/2014 for a value of 17149.6, and then there have been two revisions on 5/29/2014 and 6/25/2014 for revised values of 17101.3 and 17016.0, respectively. +If you pass realtime_start and/or realtime_end to `get_series`, you will get a pandas.DataFrame with a pandas.MultiIndex instead of a pandas.Series. + +For instance, with observation_start and observation_end set to 2015-01-01 and +realtime_start set to 2015-01-01, one will get: +``` + GDP +obs_date rt_start rt_end +2015-01-01 2015-04-29 2015-05-28 00:00:00 17710.0 + 2015-05-29 2015-06-23 00:00:00 17665.0 + 2015-06-24 9999-12-31 17693.3 +``` + ### Get first data release only (i.e. 
ignore revisions) ```python @@ -83,6 +95,7 @@ this outputs: 2014-04-01 17294.7 dtype: float64 ``` + ### Get latest data known on a given date ```python diff --git a/fredapi/fred.py b/fredapi/fred.py index f4a27b1..bdae0bb 100644 --- a/fredapi/fred.py +++ b/fredapi/fred.py @@ -98,7 +98,9 @@ def get_series_info(self, series_id): info = pd.Series(root.getchildren()[0].attrib) return info - def get_series(self, series_id, observation_start=None, observation_end=None, **kwargs): + def get_series(self, series_id, observation_start=None, + observation_end=None, realtime_start=None, + realtime_end=None, **kwargs): """ Get data for a Fred series id. This fetches the latest known data, and is equivalent to get_series_latest_release() @@ -106,17 +108,25 @@ def get_series(self, series_id, observation_start=None, observation_end=None, ** ---------- series_id : str Fred series id such as 'CPIAUCSL' - observation_start : datetime or datetime-like str such as '7/1/2014', optional - earliest observation date - observation_end : datetime or datetime-like str such as '7/1/2014', optional - latest observation date + + observation_start : datetime or datetime-like str such as '7/1/2014' + earliest observation date (optional) + observation_end : datetime or datetime-like str such as '7/1/2014' + latest observation date (optional) + realtime_start : datetime or datetime-like str such as '7/1/2014' + earliest as-of date (optional) + realtime_end : datetime or datetime-like str such as '7/1/2014' + latest as-of date (optional) kwargs : additional parameters - Any additional parameters supported by FRED. You can see https://api.stlouisfed.org/docs/fred/series_observations.html for the full list + Any additional parameters supported by FRED. 
You can see + https://api.stlouisfed.org/docs/fred/series_observations.html + for the full list Returns ------- data : Series - a Series where each index is the observation date and the value is the data for the Fred series + a pandas Series where each index is the observation date and the + value is the data for the Fred series """ url = "%s/series/observations?series_id=%s" % (self.root_url, series_id) if observation_start is not None: @@ -126,20 +136,41 @@ def get_series(self, series_id, observation_start=None, observation_end=None, ** if observation_end is not None: observation_end = pd.to_datetime(observation_end, errors='raise') url += '&observation_end=' + observation_end.strftime('%Y-%m-%d') + if realtime_start is not None: + realtime_start = pd.to_datetime(realtime_start, errors='raise') + url += '&realtime_start=' + realtime_start.strftime('%Y-%m-%d') + if realtime_end is not None: + realtime_end = pd.to_datetime(realtime_end, errors='raise') + url += '&realtime_end=' + realtime_end.strftime('%Y-%m-%d') if kwargs.keys(): url += '&' + urlencode(kwargs) root = self.__fetch_data(url) if root is None: raise ValueError('No data exists for series id: ' + series_id) - data = {} + realtime = (realtime_start or realtime_end) + values = [] + obsdates = [] + rtstarts = [] + rtends = [] for child in root.getchildren(): val = child.get('value') if val == self.nan_char: val = float('NaN') else: val = float(val) - data[self._parse(child.get('date'))] = val - return pd.Series(data) + values.append(val) + obsdates.append(self._parse(child.get('date'))) + if realtime: + rtstarts.append(self._parse(child.get('realtime_start'))) + rtends.append(self._parse(child.get('realtime_end'))) + if realtime: + names = ['obs_date', 'rt_start', 'rt_end'] + index = pd.MultiIndex.from_arrays([obsdates, rtstarts, rtends], + names=names) + return pd.DataFrame(values, index=index, columns=[series_id]) + else: + return pd.Series(values, index=obsdates) + def get_series_latest_release(self, 
series_id): """ diff --git a/fredapi/tests/test_fred.py b/fredapi/tests/test_fred.py index 1f616d2..b8eb526 100644 --- a/fredapi/tests/test_fred.py +++ b/fredapi/tests/test_fred.py @@ -16,8 +16,6 @@ import textwrap import contextlib -import pandas as pd - import fredapi import fredapi.fred @@ -126,6 +124,39 @@ def __init__(self, rel_url, response=None, side_effect=None): last_updated="2015-06-05 08:47:20-05" popularity="86" notes="..." /> ''')) +gdp_obs_rt_call = HTTPCall('series/observations?{}&{}&{}&{}'. + format('series_id=GDP', + 'observation_start=2014-07-01', + 'observation_end=2015-01-01', + 'realtime_start=2014-07-01'), + response=textwrap.dedent('''\ + + + + + + + + + + + + +''')) + class TestFred(unittest.TestCase): @@ -237,9 +268,9 @@ def test_invalid_kwarg_in_get_series(self, urlopen): fred_api_key) side_effect = fredapi.fred.HTTPError(url, 400, '', '', io.StringIO()) self.prepare_urlopen(urlopen, side_effect=side_effect) - with self.assertRaises(ValueError): - self.fred.get_series('SP500', - observation_start='invalid-datetime-str') + # FIXME: different environment throw ValueError or TypeError. 
+ with self.assertRaises(Exception): + self.fred.get_series('SP500', observation_start='invalid') self.assertFalse(urlopen.called) @mock.patch('fredapi.fred.urlopen') @@ -260,6 +291,30 @@ def test_search(self, urlopen): PCPI01005 0 1969-01-01 NSA''') self.assertEqual(actual.split('\n'), expected.split('\n')) + @mock.patch('fredapi.fred.urlopen') + def test_get_series_with_realtime(self, urlopen): + """Test get_series with realtime argument.""" + side_effects = [gdp_obs_rt_call.response] + self.prepare_urlopen(urlopen, side_effect=side_effects) + df = self.fred.get_series('GDP', observation_start='7/1/2014', + observation_end='1/1/2015', + realtime_start='7/1/2014') + urlopen.assert_called_with(gdp_obs_rt_call.url) + actual = str(df) + expected = textwrap.dedent('''\ + GDP + obs_date rt_start rt_end + 2014-07-01 2014-10-30 2014-11-24 00:00:00 17535.4 + 2014-11-25 2014-12-22 00:00:00 17555.2 + 2014-12-23 9999-12-31 17599.8 + 2014-10-01 2015-01-30 2015-02-26 00:00:00 17710.7 + 2015-02-27 2015-03-26 00:00:00 17701.3 + 2015-03-27 9999-12-31 17703.7 + 2015-01-01 2015-04-29 2015-05-28 00:00:00 17710.0 + 2015-05-29 2015-06-23 00:00:00 17665.0 + 2015-06-24 9999-12-31 17693.3''') + self.assertEqual(actual.split('\n'), expected.split('\n')) + if __name__ == '__main__': unittest.main() From 65c01b5d4e4d63ff00e6e2a0cae4ba10501b2570 Mon Sep 17 00:00:00 2001 From: elmotec Date: Sat, 24 Oct 2015 21:46:16 -0400 Subject: [PATCH 3/6] Changed _parse to return None for 9999-12-31 9999-12-31 cannot be converted to pandas.Timestamp because it's too big. The reason it's preferable to use pandas.Timestamp rather than datetime.datetime is that the former can be used as an index whereas the latter cannot. 
--- fredapi/fred.py | 2 ++ fredapi/tests/test_fred.py | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/fredapi/fred.py b/fredapi/fred.py index bdae0bb..bb59958 100644 --- a/fredapi/fred.py +++ b/fredapi/fred.py @@ -72,6 +72,8 @@ def _parse(self, date_str, format='%Y-%m-%d'): """ helper function for parsing FRED date string into datetime """ + if date_str == self.latest_realtime_end: + return None rv = pd.to_datetime(date_str, format=format) if hasattr(rv, 'to_datetime'): rv = rv.to_datetime() diff --git a/fredapi/tests/test_fred.py b/fredapi/tests/test_fred.py index b8eb526..2462dba 100644 --- a/fredapi/tests/test_fred.py +++ b/fredapi/tests/test_fred.py @@ -302,17 +302,17 @@ def test_get_series_with_realtime(self, urlopen): urlopen.assert_called_with(gdp_obs_rt_call.url) actual = str(df) expected = textwrap.dedent('''\ - GDP - obs_date rt_start rt_end - 2014-07-01 2014-10-30 2014-11-24 00:00:00 17535.4 - 2014-11-25 2014-12-22 00:00:00 17555.2 - 2014-12-23 9999-12-31 17599.8 - 2014-10-01 2015-01-30 2015-02-26 00:00:00 17710.7 - 2015-02-27 2015-03-26 00:00:00 17701.3 - 2015-03-27 9999-12-31 17703.7 - 2015-01-01 2015-04-29 2015-05-28 00:00:00 17710.0 - 2015-05-29 2015-06-23 00:00:00 17665.0 - 2015-06-24 9999-12-31 17693.3''') + GDP + obs_date rt_start rt_end + 2014-07-01 2014-10-30 2014-11-24 17535.4 + 2014-11-25 2014-12-22 17555.2 + 2014-12-23 NaT 17599.8 + 2014-10-01 2015-01-30 2015-02-26 17710.7 + 2015-02-27 2015-03-26 17701.3 + 2015-03-27 NaT 17703.7 + 2015-01-01 2015-04-29 2015-05-28 17710.0 + 2015-05-29 2015-06-23 17665.0 + 2015-06-24 NaT 17693.3''') self.assertEqual(actual.split('\n'), expected.split('\n')) From adcb93cd13d6a0d9aac957ef6a92fc2b2ee028a4 Mon Sep 17 00:00:00 2001 From: elmotec Date: Sat, 7 Nov 2015 12:25:42 -0500 Subject: [PATCH 4/6] Added get_dataframe Retrieve multiple series in one call with get_dataframe. Reorganized test mock calls. 
--- fredapi/fred.py | 98 ++++++++- fredapi/tests/test_fred.py | 426 ++++++++++++++++++++++++++++--------- 2 files changed, 427 insertions(+), 97 deletions(-) diff --git a/fredapi/fred.py b/fredapi/fred.py index bb59958..324586c 100644 --- a/fredapi/fred.py +++ b/fredapi/fred.py @@ -20,11 +20,32 @@ class Fred(object): + + """Main interface to Fred.""" + earliest_realtime_start = '1776-07-04' latest_realtime_end = '9999-12-31' nan_char = '.' max_results_per_request = 1000 root_url = 'https://api.stlouisfed.org/fred' + # Maps Fred frequency code to pandas frequency code. + freq_map = {'d': 'B', # business days. + 'w': 'W', # weekly. + 'bw' : '2W', # bi-weekly + 'm': 'M', # monthly. + 'q': '3M', # quarterly (not checked). + 'sa': '6M', # semi-annual. + 'a': '12M', # annual (not checked). + 'wef': 'W-FRI', # Weekly, Ending Friday + 'weth': 'W-THU', # Weekly Ending Thursday + 'wew': 'W-WED', # Weekly Ending Wednesday + 'wetu': 'W-TUE', # Weekly Ending Tuesday + 'wem': 'W-MON', # Weekly Ending Monday + 'wesu': 'W-SUN', # Weekly Ending Sunday + 'wesa': 'W-SAT', # Weekly Ending Saturday + 'bwew': '2W-WED', # Weekly Ending Wednesday + 'bwem': '2W-MON', # Weekly Ending Monday + } def __init__(self, api_key=None, @@ -70,7 +91,17 @@ def __fetch_data(self, url): def _parse(self, date_str, format='%Y-%m-%d'): """ - helper function for parsing FRED date string into datetime + Helper function for parsing FRED date string into datetime.datetime. + + FRED max value of 9999-12-31 is converted to None to stick with + database (SQL NULL) conventions and allow pandas to convert the + time stamp to a pandas.Timestamp if the value is used in an index + (it seems pandas.Timestamp can be part of an index, but not + datetime.datetime). 
+ + Returns: + Time stamp as datetime.datetime or None for 9999-12-31 + """ if date_str == self.latest_realtime_end: return None @@ -81,7 +112,10 @@ def _parse(self, date_str, format='%Y-%m-%d'): def get_series_info(self, series_id): """ - Get information about a series such as its title, frequency, observation start/end dates, units, notes, etc. + Get information about a series. + + Information includes things such as its title, frequency, observation + start/end dates, units, notes, etc. Parameters ---------- @@ -174,6 +208,66 @@ def get_series(self, series_id, observation_start=None, return pd.Series(values, index=obsdates) + def get_dataframe(self, series_ids, observation_start=None, + observation_end=None, **kwargs): + """Get latest release for multiple series in one dataframe. + + Pass a frequency in kwargs to specify the release frequency of interest. + It will save a call to the series info to find out what frequency the + series is released. + + If the series native release frequencies (default used unless one + specify the frequency in kwargs) do not match, the dataframe will show + NaN. + + Parameters + ---------- + series_ids : list of str + Fred series id such as ['CPIAUCSL', 'SP500'] + observation_start : datetime or datetime-like str such as '7/1/2014' + earliest observation date (optional) + observation_end : datetime or datetime-like str such as '7/1/2014' + latest observation date (optional) + frequency : str + Values for frequency are expected to be lowercase codes (e.g. w, m, + q, ...). For more example, See + https://api.stlouisfed.org/docs/fred/series_observations.html#frequency + kwargs : additional parameters + Any additional parameters supported by FRED. For more info, see + https://api.stlouisfed.org/docs/fred/series_observations.html + + Returns + ------- + info : pandas.DataFrame + a DataFrame where each row is the observation date and the value + for the Fred series. 
+ + """ + all_series = [] + columns = [] + freq_override = None + if 'frequency' in kwargs: + freq_override = kwargs['frequency'] + for series_id in series_ids: + if freq_override: + freq = freq_override + else: + info = self.get_series_info(series_id) + freq = info['frequency_short'].lower() + serie = self.get_series(series_id, + observation_start=observation_start, + observation_end=observation_end, **kwargs) + # If the serie is not a stored as a dataframe, turn it into one. + if hasattr(serie, 'to_frame'): + serie = serie.to_frame(series_id) + actual_start = serie.index[0] + if freq not in self.freq_map.keys(): + raise ValueError('unknown frequency {} for {}'. + format(freq, series_id)) + all_series.append(serie) + columns.append(series_id) + return pd.concat(all_series, axis=1) + def get_series_latest_release(self, series_id): """ Get data for a Fred series id. This fetches the latest known data, and is equivalent to get_series() diff --git a/fredapi/tests/test_fred.py b/fredapi/tests/test_fred.py index 2462dba..383272a 100644 --- a/fredapi/tests/test_fred.py +++ b/fredapi/tests/test_fred.py @@ -20,7 +20,6 @@ import fredapi.fred - # Change here if you want to make actual calls to Fred # (https://api.stlouisfed.org/fred...) # Make sure you FRED_API_KEY is set up and internet works. @@ -50,113 +49,265 @@ def __init__(self, rel_url, response=None, side_effect=None): self.side_effect = side_effect +sp500_info_call = HTTPCall('series?series_id=SP500', + response=textwrap.dedent('''\ + + + + ''')) +payems_info_call = HTTPCall('series?series_id=PAYEMS', + response=textwrap.dedent('''\ + + + + ''')) +cp_info_call = HTTPCall('series?series_id=CP', + response=textwrap.dedent('''\ + + + + + ''')) +gdp_info_call = HTTPCall('series?series_id=GDP', + response=textwrap.dedent('''\ + + + + + ''')) + sp500_obs_call = HTTPCall('series/observations?series_id=SP500&{}&{}'. 
format('observation_start=2014-09-02', 'observation_end=2014-09-05'), response=textwrap.dedent('''\ - - - - - - -''')) + + + + + + + ''')) search_call = HTTPCall('release/series?release_id=175&' + 'order_by=series_id&sort_order=asc', response = textwrap.dedent('''\ - - - - - - - -''')) -payems_info_call = HTTPCall('series?series_id=PAYEMS', - response=textwrap.dedent('''\ - - - -''')) + + + + + + + + ''')) +sp500_obs_q_call = HTTPCall('series/observations?series_id=SP500&{}&{}&{}'. + format('observation_start=2014-07-01', + 'observation_end=2015-01-01', + 'frequency=q'), + response=textwrap.dedent('''\ + + + + + + ''')) +gdp_obs_q_call = HTTPCall('series/observations?series_id=GDP&{}&{}&{}'. + format('observation_start=2014-07-01', + 'observation_end=2015-01-01', + 'frequency=q'), + response=textwrap.dedent('''\ + + + + + + + ''')) +payems_obs_call = HTTPCall('series/observations?{}&{}&{}'. + format('series_id=PAYEMS', + 'observation_start=2014-07-01', + 'observation_end=2015-01-01'), + response=textwrap.dedent('''\ + + + + + + + + + + + ''')) +gdp_obs_call = HTTPCall('series/observations?{}&{}&{}'. + format('series_id=GDP', + 'observation_start=2014-07-01', + 'observation_end=2015-01-01'), + response=textwrap.dedent('''\ + + + + + + + ''')) gdp_obs_rt_call = HTTPCall('series/observations?{}&{}&{}&{}'. format('series_id=GDP', 'observation_start=2014-07-01', 'observation_end=2015-01-01', 'realtime_start=2014-07-01'), response=textwrap.dedent('''\ - - - - - - - - - - - - -''')) - + + + + + + + + + + + + + ''')) +cp_obs_rt_call = HTTPCall('series/observations?{}&{}&{}&{}'. 
+ format('series_id=CP', + 'observation_start=2014-07-01', + 'observation_end=2015-01-01', + 'realtime_start=2014-07-01'), + response=textwrap.dedent('''\ + + + + + + + + + + + + ''')) class TestFred(unittest.TestCase): @@ -315,6 +466,91 @@ def test_get_series_with_realtime(self, urlopen): 2015-06-24 NaT 17693.3''') self.assertEqual(actual.split('\n'), expected.split('\n')) + @mock.patch('fredapi.fred.urlopen') + def test_get_dataframe_forced_freq(self, urlopen): + """Test get_dataframe to multi-series with heterogeous frequency.""" + series = ['SP500', 'GDP'] + side_effects = [sp500_obs_q_call.response, + gdp_obs_q_call.response] + self.prepare_urlopen(urlopen, side_effect=side_effects) + df = self.fred.get_dataframe(series, observation_start='7/1/2014', + observation_end='1/1/2015', + frequency='q') + expected_calls = [(sp500_obs_q_call.url), + (gdp_obs_q_call.url)] + for actual, expected in zip(urlopen.call_args_list, expected_calls): + self.assertEqual(actual[0][0], expected) + expected = textwrap.dedent('''\ + SP500 GDP + 2014-07-01 1975.91 17599.8 + 2014-10-01 2009.34 17703.7 + 2015-01-01 2063.69 17693.3''') + self.assertEqual(str(df), expected) + + @mock.patch('fredapi.fred.urlopen') + def test_get_dataframe(self, urlopen): + """Test get_dataframe to get multiple series with info.""" + series = ['GDP', 'PAYEMS'] + side_effects = [gdp_info_call.response, + gdp_obs_call.response, + payems_info_call.response, + payems_obs_call.response,] + self.prepare_urlopen(urlopen, side_effect=side_effects) + df = self.fred.get_dataframe(series, observation_start='7/1/2014', + observation_end='1/1/2015') + expected_calls = [(gdp_info_call.url), + (gdp_obs_call.url), + (payems_info_call.url), + (payems_obs_call.url)] + for actual, expected in zip(urlopen.call_args_list, expected_calls): + self.assertEqual(actual[0][0], expected) + expected = textwrap.dedent('''\ + GDP PAYEMS + 2014-07-01 17599.8 139156 + 2014-08-01 NaN 139369 + 2014-09-01 NaN 139619 + 2014-10-01 17703.7 
139840 + 2014-11-01 NaN 140263 + 2014-12-01 NaN 140592 + 2015-01-01 17693.3 140793''') + + @mock.patch('fredapi.fred.urlopen') + def test_get_dataframe_with_realtime(self, urlopen): + """Test get_dataframe to get multi-series with realtime info.""" + series = ['GDP', 'CP'] + side_effects = [gdp_info_call.response, + gdp_obs_rt_call.response, + cp_info_call.response, + cp_obs_rt_call.response,] + self.prepare_urlopen(urlopen, side_effect=side_effects) + df = self.fred.get_dataframe(series, observation_start='7/1/2014', + observation_end='1/1/2015', + realtime_start='7/1/2014') + expected_calls = [(gdp_info_call.url), + (gdp_obs_rt_call.url), + (cp_info_call.url), + (cp_obs_rt_call.url)] + for actual, expected in zip(urlopen.call_args_list, expected_calls): + self.assertEqual(actual[0][0], expected) + expected = textwrap.dedent('''\ + GDP CP + obs_date rt_start rt_end + 2014-07-01 2014-10-30 2014-11-24 17535.4 NaN + 2014-11-25 2014-12-22 17555.2 1872.7 + 2014-12-23 NaT 17599.8 NaN + 2015-07-29 NaN 1894.6 + 2015-07-30 NaT NaN 1761.1 + 2014-10-01 2015-01-30 2015-02-26 17710.7 NaN + 2015-02-27 2015-03-26 17701.3 NaN + 2015-03-27 NaT 17703.7 NaN + 2015-07-29 NaN 1837.5 + 2015-07-30 NaT NaN 1700.5 + 2015-01-01 2015-04-29 2015-05-28 17710.0 NaN + 2015-05-29 2015-06-23 17665.0 1893.8 + 2015-06-24 NaT 17693.3 NaN + 2015-07-29 NaN 1891.2 + 2015-07-30 NaT NaN 1734.5''') + self.assertEqual(str(df), expected) if __name__ == '__main__': unittest.main() From 8a724a11729118c6293966252e85ad414933b102 Mon Sep 17 00:00:00 2001 From: elmotec Date: Sat, 7 Nov 2015 14:22:55 -0500 Subject: [PATCH 5/6] Added documentation for get_dataframe --- README.md | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 69 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e24b548..d536e86 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ In [ALFRED](http://research.stlouisfed.org/tips/alfred/) there is the concept of - date: the date the value is for - 
realtime_start: the first date the value is valid -- realitime_end: the last date the value is valid +- realtime_end: the last date the value is valid For instance, there has been three observations (data points) for the GDP of 2014 Q1: @@ -55,11 +55,11 @@ If you pass realtime_start and/or realtime_end to `get_series`, you will get a p For instance, with observation_start and observation_end set to 2015-01-01 and realtime_start set to 2015-01-01, one will get: ``` - GDP + GDP obs_date rt_start rt_end -2015-01-01 2015-04-29 2015-05-28 00:00:00 17710.0 - 2015-05-29 2015-06-23 00:00:00 17665.0 - 2015-06-24 9999-12-31 17693.3 +2015-01-01 2015-04-29 2015-05-28 17710.0 + 2015-05-29 2015-06-23 17665.0 + 2015-06-24 9999-12-31 17693.3 ``` ### Get first data release only (i.e. ignore revisions) @@ -96,6 +96,39 @@ this outputs: dtype: float64 ``` +### Get latest data for multiple series for the latest release +```python +data = fred.get_dataframe(['SP500', 'GDP'], frequency='q') +data.tail() +``` +this outputs: +``` + SP500 GDP +2014-07-31 1975.91 17599.8 +2014-10-31 2009.34 17703.7 +2015-01-31 2063.69 17693.3 +dtype: float64 +``` + +Note that if you do not specify the frequency, each series will be output on its +own intrinsic frequency, introducing NaN in the dataframe. +```python +data = fred.get_dataframe(['GDP', 'PAYEMS']) +data.tail() +``` +outputs: +``` + GDP PAYEMS +2014-07-31 17599.8 139156 +2014-08-31 NaN 139369 +2014-09-30 NaN 139619 +2014-10-31 17703.7 139840 +2014-11-30 NaN 140263 +2014-12-31 NaN 140592 +2015-01-31 17693.3 140793 +``` + + ### Get latest data known on a given date ```python @@ -228,6 +261,37 @@ this outputs: +### Get multiple series at multiple points in time + +This works the same way as for the latest release; one just adds either +realtime_start, realtime_end, or both. 
+ +```python +data = fred.get_dataframe(['GDP', 'CP'], observation_start='7/1/2014', + observation_end='1/1/2015', realtime_start='7/1/2014') +data.tail() +``` +outputs: +``` + GDP CP +obs_date rt_start rt_end +2014-07-01 2014-10-30 2014-11-24 17535.4 NaN + 2014-11-25 2014-12-22 17555.2 1872.7 + 2014-12-23 NaT 17599.8 NaN + 2015-07-29 NaN 1894.6 + 2015-07-30 NaT NaN 1761.1 +2014-10-01 2015-01-30 2015-02-26 17710.7 NaN + 2015-02-27 2015-03-26 17701.3 NaN + 2015-03-27 NaT 17703.7 NaN + 2015-07-29 NaN 1837.5 + 2015-07-30 NaT NaN 1700.5 +2015-01-01 2015-04-29 2015-05-28 17710.0 NaN + 2015-05-29 2015-06-23 17665.0 1893.8 + 2015-06-24 NaT 17693.3 NaN + 2015-07-29 NaN 1891.2 + 2015-07-30 NaT NaN 1734.5''') +``` + ### Get all vintage dates ```python from __future__ import print_function From 7c790c6c53bffc700b172833ce7e6f489e1a1663 Mon Sep 17 00:00:00 2001 From: elmotec Date: Sun, 15 Nov 2015 19:19:04 -0500 Subject: [PATCH 6/6] Changed translation of 9999-12-31 to time stamp Changed the translation of 9999-12-31 in Fred API to a customizable variable Fred.latest_time_stamp which defaults to pandas.Timestamp.max Renamed get_dataframe to get_multi_series to reflect what it does rather than what it returns. Made Fred.freq_map an internal variable (Fred.__freq_map). Documentation improvements. --- README.md | 18 +++++++++ fredapi/fred.py | 78 +++++++++++++++++++++----------------- fredapi/tests/test_fred.py | 65 +++++++++++++++---------------- 3 files changed, 95 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index d536e86..fe2b4f1 100644 --- a/README.md +++ b/README.md @@ -292,6 +292,24 @@ obs_date rt_start rt_end 2015-07-30 NaT NaN 1734.5''') ``` +The advantage of this approach is that all the information is downloaded +now and one can apply further transformations without making more web queries. + +For instance: +```python +dfo = df.reset_index(level=[1, 2]) # move rt_start and rt_end to columns. 
+target = pd.to_datetime('2015-06-01') +dfo[(dfo.rt_start < target) & (target < dfo.rt_end)].groupby(level=0).first() +``` +will output the value of the series as of the `target` date: +```python + rt_start rt_end GDP CP +obs_date +2014-07-01 2014-12-23 2015-07-29 17599.8 1894.6 +2014-10-01 2015-03-27 2015-07-29 17703.7 1837.5 +2015-01-01 2015-05-29 2015-06-23 17665.0 1893.8 +``` + ### Get all vintage dates ```python from __future__ import print_function diff --git a/fredapi/fred.py b/fredapi/fred.py index 324586c..570fd27 100644 --- a/fredapi/fred.py +++ b/fredapi/fred.py @@ -21,31 +21,39 @@ class Fred(object): - """Main interface to Fred.""" + """Main interface to Fred. + + Attributes: + earliest_realtime_start: minimum rt_start for Fred series. + latest_realtime_end: maximum rt_end for Fred series. + latest_time_stamp: maximum value for rt_end series. Defaults to + pandas.Timestamp.max. + """ earliest_realtime_start = '1776-07-04' latest_realtime_end = '9999-12-31' + latest_time_stamp = pd.Timestamp.max nan_char = '.' max_results_per_request = 1000 root_url = 'https://api.stlouisfed.org/fred' # Maps Fred frequency code to pandas frequency code. - freq_map = {'d': 'B', # business days. - 'w': 'W', # weekly. - 'bw' : '2W', # bi-weekly - 'm': 'M', # monthly. - 'q': '3M', # quarterly (not checked). - 'sa': '6M', # semi-annual. - 'a': '12M', # annual (not checked). - 'wef': 'W-FRI', # Weekly, Ending Friday - 'weth': 'W-THU', # Weekly Ending Thursday - 'wew': 'W-WED', # Weekly Ending Wednesday - 'wetu': 'W-TUE', # Weekly Ending Tuesday - 'wem': 'W-MON', # Weekly Ending Monday - 'wesu': 'W-SUN', # Weekly Ending Sunday - 'wesa': 'W-SAT', # Weekly Ending Saturday - 'bwew': '2W-WED', # Weekly Ending Wednesday - 'bwem': '2W-MON', # Weekly Ending Monday - } + __freq_map = {'d': 'B', # business days. + 'w': 'W', # weekly. + 'bw' : '2W', # bi-weekly + 'm': 'M', # monthly. + 'q': '3M', # quarterly (not checked). + 'sa': '6M', # semi-annual. 
+ 'a': '12M', # annual (not checked). + 'wef': 'W-FRI', # Weekly, Ending Friday + 'weth': 'W-THU', # Weekly Ending Thursday + 'wew': 'W-WED', # Weekly Ending Wednesday + 'wetu': 'W-TUE', # Weekly Ending Tuesday + 'wem': 'W-MON', # Weekly Ending Monday + 'wesu': 'W-SUN', # Weekly Ending Sunday + 'wesa': 'W-SAT', # Weekly Ending Saturday + 'bwew': '2W-WED', # Bi-weekly Ending Wednesday + 'bwem': '2W-MON', # Bi-weekly Ending Monday + } def __init__(self, api_key=None, @@ -90,21 +98,16 @@ def __fetch_data(self, url): return root def _parse(self, date_str, format='%Y-%m-%d'): - """ - Helper function for parsing FRED date string into datetime.datetime. + """Helper function to convert FRED date string into datetime.datetime. - FRED max value of 9999-12-31 is converted to None to stick with - database (SQL NULL) conventions and allow pandas to convert the - time stamp to a pandas.Timestamp if the value is used in an index - (it seems pandas.Timestamp can be part of an index, but not - datetime.datetime). + FRED max value 9999-12-31 is converted to Fred.latest_time_stamp. Returns: - Time stamp as datetime.datetime or None for 9999-12-31 + Time stamp as datetime.datetime or Fred.latest_time_stamp. """ if date_str == self.latest_realtime_end: - return None + return self.latest_time_stamp rv = pd.to_datetime(date_str, format=format) if hasattr(rv, 'to_datetime'): rv = rv.to_datetime() @@ -207,10 +210,10 @@ def get_series(self, series_id, observation_start=None, else: return pd.Series(values, index=obsdates) - - def get_dataframe(self, series_ids, observation_start=None, - observation_end=None, **kwargs): - """Get latest release for multiple series in one dataframe. + def get_multi_series(self, series_ids, observation_start=None, + observation_end=None, realtime_start=None, + realtime_end=None, **kwargs): + """Get multiple series in one dataframe. Pass a frequency in kwargs to specify the release frequency of interest. 
It will save a call to the series info to find out what frequency the @@ -228,6 +231,10 @@ def get_dataframe(self, series_ids, observation_start=None, earliest observation date (optional) observation_end : datetime or datetime-like str such as '7/1/2014' latest observation date (optional) + realtime_start : datetime or datetime-like str such as '7/1/2014' + earliest as-of date (optional) + realtime_end : datetime or datetime-like str such as '7/1/2014' + latest as-of date (optional) frequency : str Values for frequency are expected to be lowercase codes (e.g. w, m, q, ...). For more example, See @@ -255,13 +262,16 @@ def get_dataframe(self, series_ids, observation_start=None, info = self.get_series_info(series_id) freq = info['frequency_short'].lower() serie = self.get_series(series_id, - observation_start=observation_start, - observation_end=observation_end, **kwargs) + observation_start=observation_start, + observation_end=observation_end, + realtime_start=realtime_start, + realtime_end=realtime_end, + **kwargs) # If the serie is not a stored as a dataframe, turn it into one. if hasattr(serie, 'to_frame'): serie = serie.to_frame(series_id) actual_start = serie.index[0] - if freq not in self.freq_map.keys(): + if freq not in self.__freq_map.keys(): raise ValueError('unknown frequency {} for {}'. 
format(freq, series_id)) all_series.append(serie) diff --git a/fredapi/tests/test_fred.py b/fredapi/tests/test_fred.py index 383272a..773c317 100644 --- a/fredapi/tests/test_fred.py +++ b/fredapi/tests/test_fred.py @@ -453,27 +453,27 @@ def test_get_series_with_realtime(self, urlopen): urlopen.assert_called_with(gdp_obs_rt_call.url) actual = str(df) expected = textwrap.dedent('''\ - GDP - obs_date rt_start rt_end - 2014-07-01 2014-10-30 2014-11-24 17535.4 - 2014-11-25 2014-12-22 17555.2 - 2014-12-23 NaT 17599.8 - 2014-10-01 2015-01-30 2015-02-26 17710.7 - 2015-02-27 2015-03-26 17701.3 - 2015-03-27 NaT 17703.7 - 2015-01-01 2015-04-29 2015-05-28 17710.0 - 2015-05-29 2015-06-23 17665.0 - 2015-06-24 NaT 17693.3''') + GDP + obs_date rt_start rt_end + 2014-07-01 2014-10-30 2014-11-24 00:00:00.000000000 17535.4 + 2014-11-25 2014-12-22 00:00:00.000000000 17555.2 + 2014-12-23 2262-04-11 23:47:16.854775807 17599.8 + 2014-10-01 2015-01-30 2015-02-26 00:00:00.000000000 17710.7 + 2015-02-27 2015-03-26 00:00:00.000000000 17701.3 + 2015-03-27 2262-04-11 23:47:16.854775807 17703.7 + 2015-01-01 2015-04-29 2015-05-28 00:00:00.000000000 17710.0 + 2015-05-29 2015-06-23 00:00:00.000000000 17665.0 + 2015-06-24 2262-04-11 23:47:16.854775807 17693.3''') self.assertEqual(actual.split('\n'), expected.split('\n')) @mock.patch('fredapi.fred.urlopen') - def test_get_dataframe_forced_freq(self, urlopen): - """Test get_dataframe to multi-series with heterogeous frequency.""" + def test_get_multi_series_forced_freq(self, urlopen): + """Test get_multi_series to multi-series with heterogeous frequency.""" series = ['SP500', 'GDP'] side_effects = [sp500_obs_q_call.response, gdp_obs_q_call.response] self.prepare_urlopen(urlopen, side_effect=side_effects) - df = self.fred.get_dataframe(series, observation_start='7/1/2014', + df = self.fred.get_multi_series(series, observation_start='7/1/2014', observation_end='1/1/2015', frequency='q') expected_calls = [(sp500_obs_q_call.url), @@ -488,15 +488,15 @@ 
def test_get_dataframe_forced_freq(self, urlopen): self.assertEqual(str(df), expected) @mock.patch('fredapi.fred.urlopen') - def test_get_dataframe(self, urlopen): - """Test get_dataframe to get multiple series with info.""" + def test_get_multi_series(self, urlopen): + """Test get_multi_series to get multiple series with info.""" series = ['GDP', 'PAYEMS'] side_effects = [gdp_info_call.response, gdp_obs_call.response, payems_info_call.response, payems_obs_call.response,] self.prepare_urlopen(urlopen, side_effect=side_effects) - df = self.fred.get_dataframe(series, observation_start='7/1/2014', + df = self.fred.get_multi_series(series, observation_start='7/1/2014', observation_end='1/1/2015') expected_calls = [(gdp_info_call.url), (gdp_obs_call.url), @@ -515,17 +515,18 @@ def test_get_dataframe(self, urlopen): 2015-01-01 17693.3 140793''') @mock.patch('fredapi.fred.urlopen') - def test_get_dataframe_with_realtime(self, urlopen): - """Test get_dataframe to get multi-series with realtime info.""" + def test_get_multi_series_with_custom_rt_end(self, urlopen): + """Test get_multi_series with customized Fred.latest_time_stamp.""" series = ['GDP', 'CP'] side_effects = [gdp_info_call.response, gdp_obs_rt_call.response, cp_info_call.response, cp_obs_rt_call.response,] self.prepare_urlopen(urlopen, side_effect=side_effects) - df = self.fred.get_dataframe(series, observation_start='7/1/2014', - observation_end='1/1/2015', - realtime_start='7/1/2014') + self.fred.latest_time_stamp = dt.datetime(2262, 4, 11) + df = self.fred.get_multi_series(series, observation_start='7/1/2014', + observation_end='1/1/2015', + realtime_start='7/1/2014') expected_calls = [(gdp_info_call.url), (gdp_obs_rt_call.url), (cp_info_call.url), @@ -537,20 +538,20 @@ def test_get_dataframe_with_realtime(self, urlopen): obs_date rt_start rt_end 2014-07-01 2014-10-30 2014-11-24 17535.4 NaN 2014-11-25 2014-12-22 17555.2 1872.7 - 2014-12-23 NaT 17599.8 NaN - 2015-07-29 NaN 1894.6 - 2015-07-30 NaT NaN 1761.1 + 
2014-12-23 2015-07-29 NaN 1894.6 + 2262-04-11 17599.8 NaN + 2015-07-30 2262-04-11 NaN 1761.1 2014-10-01 2015-01-30 2015-02-26 17710.7 NaN 2015-02-27 2015-03-26 17701.3 NaN - 2015-03-27 NaT 17703.7 NaN - 2015-07-29 NaN 1837.5 - 2015-07-30 NaT NaN 1700.5 + 2015-03-27 2015-07-29 NaN 1837.5 + 2262-04-11 17703.7 NaN + 2015-07-30 2262-04-11 NaN 1700.5 2015-01-01 2015-04-29 2015-05-28 17710.0 NaN 2015-05-29 2015-06-23 17665.0 1893.8 - 2015-06-24 NaT 17693.3 NaN - 2015-07-29 NaN 1891.2 - 2015-07-30 NaT NaN 1734.5''') - self.assertEqual(str(df), expected) + 2015-06-24 2015-07-29 NaN 1891.2 + 2262-04-11 17693.3 NaN + 2015-07-30 2262-04-11 NaN 1734.5''') + self.assertEqual(str(df).split('\n'), expected.split('\n')) if __name__ == '__main__': unittest.main()