Caching queries

Making the same request repeatedly can use a lot of bandwidth, slow down your code, and result in your IP address being banned.

pandas-datareader allows you to cache queries with requests_cache by passing a requests_cache.CachedSession to DataReader or Options via the session parameter.

Below is an example with Yahoo! Finance. The session parameter is implemented for all datareaders.

In [1]: import pandas_datareader.data as web

In [2]: import datetime

In [3]: import requests_cache

In [4]: expire_after = datetime.timedelta(days=3)

In [5]: session = requests_cache.CachedSession(cache_name='cache', backend='sqlite', expire_after=expire_after)

In [6]: start = datetime.datetime(2010, 1, 1)

In [7]: end = datetime.datetime(2013, 1, 27)

In [8]: f = web.DataReader("F", 'yahoo', start, end, session=session)

ConnectionErrorTraceback (most recent call last)
<ipython-input-8-5fb28fb6560e> in <module>()
----> 1 f = web.DataReader("F", 'yahoo', start, end, session=session)

/home/docs/checkouts/readthedocs.org/user_builds/pandas-datareader/envs/latest/local/lib/python2.7/site-packages/pandas_datareader-0.4.0-py2.7.egg/pandas_datareader/data.pyc in DataReader(name, data_source, start, end, retry_count, pause, session, access_key)
    115                                 adjust_price=False, chunksize=25,
    116                                 retry_count=retry_count, pause=pause,
--> 117                                 session=session).read()
    118 
    119     elif data_source == "yahoo-actions":

/home/docs/checkouts/readthedocs.org/user_builds/pandas-datareader/envs/latest/local/lib/python2.7/site-packages/pandas_datareader-0.4.0-py2.7.egg/pandas_datareader/yahoo/daily.pyc in read(self)
     75     def read(self):
     76         """ read one data from specified URL """
---> 77         df = super(YahooDailyReader, self).read()
     78         if self.ret_index:
     79             df['Ret_Index'] = _calc_return_index(df['Adj Close'])

/home/docs/checkouts/readthedocs.org/user_builds/pandas-datareader/envs/latest/local/lib/python2.7/site-packages/pandas_datareader-0.4.0-py2.7.egg/pandas_datareader/base.pyc in read(self)
    155         if isinstance(self.symbols, (compat.string_types, int)):
    156             df = self._read_one_data(self.url,
--> 157                                      params=self._get_params(self.symbols))
    158         # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT'])
    159         elif isinstance(self.symbols, DataFrame):

/home/docs/checkouts/readthedocs.org/user_builds/pandas-datareader/envs/latest/local/lib/python2.7/site-packages/pandas_datareader-0.4.0-py2.7.egg/pandas_datareader/base.pyc in _read_one_data(self, url, params)
     72         """ read one data from specified URL """
     73         if self._format == 'string':
---> 74             out = self._read_url_as_StringIO(url, params=params)
     75         elif self._format == 'json':
     76             out = self._get_response(url, params=params).json()

/home/docs/checkouts/readthedocs.org/user_builds/pandas-datareader/envs/latest/local/lib/python2.7/site-packages/pandas_datareader-0.4.0-py2.7.egg/pandas_datareader/base.pyc in _read_url_as_StringIO(self, url, params)
     83         Open url (and retry)
     84         """
---> 85         response = self._get_response(url, params=params)
     86         text = self._sanitize_response(response)
     87         out = StringIO()

/home/docs/checkouts/readthedocs.org/user_builds/pandas-datareader/envs/latest/local/lib/python2.7/site-packages/pandas_datareader-0.4.0-py2.7.egg/pandas_datareader/base.pyc in _get_response(self, url, params)
    112         # initial attempt + retry
    113         for i in range(self.retry_count + 1):
--> 114             response = self.session.get(url, params=params)
    115             if response.status_code == requests.codes.ok:
    116                 return response

/home/docs/checkouts/readthedocs.org/user_builds/pandas-datareader/envs/latest/local/lib/python2.7/site-packages/requests/sessions.pyc in get(self, url, **kwargs)
    513 
    514         kwargs.setdefault('allow_redirects', True)
--> 515         return self.request('GET', url, **kwargs)
    516 
    517     def options(self, url, **kwargs):

/home/docs/checkouts/readthedocs.org/user_builds/pandas-datareader/envs/latest/local/lib/python2.7/site-packages/requests_cache/core.pyc in request(self, method, url, params, data, **kwargs)
    124             _normalize_parameters(params),
    125             _normalize_parameters(data),
--> 126             **kwargs
    127         )
    128         if self._is_cache_disabled:

/home/docs/checkouts/readthedocs.org/user_builds/pandas-datareader/envs/latest/local/lib/python2.7/site-packages/requests/sessions.pyc in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    500         }
    501         send_kwargs.update(settings)
--> 502         resp = self.send(prep, **send_kwargs)
    503 
    504         return resp

/home/docs/checkouts/readthedocs.org/user_builds/pandas-datareader/envs/latest/local/lib/python2.7/site-packages/requests_cache/core.pyc in send(self, request, **kwargs)
     97         response, timestamp = self.cache.get_response_and_time(cache_key)
     98         if response is None:
---> 99             return send_request_and_cache_response()
    100 
    101         if self._cache_expire_after is not None:

/home/docs/checkouts/readthedocs.org/user_builds/pandas-datareader/envs/latest/local/lib/python2.7/site-packages/requests_cache/core.pyc in send_request_and_cache_response()
     89 
     90         def send_request_and_cache_response():
---> 91             response = super(CachedSession, self).send(request, **kwargs)
     92             if response.status_code in self._cache_allowable_codes:
     93                 self.cache.save_response(cache_key, response)

/home/docs/checkouts/readthedocs.org/user_builds/pandas-datareader/envs/latest/local/lib/python2.7/site-packages/requests/sessions.pyc in send(self, request, **kwargs)
    610 
    611         # Send the request
--> 612         r = adapter.send(request, **kwargs)
    613 
    614         # Total elapsed time of the request (approximately)

/home/docs/checkouts/readthedocs.org/user_builds/pandas-datareader/envs/latest/local/lib/python2.7/site-packages/requests/adapters.pyc in send(self, request, stream, timeout, verify, cert, proxies)
    502                 raise ProxyError(e, request=request)
    503 
--> 504             raise ConnectionError(e, request=request)
    505 
    506         except ClosedPoolError as e:

ConnectionError: HTTPConnectionPool(host='ichart.finance.yahoo.com', port=80): Max retries exceeded with url: /table.csv?a=0&b=1&c=2010&d=0&e=27&f=2013&g=d&ignore=.csv&s=F (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7feaae18cf50>: Failed to establish a new connection: [Errno -2] Name or service not known',))

In [9]: f.ix['2010-01-04']

NameErrorTraceback (most recent call last)
<ipython-input-9-a881e6f45410> in <module>()
----> 1 f.ix['2010-01-04']

NameError: name 'f' is not defined

A SQLite file named cache.sqlite will be created in the working directory, storing the cached responses until they expire (three days after they are stored, in this example).

For additional information on using requests-cache, see the requests-cache documentation.