From 942a26fd37ce1e94bc6b935f459ad1af7bcaafc5 Mon Sep 17 00:00:00 2001 From: Value Raider Date: Fri, 1 Mar 2024 22:00:30 +0000 Subject: [PATCH 01/15] Fix potential for price repair to discard price=0 rows --- yfinance/scrapers/history.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yfinance/scrapers/history.py b/yfinance/scrapers/history.py index 0b3cf1f70..5b51aedfe 100644 --- a/yfinance/scrapers/history.py +++ b/yfinance/scrapers/history.py @@ -852,7 +852,7 @@ def _fix_unit_random_mixups(self, df, interval, tz_exchange, prepost): if f_zeroes.any(): df2_zeroes = df2[f_zeroes] df2 = df2[~f_zeroes] - df = df[~f_zeroes] # all row slicing must be applied to both df and df2 + df_orig = df[~f_zeroes] # all row slicing must be applied to both df and df2 else: df2_zeroes = None if df2.shape[0] <= 1: @@ -954,7 +954,7 @@ def _fix_unit_random_mixups(self, df, interval, tz_exchange, prepost): fj = f_either[:, j] if fj.any(): c = data_cols[j] - df2.loc[fj, c] = df.loc[fj, c] + df2.loc[fj, c] = df_orig.loc[fj, c] if df2_zeroes is not None: if "Repaired?" 
not in df2_zeroes.columns: df2_zeroes["Repaired?"] = False From 812931ff985435cc1465c99b71235484a1d15280 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Sat, 6 Apr 2024 12:19:46 +0100 Subject: [PATCH 02/15] Replace dead 'appdirs' package with 'platformdirs' --- meta.yaml | 4 ++-- requirements.txt | 2 +- setup.py | 2 +- tests/context.py | 2 +- yfinance/cache.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/meta.yaml b/meta.yaml index 843a8f43f..eaff1c58b 100644 --- a/meta.yaml +++ b/meta.yaml @@ -21,7 +21,7 @@ requirements: - requests >=2.31 - multitasking >=0.0.7 - lxml >=4.9.1 - - appdirs >=1.4.4 + - platformdirs >=2.0.0 - pytz >=2022.5 - frozendict >=2.3.4 - beautifulsoup4 >=4.11.1 @@ -37,7 +37,7 @@ requirements: - requests >=2.31 - multitasking >=0.0.7 - lxml >=4.9.1 - - appdirs >=1.4.4 + - platformdirs >=2.0.0 - pytz >=2022.5 - frozendict >=2.3.4 - beautifulsoup4 >=4.11.1 diff --git a/requirements.txt b/requirements.txt index b8768416b..f19ca36b1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ numpy>=1.16.5 requests>=2.31 multitasking>=0.0.7 lxml>=4.9.1 -appdirs>=1.4.4 +platformdirs>=2.0.0 pytz>=2022.5 frozendict>=2.3.4 beautifulsoup4>=4.11.1 diff --git a/setup.py b/setup.py index 88050d963..9b0c073b4 100644 --- a/setup.py +++ b/setup.py @@ -61,7 +61,7 @@ packages=find_packages(exclude=['contrib', 'docs', 'tests', 'examples']), install_requires=['pandas>=1.3.0', 'numpy>=1.16.5', 'requests>=2.31', 'multitasking>=0.0.7', - 'lxml>=4.9.1', 'appdirs>=1.4.4', 'pytz>=2022.5', + 'lxml>=4.9.1', 'platformdirs>=2.0.0', 'pytz>=2022.5', 'frozendict>=2.3.4', 'peewee>=3.16.2', 'beautifulsoup4>=4.11.1', 'html5lib>=1.1'], extras_require={ diff --git a/tests/context.py b/tests/context.py index a9ec7d07e..598cfa429 100644 --- a/tests/context.py +++ b/tests/context.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -import appdirs as _ad +import platformdirs as _ad import datetime as _dt import sys import os diff --git a/yfinance/cache.py 
b/yfinance/cache.py index e34254909..0ab73c855 100644 --- a/yfinance/cache.py +++ b/yfinance/cache.py @@ -1,7 +1,7 @@ import peewee as _peewee from threading import Lock import os as _os -import appdirs as _ad +import platformdirs as _ad import atexit as _atexit import datetime as _datetime import pickle as _pkl From 48e9075a2da88fe9906486a2e5303f53568f092c Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Sat, 6 Apr 2024 12:28:41 +0100 Subject: [PATCH 03/15] Deprecate 'pandas_datareader', remove a deprecated argument. Drop official support for 'pandas_datareader', tag pdr_override() as deprecated. Also removed deprecated argument 'download(show_errors)'. --- README.md | 17 ----------------- yfinance/__init__.py | 2 ++ yfinance/multi.py | 13 +------------ 3 files changed, 3 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 2ba663d7f..801fe2fa9 100644 --- a/README.md +++ b/README.md @@ -228,23 +228,6 @@ yfinance?](https://stackoverflow.com/questions/63107801) - How to download single or multiple tickers into a single dataframe with single level column names and a ticker column -### `pandas_datareader` override - -If your code uses `pandas_datareader` and you want to download data -faster, you can "hijack" `pandas_datareader.data.get_data_yahoo()` -method to use **yfinance** while making sure the returned data is in the -same format as **pandas\_datareader**'s `get_data_yahoo()`. - -```python -from pandas_datareader import data as pdr - -import yfinance as yf -yf.pdr_override() # <== that's all it takes :-) - -# download dataframe -data = pdr.get_data_yahoo("SPY", start="2017-01-01", end="2017-04-30") -``` - ### Persistent cache store To reduce Yahoo, yfinance store some data locally: timezones to localize dates, and cookie. 
Cache location is: diff --git a/yfinance/__init__.py b/yfinance/__init__.py index 141be3621..4f07c6342 100644 --- a/yfinance/__init__.py +++ b/yfinance/__init__.py @@ -35,6 +35,8 @@ def pdr_override(): make pandas datareader optional otherwise can be called via fix_yahoo_finance.download(...) """ + from .utils import print_once + print_once("yfinance: pandas_datareader support is deprecated & semi-broken so will be removed in a future version. Just use yfinance.") try: import pandas_datareader pandas_datareader.data.get_data_yahoo = download diff --git a/yfinance/multi.py b/yfinance/multi.py index 87ae98d56..cee8e923f 100644 --- a/yfinance/multi.py +++ b/yfinance/multi.py @@ -36,7 +36,7 @@ @utils.log_indent_decorator def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=None, group_by='column', auto_adjust=False, back_adjust=False, repair=False, keepna=False, - progress=True, period="max", show_errors=None, interval="1d", prepost=False, + progress=True, period="max", interval="1d", prepost=False, proxy=None, rounding=False, timeout=10, session=None): """Download yahoo tickers :Parameters: @@ -80,9 +80,6 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_ Optional. Proxy server URL scheme. Default is None rounding: bool Optional. Round values to 2 decimal places? - show_errors: bool - Optional. Doesn't print errors if False - DEPRECATED, will be removed in future version timeout: None or float If not None stops waiting for a response after given number of seconds. (Can also be a fraction of a second e.g. 0.01) @@ -91,14 +88,6 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_ """ logger = utils.get_yf_logger() - if show_errors is not None: - if show_errors: - utils.print_once(f"yfinance: download(show_errors={show_errors}) argument is deprecated and will be removed in future version. 
Do this instead: logging.getLogger('yfinance').setLevel(logging.ERROR)") - logger.setLevel(logging.ERROR) - else: - utils.print_once(f"yfinance: download(show_errors={show_errors}) argument is deprecated and will be removed in future version. Do this instead to suppress error messages: logging.getLogger('yfinance').setLevel(logging.CRITICAL)") - logger.setLevel(logging.CRITICAL) - if logger.isEnabledFor(logging.DEBUG): if threads: # With DEBUG, each thread generates a lot of log messages. From f8e8eecf44c41d9383f9fda83f3b6f330404a140 Mon Sep 17 00:00:00 2001 From: vittoboa Date: Mon, 22 Apr 2024 21:08:10 +0200 Subject: [PATCH 04/15] Add functools.wraps to log_indent_decorator's wrapper function --- yfinance/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yfinance/utils.py b/yfinance/utils.py index ee3997c09..e51952075 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -26,7 +26,7 @@ import re as _re import sys as _sys import threading -from functools import lru_cache +from functools import lru_cache, wraps from inspect import getmembers from types import FunctionType from typing import List, Optional @@ -95,6 +95,7 @@ def get_indented_logger(name=None): def log_indent_decorator(func): + @wraps(func) def wrapper(*args, **kwargs): logger = get_indented_logger('yfinance') logger.debug(f'Entering {func.__name__}()') From 2dcbe349109da2a0ffd61fde453db4ecbe05280b Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Fri, 26 Apr 2024 21:32:39 +0100 Subject: [PATCH 05/15] Don't price-repair FX volume=0, is normal --- yfinance/scrapers/history.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/yfinance/scrapers/history.py b/yfinance/scrapers/history.py index 5b51aedfe..a94b524a5 100644 --- a/yfinance/scrapers/history.py +++ b/yfinance/scrapers/history.py @@ -1014,9 +1014,13 @@ def _fix_zeroes(self, df, interval, tz_exchange, prepost): df2 = df2[~f_zero_or_nan_ignore] f_prices_bad = (df2[price_cols] == 0.0) | 
df2[price_cols].isna() - f_high_low_good = (~df2["High"].isna().to_numpy()) & (~df2["Low"].isna().to_numpy()) f_change = df2["High"].to_numpy() != df2["Low"].to_numpy() - f_vol_bad = (df2["Volume"] == 0).to_numpy() & f_high_low_good & f_change + if self.ticker.endswith("=X"): + # FX, volume always 0 + f_vol_bad = None + else: + f_high_low_good = (~df2["High"].isna().to_numpy()) & (~df2["Low"].isna().to_numpy()) + f_vol_bad = (df2["Volume"] == 0).to_numpy() & f_high_low_good & f_change # If stock split occurred, then trading must have happened. # I should probably rename the function, because prices aren't zero ... @@ -1029,7 +1033,9 @@ def _fix_zeroes(self, df, interval, tz_exchange, prepost): # Check whether worth attempting repair f_prices_bad = f_prices_bad.to_numpy() - f_bad_rows = f_prices_bad.any(axis=1) | f_vol_bad + f_bad_rows = f_prices_bad.any(axis=1) + if f_vol_bad is not None: + f_bad_rows = f_bad_rows | f_vol_bad if not f_bad_rows.any(): logger.info("price-repair-missing: No price=0 errors to repair") if "Repaired?" not in df.columns: From dc5c7185565a8d79c06ac1c1339e9707fdf3ef49 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Sun, 28 Apr 2024 19:40:55 +0100 Subject: [PATCH 06/15] Fix: datetime.datetime.utcnow() is deprecated ... Python 3.12 deprecates datetime.datetime.utcnow(). Instead of switching to datetime.datetime.now(datetime.UTC), which won't work in Python 3.11, just switch to Pandas.utcnow(). 
--- tests/prices.py | 4 ++-- yfinance/scrapers/history.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/prices.py b/tests/prices.py index 61f633249..792020c38 100644 --- a/tests/prices.py +++ b/tests/prices.py @@ -62,7 +62,7 @@ def test_duplicatingHourly(self): dat = yf.Ticker(tkr, session=self.session) tz = dat._get_ticker_tz(proxy=None, timeout=None) - dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow()) + dt_utc = _pd.Timestamp.utcnow() dt = dt_utc.astimezone(_tz.timezone(tz)) start_d = dt.date() - _dt.timedelta(days=7) df = dat.history(start=start_d, interval="1h") @@ -82,7 +82,7 @@ def test_duplicatingDaily(self): dat = yf.Ticker(tkr, session=self.session) tz = dat._get_ticker_tz(proxy=None, timeout=None) - dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow()) + dt_utc = _pd.Timestamp.utcnow() dt = dt_utc.astimezone(_tz.timezone(tz)) if dt.time() < _dt.time(17, 0): continue diff --git a/yfinance/scrapers/history.py b/yfinance/scrapers/history.py index a94b524a5..149d19c00 100644 --- a/yfinance/scrapers/history.py +++ b/yfinance/scrapers/history.py @@ -210,7 +210,7 @@ def history(self, period="1mo", interval="1d", quotes = utils.parse_quotes(data["chart"]["result"][0]) # Yahoo bug fix - it often appends latest price even if after end date if end and not quotes.empty: - endDt = pd.to_datetime(_datetime.datetime.utcfromtimestamp(end)) + endDt = pd.to_datetime(end, unit='s') if quotes.index[quotes.shape[0] - 1] >= endDt: quotes = quotes.iloc[0:quotes.shape[0] - 1] except Exception: From 685ef71d9fbfb9c01c6407648383c034ead1b567 Mon Sep 17 00:00:00 2001 From: Elijah Lopez Date: Wed, 24 Apr 2024 19:28:56 -0400 Subject: [PATCH 07/15] Add error classes for symbol delisting errors, closes #270 --- yfinance/base.py | 6 ++-- yfinance/exceptions.py | 39 +++++++++++++++++++++++- yfinance/scrapers/history.py | 59 ++++++++++++++++++++---------------- 3 files changed, 75 insertions(+), 29 deletions(-) diff --git a/yfinance/base.py 
b/yfinance/base.py index d395fb107..73fb068e6 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -32,6 +32,7 @@ from . import utils, cache from .data import YfData +from .exceptions import YFinanceEarningsDateMissing from .scrapers.analysis import Analysis from .scrapers.fundamentals import Fundamentals from .scrapers.holders import Holders @@ -192,7 +193,7 @@ def get_mutualfund_holders(self, proxy=None, as_dict=False): if as_dict: return data.to_dict() return data - + def get_insider_purchases(self, proxy=None, as_dict=False): self._holders.proxy = proxy or self.proxy data = self._holders.insider_purchases @@ -567,7 +568,8 @@ def get_earnings_dates(self, limit=12, proxy=None) -> Optional[pd.DataFrame]: page_size = min(limit - len(dates), page_size) if dates is None or dates.shape[0] == 0: - err_msg = "No earnings dates found, symbol may be delisted" + _exception = YFinanceEarningsDateMissing(self.ticker) + err_msg = str(_exception) logger.error(f'{self.ticker}: {err_msg}') return None dates = dates.reset_index(drop=True) diff --git a/yfinance/exceptions.py b/yfinance/exceptions.py index 749d6ff29..4390fcad6 100644 --- a/yfinance/exceptions.py +++ b/yfinance/exceptions.py @@ -1,12 +1,49 @@ class YFinanceException(Exception): - pass + def __init__(self, description=""): + super().__init__(description) class YFinanceDataException(YFinanceException): pass +class YFinanceChartError(YFinanceException): + def __init__(self, ticker, description): + self.ticker = ticker + super().__init__(f"{self.ticker}: {description}") + + class YFNotImplementedError(NotImplementedError): def __init__(self, method_name): super().__init__(f"Have not implemented fetching '{method_name}' from Yahoo API") + +class YFinanceTickerMissingError(YFinanceException): + def __init__(self, ticker, rationale): + super().__init__(f"${ticker}: possibly delisted; {rationale}") + self.rationale = rationale + self.ticker = ticker + + +class YFinanceTimezoneMissingError(YFinanceTickerMissingError): 
+ def __init__(self, ticker): + super().__init__(ticker, "No timezone found") + + +class YFinancePriceDataMissingError(YFinanceTickerMissingError): + def __init__(self, ticker, debug_info): + self.debug_info = debug_info + super().__init__(ticker, f"No price data found {debug_info}") + + +class YFinanceEarningsDateMissing(YFinanceTickerMissingError): + def __init__(self, ticker): + super().__init__(ticker, "No earnings dates found") + + +class YFinanceInvalidPeriodError(YFinanceException): + def __init__(self, ticker, invalid_period, valid_ranges): + self.ticker = ticker + self.invalid_period = invalid_period + self.valid_ranges = valid_ranges + super().__init__(f"{self.ticker}: Period '{invalid_period}' is invalid, must be one of {valid_ranges}") diff --git a/yfinance/scrapers/history.py b/yfinance/scrapers/history.py index 149d19c00..bd98a2f7c 100644 --- a/yfinance/scrapers/history.py +++ b/yfinance/scrapers/history.py @@ -1,4 +1,3 @@ - import datetime as _datetime import dateutil as _dateutil import logging @@ -8,6 +7,7 @@ from yfinance import shared, utils from yfinance.const import _BASE_URL_, _PRICE_COLNAMES_ +from yfinance.exceptions import YFinanceChartError, YFinanceInvalidPeriodError, YFinancePriceDataMissingError, YFinanceTimezoneMissingError class PriceHistory: def __init__(self, data, ticker, tz, session=None, proxy=None): @@ -23,7 +23,7 @@ def __init__(self, data, ticker, tz, session=None, proxy=None): # Limit recursion depth when repairing prices self._reconstruct_start_interval = None - + @utils.log_indent_decorator def history(self, period="1mo", interval="1d", start=None, end=None, prepost=False, actions=True, @@ -80,14 +80,15 @@ def history(self, period="1mo", interval="1d", # Check can get TZ. Fail => probably delisted tz = self.tz if tz is None: - # Every valid ticker has a timezone. Missing = problem - err_msg = "No timezone found, symbol may be delisted" + # Every valid ticker has a timezone. 
A missing timezone is a problem + _exception = YFinanceTimezoneMissingError(self.ticker) + err_msg = str(_exception) shared._DFS[self.ticker] = utils.empty_df() - shared._ERRORS[self.ticker] = err_msg + shared._ERRORS[self.ticker] = err_msg.split(': ', 1)[1] if raise_errors: - raise Exception(f'{self.ticker}: {err_msg}') + raise _exception else: - logger.error(f'{self.ticker}: {err_msg}') + logger.error(err_msg) return utils.empty_df() if end is None: @@ -159,48 +160,54 @@ def history(self, period="1mo", interval="1d", self._history_metadata = {} intraday = params["interval"][-1] in ("m", 'h') - err_msg = "No price data found, symbol may be delisted" + _price_data_debug = '' + _exception = YFinancePriceDataMissingError(self.ticker, '') if start or period is None or period.lower() == "max": - err_msg += f' ({params["interval"]} ' + _price_data_debug += f' ({params["interval"]} ' if start_user is not None: - err_msg += f'{start_user}' + _price_data_debug += f'{start_user}' elif not intraday: - err_msg += f'{pd.Timestamp(start, unit="s").tz_localize("UTC").tz_convert(tz).date()}' + _price_data_debug += f'{pd.Timestamp(start, unit="s").tz_localize("UTC").tz_convert(tz).date()}' else: - err_msg += f'{pd.Timestamp(start, unit="s").tz_localize("UTC").tz_convert(tz)}' + _price_data_debug += f'{pd.Timestamp(start, unit="s").tz_localize("UTC").tz_convert(tz)}' - err_msg += ' -> ' + _price_data_debug += ' -> ' if end_user is not None: - err_msg += f'{end_user})' + _price_data_debug += f'{end_user})' elif not intraday: - err_msg += f'{pd.Timestamp(end, unit="s").tz_localize("UTC").tz_convert(tz).date()})' + _price_data_debug += f'{pd.Timestamp(end, unit="s").tz_localize("UTC").tz_convert(tz).date()})' else: - err_msg += f'{pd.Timestamp(end, unit="s").tz_localize("UTC").tz_convert(tz)})' + _price_data_debug += f'{pd.Timestamp(end, unit="s").tz_localize("UTC").tz_convert(tz)})' else: - err_msg += f' (period={period})' + _price_data_debug += f' (period={period})' fail = 
False if data is None or not isinstance(data, dict): fail = True elif isinstance(data, dict) and 'status_code' in data: - err_msg += f"(Yahoo status_code = {data['status_code']})" + _price_data_debug += f"(Yahoo status_code = {data['status_code']})" fail = True elif "chart" in data and data["chart"]["error"]: - err_msg = data["chart"]["error"]["description"] + _exception = YFinanceChartError(self.ticker, data["chart"]["error"]["description"]) fail = True elif "chart" not in data or data["chart"]["result"] is None or not data["chart"]["result"]: fail = True elif period is not None and "timestamp" not in data["chart"]["result"][0] and period not in \ self._history_metadata["validRanges"]: # User provided a bad period. The minimum should be '1d', but sometimes Yahoo accepts '1h'. - err_msg = f"Period '{period}' is invalid, must be one of {self._history_metadata['validRanges']}" + _exception = YFinanceInvalidPeriodError(self.ticker, period, self._history_metadata['validRanges']) fail = True + + if isinstance(_exception, YFinancePriceDataMissingError): + _exception = YFinancePriceDataMissingError(self.ticker, _price_data_debug) + + err_msg = str(_exception) if fail: shared._DFS[self.ticker] = utils.empty_df() - shared._ERRORS[self.ticker] = err_msg + shared._ERRORS[self.ticker] = err_msg.split(': ', 1)[1] if raise_errors: - raise Exception(f'{self.ticker}: {err_msg}') + raise _exception else: - logger.error(f'{self.ticker}: {err_msg}') + logger.error(err_msg) if self._reconstruct_start_interval is not None and self._reconstruct_start_interval == interval: self._reconstruct_start_interval = None return utils.empty_df() @@ -215,11 +222,11 @@ def history(self, period="1mo", interval="1d", quotes = quotes.iloc[0:quotes.shape[0] - 1] except Exception: shared._DFS[self.ticker] = utils.empty_df() - shared._ERRORS[self.ticker] = err_msg + shared._ERRORS[self.ticker] = err_msg.split(': ', 1)[1] if raise_errors: - raise Exception(f'{self.ticker}: {err_msg}') + raise 
Exception(err_msg) else: - logger.error(f'{self.ticker}: {err_msg}') + logger.error(err_msg) if self._reconstruct_start_interval is not None and self._reconstruct_start_interval == interval: self._reconstruct_start_interval = None return shared._DFS[self.ticker] From ee87a95b8df3210d1e3eb14720b5fc621918ecc2 Mon Sep 17 00:00:00 2001 From: Elijah Lopez Date: Wed, 24 Apr 2024 19:28:56 -0400 Subject: [PATCH 08/15] Rename errors from YFinance to YF --- yfinance/base.py | 4 ++-- yfinance/exceptions.py | 16 ++++++++-------- yfinance/scrapers/fundamentals.py | 6 +++--- yfinance/scrapers/history.py | 14 +++++++------- yfinance/scrapers/holders.py | 8 +++----- yfinance/scrapers/quote.py | 16 ++++++++-------- 6 files changed, 31 insertions(+), 33 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 73fb068e6..93321a0b3 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -32,7 +32,7 @@ from . import utils, cache from .data import YfData -from .exceptions import YFinanceEarningsDateMissing +from .exceptions import YFEarningsDateMissing from .scrapers.analysis import Analysis from .scrapers.fundamentals import Fundamentals from .scrapers.holders import Holders @@ -568,7 +568,7 @@ def get_earnings_dates(self, limit=12, proxy=None) -> Optional[pd.DataFrame]: page_size = min(limit - len(dates), page_size) if dates is None or dates.shape[0] == 0: - _exception = YFinanceEarningsDateMissing(self.ticker) + _exception = YFEarningsDateMissing(self.ticker) err_msg = str(_exception) logger.error(f'{self.ticker}: {err_msg}') return None diff --git a/yfinance/exceptions.py b/yfinance/exceptions.py index 4390fcad6..2846d2199 100644 --- a/yfinance/exceptions.py +++ b/yfinance/exceptions.py @@ -1,13 +1,13 @@ -class YFinanceException(Exception): +class YFException(Exception): def __init__(self, description=""): super().__init__(description) -class YFinanceDataException(YFinanceException): +class YFDataException(YFException): pass -class YFinanceChartError(YFinanceException): 
+class YFChartError(YFException): def __init__(self, ticker, description): self.ticker = ticker super().__init__(f"{self.ticker}: {description}") @@ -18,30 +18,30 @@ def __init__(self, method_name): super().__init__(f"Have not implemented fetching '{method_name}' from Yahoo API") -class YFinanceTickerMissingError(YFinanceException): +class YFTickerMissingError(YFException): def __init__(self, ticker, rationale): super().__init__(f"${ticker}: possibly delisted; {rationale}") self.rationale = rationale self.ticker = ticker -class YFinanceTimezoneMissingError(YFinanceTickerMissingError): +class YFTzMissingError(YFTickerMissingError): def __init__(self, ticker): super().__init__(ticker, "No timezone found") -class YFinancePriceDataMissingError(YFinanceTickerMissingError): +class YFPricesMissingError(YFTickerMissingError): def __init__(self, ticker, debug_info): self.debug_info = debug_info super().__init__(ticker, f"No price data found {debug_info}") -class YFinanceEarningsDateMissing(YFinanceTickerMissingError): +class YFEarningsDateMissing(YFTickerMissingError): def __init__(self, ticker): super().__init__(ticker, "No earnings dates found") -class YFinanceInvalidPeriodError(YFinanceException): +class YFInvalidPeriodError(YFException): def __init__(self, ticker, invalid_period, valid_ranges): self.ticker = ticker self.invalid_period = invalid_period diff --git a/yfinance/scrapers/fundamentals.py b/yfinance/scrapers/fundamentals.py index d4fba2f75..6bbce27f4 100644 --- a/yfinance/scrapers/fundamentals.py +++ b/yfinance/scrapers/fundamentals.py @@ -5,7 +5,7 @@ from yfinance import utils, const from yfinance.data import YfData -from yfinance.exceptions import YFinanceException, YFNotImplementedError +from yfinance.exceptions import YFException, YFNotImplementedError class Fundamentals: @@ -70,7 +70,7 @@ def get_cash_flow_time_series(self, freq="yearly", proxy=None) -> pd.DataFrame: @utils.log_indent_decorator def _fetch_time_series(self, name, timescale, proxy=None): 
# Fetching time series preferred over scraping 'QuoteSummaryStore', - # because it matches what Yahoo shows. But for some tickers returns nothing, + # because it matches what Yahoo shows. But for some tickers returns nothing, # despite 'QuoteSummaryStore' containing valid data. allowed_names = ["income", "balance-sheet", "cash-flow"] @@ -86,7 +86,7 @@ def _fetch_time_series(self, name, timescale, proxy=None): if statement is not None: return statement - except YFinanceException as e: + except YFException as e: utils.get_yf_logger().error(f"{self._symbol}: Failed to create {name} financials table for reason: {e}") return pd.DataFrame() diff --git a/yfinance/scrapers/history.py b/yfinance/scrapers/history.py index bd98a2f7c..d8da2b325 100644 --- a/yfinance/scrapers/history.py +++ b/yfinance/scrapers/history.py @@ -7,7 +7,7 @@ from yfinance import shared, utils from yfinance.const import _BASE_URL_, _PRICE_COLNAMES_ -from yfinance.exceptions import YFinanceChartError, YFinanceInvalidPeriodError, YFinancePriceDataMissingError, YFinanceTimezoneMissingError +from yfinance.exceptions import YFChartError, YFInvalidPeriodError, YFPricesMissingError, YFTzMissingError class PriceHistory: def __init__(self, data, ticker, tz, session=None, proxy=None): @@ -81,7 +81,7 @@ def history(self, period="1mo", interval="1d", tz = self.tz if tz is None: # Every valid ticker has a timezone. 
A missing timezone is a problem - _exception = YFinanceTimezoneMissingError(self.ticker) + _exception = YFTzMissingError(self.ticker) err_msg = str(_exception) shared._DFS[self.ticker] = utils.empty_df() shared._ERRORS[self.ticker] = err_msg.split(': ', 1)[1] @@ -161,7 +161,7 @@ def history(self, period="1mo", interval="1d", intraday = params["interval"][-1] in ("m", 'h') _price_data_debug = '' - _exception = YFinancePriceDataMissingError(self.ticker, '') + _exception = YFPricesMissingError(self.ticker, '') if start or period is None or period.lower() == "max": _price_data_debug += f' ({params["interval"]} ' if start_user is not None: @@ -187,18 +187,18 @@ def history(self, period="1mo", interval="1d", _price_data_debug += f"(Yahoo status_code = {data['status_code']})" fail = True elif "chart" in data and data["chart"]["error"]: - _exception = YFinanceChartError(self.ticker, data["chart"]["error"]["description"]) + _exception = YFChartError(self.ticker, data["chart"]["error"]["description"]) fail = True elif "chart" not in data or data["chart"]["result"] is None or not data["chart"]["result"]: fail = True elif period is not None and "timestamp" not in data["chart"]["result"][0] and period not in \ self._history_metadata["validRanges"]: # User provided a bad period. The minimum should be '1d', but sometimes Yahoo accepts '1h'. 
- _exception = YFinanceInvalidPeriodError(self.ticker, period, self._history_metadata['validRanges']) + _exception = YFInvalidPeriodError(self.ticker, period, self._history_metadata['validRanges']) fail = True - if isinstance(_exception, YFinancePriceDataMissingError): - _exception = YFinancePriceDataMissingError(self.ticker, _price_data_debug) + if isinstance(_exception, YFPricesMissingError): + _exception = YFPricesMissingError(self.ticker, _price_data_debug) err_msg = str(_exception) if fail: diff --git a/yfinance/scrapers/holders.py b/yfinance/scrapers/holders.py index 4a3e7d0b0..d72553796 100644 --- a/yfinance/scrapers/holders.py +++ b/yfinance/scrapers/holders.py @@ -6,7 +6,7 @@ from yfinance import utils from yfinance.data import YfData from yfinance.const import _BASE_URL_ -from yfinance.exceptions import YFinanceDataException +from yfinance.exceptions import YFDataException _QUOTE_SUMMARY_URL_ = f"{_BASE_URL_}/v10/finance/quoteSummary/" @@ -104,7 +104,7 @@ def _fetch_and_parse(self): self._parse_insider_holders(data["insiderHolders"]) self._parse_net_share_purchase_activity(data["netSharePurchaseActivity"]) except (KeyError, IndexError): - raise YFinanceDataException("Failed to parse holders json data.") + raise YFDataException("Failed to parse holders json data.") @staticmethod def _parse_raw_values(data): @@ -189,7 +189,7 @@ def _parse_insider_holders(self, data): if not df.empty: df["positionDirectDate"] = pd.to_datetime(df["positionDirectDate"], unit="s") df["latestTransDate"] = pd.to_datetime(df["latestTransDate"], unit="s") - + df.rename(columns={ "name": "Name", "relation": "Position", @@ -242,5 +242,3 @@ def _parse_net_share_purchase_activity(self, data): } ).convert_dtypes() self._insider_purchases = df - - diff --git a/yfinance/scrapers/quote.py b/yfinance/scrapers/quote.py index 7df7e102d..e2e7ac909 100644 --- a/yfinance/scrapers/quote.py +++ b/yfinance/scrapers/quote.py @@ -10,7 +10,7 @@ from yfinance import utils from yfinance.data import 
YfData from yfinance.const import quote_summary_valid_modules, _BASE_URL_ -from yfinance.exceptions import YFNotImplementedError, YFinanceDataException, YFinanceException +from yfinance.exceptions import YFNotImplementedError, YFDataException, YFException info_retired_keys_price = {"currentPrice", "dayHigh", "dayLow", "open", "previousClose", "volume", "volume24Hr"} info_retired_keys_price.update({"regularMarket"+s for s in ["DayHigh", "DayLow", "Open", "PreviousClose", "Price", "Volume"]}) @@ -578,7 +578,7 @@ def recommendations(self) -> pd.DataFrame: try: data = result["quoteSummary"]["result"][0]["recommendationTrend"]["trend"] except (KeyError, IndexError): - raise YFinanceDataException(f"Failed to parse json response from Yahoo Finance: {result}") + raise YFDataException(f"Failed to parse json response from Yahoo Finance: {result}") self._recommendations = pd.DataFrame(data) return self._recommendations @@ -592,14 +592,14 @@ def upgrades_downgrades(self) -> pd.DataFrame: try: data = result["quoteSummary"]["result"][0]["upgradeDowngradeHistory"]["history"] if len(data) == 0: - raise YFinanceDataException(f"No upgrade/downgrade history found for {self._symbol}") + raise YFDataException(f"No upgrade/downgrade history found for {self._symbol}") df = pd.DataFrame(data) df.rename(columns={"epochGradeDate": "GradeDate", 'firm': 'Firm', 'toGrade': 'ToGrade', 'fromGrade': 'FromGrade', 'action': 'Action'}, inplace=True) df.set_index('GradeDate', inplace=True) df.index = pd.to_datetime(df.index, unit='s') self._upgrades_downgrades = df except (KeyError, IndexError): - raise YFinanceDataException(f"Failed to parse json response from Yahoo Finance: {result}") + raise YFDataException(f"Failed to parse json response from Yahoo Finance: {result}") return self._upgrades_downgrades @property @@ -614,11 +614,11 @@ def valid_modules(): def _fetch(self, proxy, modules: list): if not isinstance(modules, list): - raise YFinanceException("Should provide a list of modules, see 
available modules using `valid_modules`") + raise YFException("Should provide a list of modules, see available modules using `valid_modules`") modules = ','.join([m for m in modules if m in quote_summary_valid_modules]) if len(modules) == 0: - raise YFinanceException("No valid modules provided, see available modules using `valid_modules`") + raise YFException("No valid modules provided, see available modules using `valid_modules`") params_dict = {"modules": modules, "corsDomain": "finance.yahoo.com", "formatted": "false", "symbol": self._symbol} try: result = self._data.get_raw_json(_QUOTE_SUMMARY_URL_ + f"/{self._symbol}", user_agent_headers=self._data.user_agent_headers, params=params_dict, proxy=proxy) @@ -721,7 +721,7 @@ def _fetch_complementary(self, proxy): json_data = json.loads(json_str) json_result = json_data.get("timeseries") or json_data.get("finance") if json_result["error"] is not None: - raise YFinanceException("Failed to parse json response from Yahoo Finance: " + str(json_result["error"])) + raise YFException("Failed to parse json response from Yahoo Finance: " + str(json_result["error"])) for k in keys: keydict = json_result["result"][0] if k in keydict: @@ -754,4 +754,4 @@ def _fetch_calendar(self): self._calendar['Revenue Low'] = earnings.get('revenueLow', None) self._calendar['Revenue Average'] = earnings.get('revenueAverage', None) except (KeyError, IndexError): - raise YFinanceDataException(f"Failed to parse json response from Yahoo Finance: {result}") + raise YFDataException(f"Failed to parse json response from Yahoo Finance: {result}") From 30fdc96157139095a6395f978e73fe47be07ea59 Mon Sep 17 00:00:00 2001 From: Elijah Lopez Date: Wed, 24 Apr 2024 19:28:56 -0400 Subject: [PATCH 09/15] Fix: PricesMissingError not being raised --- yfinance/scrapers/history.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yfinance/scrapers/history.py b/yfinance/scrapers/history.py index d8da2b325..34e48e30a 100644 --- 
a/yfinance/scrapers/history.py +++ b/yfinance/scrapers/history.py @@ -224,7 +224,7 @@ def history(self, period="1mo", interval="1d", shared._DFS[self.ticker] = utils.empty_df() shared._ERRORS[self.ticker] = err_msg.split(': ', 1)[1] if raise_errors: - raise Exception(err_msg) + raise _exception else: logger.error(err_msg) if self._reconstruct_start_interval is not None and self._reconstruct_start_interval == interval: From 5a683b916d481a9c3cc971b73bf55ede990f0cdc Mon Sep 17 00:00:00 2001 From: Elijah Lopez Date: Wed, 24 Apr 2024 19:28:56 -0400 Subject: [PATCH 10/15] Add raise missing ticker tests, replace deprecated datetime methods - renamed test files conform with standards - replaced utcfromtimestamp --- README.md | 30 +++++++++++++++++-- requirements.txt | 2 ++ tests/__init__.py | 1 - tests/{prices.py => test_prices.py} | 4 +-- tests/{ticker.py => test_ticker.py} | 45 ++++++++++++++++++++++------- tests/{utils.py => test_utils.py} | 0 yfinance/exceptions.py | 1 + yfinance/scrapers/history.py | 5 ++-- yfinance/ticker.py | 4 +-- 9 files changed, 71 insertions(+), 21 deletions(-) delete mode 100644 tests/__init__.py rename tests/{prices.py => test_prices.py} (99%) rename tests/{ticker.py => test_ticker.py} (96%) rename tests/{utils.py => test_utils.py} (100%) diff --git a/README.md b/README.md index 801fe2fa9..0995854af 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,7 @@ msft.recommendations msft.recommendations_summary msft.upgrades_downgrades -# Show future and historic earnings dates, returns at most next 4 quarters and last 8 quarters by default. +# Show future and historic earnings dates, returns at most next 4 quarters and last 8 quarters by default. # Note: If more are needed use msft.get_earnings_dates(limit=XX) with increased limit argument. msft.earnings_dates @@ -183,7 +183,7 @@ data = yf.download("SPY AAPL", period="1mo") ### Smarter scraping -Install the `nospam` packages for smarter scraping using `pip` (see [Installation](#installation)). 
These packages help cache calls such that Yahoo is not spammed with requests. +Install the `nospam` packages for smarter scraping using `pip` (see [Installation](#installation)). These packages help cache calls such that Yahoo is not spammed with requests. To use a custom `requests` session, pass a `session=` argument to the Ticker constructor. This allows for caching calls to the API as well as a custom way to modify requests via the `User-agent` header. @@ -231,11 +231,13 @@ yfinance?](https://stackoverflow.com/questions/63107801) ### Persistent cache store To reduce Yahoo, yfinance store some data locally: timezones to localize dates, and cookie. Cache location is: + - Windows = C:/Users/\/AppData/Local/py-yfinance - Linux = /home/\/.cache/py-yfinance - MacOS = /Users/\/Library/Caches/py-yfinance You can direct cache to use a different location with `set_tz_cache_location()`: + ```python import yfinance as yf yf.set_tz_cache_location("custom/cache/location") @@ -262,10 +264,32 @@ intended for research and educational purposes. You should refer to Yahoo!'s ter ([here](https://policies.yahoo.com/us/en/yahoo/terms/product-atos/apiforydn/index.htm), [here](https://legal.yahoo.com/us/en/yahoo/terms/otos/index.html), and [here](https://policies.yahoo.com/us/en/yahoo/terms/index.htm)) for -detailes on your rights to use the actual data downloaded. +details on your rights to use the actual data downloaded. --- +### Running Tests + +Tests have been written using the built-in Python module `unittest` + +An example of running all tests in a file that calls `unittest.main`: + +```sh +python -m unittest tests.test_prices +``` + +An example of running a test of a single method (applicable to a class as well): + +```sh +python -m unittest tests.test_prices.TestPriceRepair.test_ticker_missing +``` + +To run all tests + +```sh +python -m unittest discover -s tests +``` + ### P.S. Please drop me an note with any feedback you have. 
diff --git a/requirements.txt b/requirements.txt index f19ca36b1..32f1a0cc2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,5 @@ frozendict>=2.3.4 beautifulsoup4>=4.11.1 html5lib>=1.1 peewee>=3.16.2 +requests_cache>=1.0 +requests-ratelimiter>=0.3.1 diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index 4265cc3e6..000000000 --- a/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -#!/usr/bin/env python diff --git a/tests/prices.py b/tests/test_prices.py similarity index 99% rename from tests/prices.py rename to tests/test_prices.py index 792020c38..46792113b 100644 --- a/tests/prices.py +++ b/tests/test_prices.py @@ -43,14 +43,14 @@ def test_download(self): df_tkrs = df.columns.levels[1] self.assertEqual(sorted(tkrs), sorted(df_tkrs)) - + def test_download_with_invalid_ticker(self): #Checks if using an invalid symbol gives the same output as not using an invalid symbol in combination with a valid symbol (AAPL) #Checks to make sure that invalid symbol handling for the date column is the same as the base case (no invalid symbols) invalid_tkrs = ["AAPL", "ATVI"] #AAPL exists and ATVI does not exist valid_tkrs = ["AAPL", "INTC"] #AAPL and INTC both exist - + data_invalid_sym = yf.download(invalid_tkrs, start='2023-11-16', end='2023-11-17') data_valid_sym = yf.download(valid_tkrs, start='2023-11-16', end='2023-11-17') diff --git a/tests/ticker.py b/tests/test_ticker.py similarity index 96% rename from tests/ticker.py rename to tests/test_ticker.py index c164be283..0b1343a7c 100644 --- a/tests/ticker.py +++ b/tests/test_ticker.py @@ -12,7 +12,7 @@ from .context import yfinance as yf from .context import session_gbl -from yfinance.exceptions import YFNotImplementedError +from yfinance.exceptions import YFChartError, YFInvalidPeriodError, YFNotImplementedError, YFPricesMissingError, YFTickerMissingError, YFTzMissingError import unittest @@ -129,6 +129,30 @@ def test_badTicker(self): assert isinstance(dat.actions, pd.DataFrame) assert 
dat.actions.empty + def test_invalid_period(self): + tkr = 'VALE' + dat = yf.Ticker(tkr, session=self.session) + with self.assertRaises(YFInvalidPeriodError): + dat.history(period="2wks", interval="1d", raise_errors=True) + with self.assertRaises(YFInvalidPeriodError): + dat.history(period="2mo", interval="1d", raise_errors=True) + + + def test_prices_missing(self): + # this test will need to be updated every time someone wants to run a test + # hard to find a ticker that matches this error other than options + # META call option, 2024 April 26th @ strike of 180000 + tkr = 'META240426C00180000' + dat = yf.Ticker(tkr, session=self.session) + with self.assertRaises(YFPricesMissingError): + dat.history(period="5d", interval="1m", raise_errors=True) + + def test_ticker_missing(self): + tkr = 'ATVI' + dat = yf.Ticker(tkr, session=self.session) + # A missing ticker can trigger either a niche error or the generalized error + with self.assertRaises((YFTickerMissingError, YFTzMissingError, YFChartError)): + dat.history(period="3mo", interval="1d", raise_errors=True) def test_goodTicker(self): # that yfinance works when full api is called on same instance of ticker @@ -150,8 +174,8 @@ def test_goodTicker(self): dat.fast_info[k] for attribute_name, attribute_type in ticker_attributes: - assert_attribute_type(self, dat, attribute_name, attribute_type) - + assert_attribute_type(self, dat, attribute_name, attribute_type) + def test_goodTicker_withProxy(self): tkr = "IBM" dat = yf.Ticker(tkr, session=self.session, proxy=self.proxy) @@ -163,7 +187,7 @@ def test_goodTicker_withProxy(self): for attribute_name, attribute_type in ticker_attributes: assert_attribute_type(self, dat, attribute_name, attribute_type) - + class TestTickerHistory(unittest.TestCase): session = None @@ -370,7 +394,7 @@ def test_insider_transactions(self): data_cached = self.ticker.insider_transactions self.assertIs(data, data_cached, "data not cached") - + def test_insider_purchases(self): data = 
self.ticker.insider_purchases self.assertIsInstance(data, pd.DataFrame, "data has wrong type") @@ -402,9 +426,9 @@ def tearDownClass(cls): def setUp(self): self.ticker = yf.Ticker("GOOGL", session=self.session) - - # For ticker 'BSE.AX' (and others), Yahoo not returning - # full quarterly financials (usually cash-flow) with all entries, + + # For ticker 'BSE.AX' (and others), Yahoo not returning + # full quarterly financials (usually cash-flow) with all entries, # instead returns a smaller version in different data store. self.ticker_old_fmt = yf.Ticker("BSE.AX", session=self.session) @@ -713,7 +737,7 @@ def tearDownClass(cls): def setUp(self): self.ticker = yf.Ticker("GOOGL", session=self.session) - + def tearDown(self): self.ticker = None @@ -813,7 +837,6 @@ def test_complementary_info(self): # This one should have a trailing PEG ratio data2 = self.tickers[2].info self.assertIsInstance(data2['trailingPegRatio'], float) - pass # def test_fast_info_matches_info(self): # fast_info_keys = set() @@ -851,7 +874,7 @@ def test_complementary_info(self): # key_rename_map[yf.utils.snake_case_2_camelCase(k)] = key_rename_map[k] # # Note: share count items in info[] are bad. Sometimes the float > outstanding! - # # So often fast_info["shares"] does not match. + # # So often fast_info["shares"] does not match. # # Why isn't fast_info["shares"] wrong? Because using it to calculate market cap always correct. # bad_keys = {"shares"} diff --git a/tests/utils.py b/tests/test_utils.py similarity index 100% rename from tests/utils.py rename to tests/test_utils.py diff --git a/yfinance/exceptions.py b/yfinance/exceptions.py index 2846d2199..92fa7efcf 100644 --- a/yfinance/exceptions.py +++ b/yfinance/exceptions.py @@ -37,6 +37,7 @@ def __init__(self, ticker, debug_info): class YFEarningsDateMissing(YFTickerMissingError): + # note that this does not get raised. 
Added in case of raising it in the future def __init__(self, ticker): super().__init__(ticker, "No earnings dates found") diff --git a/yfinance/scrapers/history.py b/yfinance/scrapers/history.py index 34e48e30a..e43ca13fa 100644 --- a/yfinance/scrapers/history.py +++ b/yfinance/scrapers/history.py @@ -191,8 +191,8 @@ def history(self, period="1mo", interval="1d", fail = True elif "chart" not in data or data["chart"]["result"] is None or not data["chart"]["result"]: fail = True - elif period is not None and "timestamp" not in data["chart"]["result"][0] and period not in \ - self._history_metadata["validRanges"]: + elif period is not None and period not in self._history_metadata["validRanges"]: + # even if timestamp is in the data, the data doesn't encompass the period requested # User provided a bad period. The minimum should be '1d', but sometimes Yahoo accepts '1h'. _exception = YFInvalidPeriodError(self.ticker, period, self._history_metadata['validRanges']) fail = True @@ -222,6 +222,7 @@ def history(self, period="1mo", interval="1d", quotes = quotes.iloc[0:quotes.shape[0] - 1] except Exception: shared._DFS[self.ticker] = utils.empty_df() + print(err_msg) shared._ERRORS[self.ticker] = err_msg.split(': ', 1)[1] if raise_errors: raise _exception diff --git a/yfinance/ticker.py b/yfinance/ticker.py index 075f457d5..c8ef5a49c 100644 --- a/yfinance/ticker.py +++ b/yfinance/ticker.py @@ -48,8 +48,8 @@ def _download_options(self, date=None): r = self._data.get(url=url, proxy=self.proxy).json() if len(r.get('optionChain', {}).get('result', [])) > 0: for exp in r['optionChain']['result'][0]['expirationDates']: - self._expirations[_datetime.datetime.utcfromtimestamp( - exp).strftime('%Y-%m-%d')] = exp + self._expirations[_datetime.datetime.fromtimestamp(exp).strftime('%Y-%m-%d')] = exp + assert _datetime.datetime.utcfromtimestamp(exp).strftime('%Y-%m-%d') in self._expirations self._underlying = r['optionChain']['result'][0].get('quote', {}) From 
ac4efa3e3d84ac1ed5901349eea8331941f4106c Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Sat, 11 May 2024 09:33:17 +0100 Subject: [PATCH 11/15] Fix switching session from/to requests_cache Session switch logic was not recalculating 'self._session_is_caching'. Also removed message 'help stress-test cookie & crumb & requests_cache', clearly works now. --- yfinance/data.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/yfinance/data.py b/yfinance/data.py index 2365f66c7..9a2b2b180 100644 --- a/yfinance/data.py +++ b/yfinance/data.py @@ -60,24 +60,8 @@ class YfData(metaclass=SingletonMeta): 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'} def __init__(self, session=None): - self._session = session or requests.Session() - - try: - self._session.cache - except AttributeError: - # Not caching - self._session_is_caching = False - else: - # Is caching. This is annoying. - # Can't simply use a non-caching session to fetch cookie & crumb, - # because then the caching-session won't have cookie. - self._session_is_caching = True - from requests_cache import DO_NOT_CACHE - self._expire_after = DO_NOT_CACHE self._crumb = None self._cookie = None - if self._session_is_caching and self._cookie is None: - utils.print_once("WARNING: cookie & crumb does not work well with requests_cache. Am experimenting with 'expire_after=DO_NOT_CACHE', but you need to help stress-test.") # Default to using 'basic' strategy self._cookie_strategy = 'basic' @@ -86,12 +70,27 @@ def __init__(self, session=None): self._cookie_lock = threading.Lock() + self._set_session(session or requests.Session()) + def _set_session(self, session): if session is None: return with self._cookie_lock: self._session = session + try: + self._session.cache + except AttributeError: + # Not caching + self._session_is_caching = False + else: + # Is caching. This is annoying. 
+ # Can't simply use a non-caching session to fetch cookie & crumb, + # because then the caching-session won't have cookie. + self._session_is_caching = True + from requests_cache import DO_NOT_CACHE + self._expire_after = DO_NOT_CACHE + def _set_cookie_strategy(self, strategy, have_lock=False): if strategy == self._cookie_strategy: return From 7628bec2a6f44b8d41c6d5f956f1eac8e1cd5ea8 Mon Sep 17 00:00:00 2001 From: Marcao Date: Sat, 11 May 2024 19:20:19 +0200 Subject: [PATCH 12/15] Adjust and fix according to feedback --- README.md | 22 ---------------------- requirements.txt | 2 -- yfinance/exceptions.py | 2 +- yfinance/scrapers/history.py | 2 +- yfinance/ticker.py | 4 ++-- 5 files changed, 4 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 0995854af..f832d0dfc 100644 --- a/README.md +++ b/README.md @@ -268,28 +268,6 @@ details on your rights to use the actual data downloaded. --- -### Running Tests - -Tests have been written using the built-in Python module `unittest` - -An example of running all tests in a file that calls `unittest.main`: - -```sh -python -m unittest tests.test_prices -``` - -An example of running a test of a single method (applicable to a class as well): - -```sh -python -m unittest tests.test_prices.TestPriceRepair.test_ticker_missing -``` - -To run all tests - -```sh -python -m unittest discover -s tests -``` - ### P.S. Please drop me an note with any feedback you have. 
diff --git a/requirements.txt b/requirements.txt index 32f1a0cc2..f19ca36b1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,5 +9,3 @@ frozendict>=2.3.4 beautifulsoup4>=4.11.1 html5lib>=1.1 peewee>=3.16.2 -requests_cache>=1.0 -requests-ratelimiter>=0.3.1 diff --git a/yfinance/exceptions.py b/yfinance/exceptions.py index 92fa7efcf..a44dc2d0e 100644 --- a/yfinance/exceptions.py +++ b/yfinance/exceptions.py @@ -22,7 +22,7 @@ class YFTickerMissingError(YFException): def __init__(self, ticker, rationale): super().__init__(f"${ticker}: possibly delisted; {rationale}") self.rationale = rationale - self.ticker = rationale + self.ticker = ticker class YFTzMissingError(YFTickerMissingError): diff --git a/yfinance/scrapers/history.py b/yfinance/scrapers/history.py index e43ca13fa..a4bd380aa 100644 --- a/yfinance/scrapers/history.py +++ b/yfinance/scrapers/history.py @@ -80,7 +80,7 @@ def history(self, period="1mo", interval="1d", # Check can get TZ. Fail => probably delisted tz = self.tz if tz is None: - # Every valid ticker has a timezone. A missing timezone is a problem problem + # Every valid ticker has a timezone. A missing timezone is a problem. 
_exception = YFTzMissingError(self.ticker) err_msg = str(_exception) shared._DFS[self.ticker] = utils.empty_df() diff --git a/yfinance/ticker.py b/yfinance/ticker.py index c8ef5a49c..075f457d5 100644 --- a/yfinance/ticker.py +++ b/yfinance/ticker.py @@ -48,8 +48,8 @@ def _download_options(self, date=None): r = self._data.get(url=url, proxy=self.proxy).json() if len(r.get('optionChain', {}).get('result', [])) > 0: for exp in r['optionChain']['result'][0]['expirationDates']: - self._expirations[_datetime.datetime.fromtimestamp(exp).strftime('%Y-%m-%d')] = exp - assert _datetime.datetime.utcfromtimestamp(exp).strftime('%Y-%m-%d') in self._expirations + self._expirations[_datetime.datetime.utcfromtimestamp( + exp).strftime('%Y-%m-%d')] = exp self._underlying = r['optionChain']['result'][0].get('quote', {}) From 97f35b721c43d22af1ca7fd238ba97de32555dbf Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Mon, 13 May 2024 20:45:57 +0100 Subject: [PATCH 13/15] Price repair: improve 'sudden change' repair for splits & currency Original logic for repairing missing split adjustment only checked latest split. Improved logic checks ALL splits in data, because any can be missing. Then related changes to 'sudden change detection': - use prices median not mean, reduce sensitivity to noise. - handle Kuwait Dinar, which sub-divides into 1000x not 100x. 
--- yfinance/scrapers/history.py | 54 +++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/yfinance/scrapers/history.py b/yfinance/scrapers/history.py index a4bd380aa..f7c01f147 100644 --- a/yfinance/scrapers/history.py +++ b/yfinance/scrapers/history.py @@ -338,7 +338,7 @@ def history(self, period="1mo", interval="1d", # Do this before auto/back adjust logger.debug(f'{self.ticker}: checking OHLC for repairs ...') df = self._fix_unit_mixups(df, interval, tz_exchange, prepost) - df = self._fix_bad_stock_split(df, interval, tz_exchange) + df = self._fix_bad_stock_splits(df, interval, tz_exchange) # Must repair 100x and split errors before price reconstruction df = self._fix_zeroes(df, interval, tz_exchange, prepost) df = self._fix_missing_div_adjust(df, interval, tz_exchange) @@ -981,7 +981,12 @@ def _fix_unit_switch(self, df, interval, tz_exchange): # This function fixes the second. # Eventually Yahoo fixes but could take them 2 weeks. - return self._fix_prices_sudden_change(df, interval, tz_exchange, 100.0) + if self._history_metadata['currency'] == 'KWF': + # Kuwaiti Dinar divided into 1000 not 100 + n = 1000 + else: + n = 100 + return self._fix_prices_sudden_change(df, interval, tz_exchange, n) @utils.log_indent_decorator def _fix_zeroes(self, df, interval, tz_exchange, prepost): @@ -1171,9 +1176,12 @@ def _fix_missing_div_adjust(self, df, interval, tz_exchange): return df2 @utils.log_indent_decorator - def _fix_bad_stock_split(self, df, interval, tz_exchange): - # Repair idea is to look for BIG daily price changes that closely match the - # most recent stock split ratio. This indicates Yahoo failed to apply a new + def _fix_bad_stock_splits(self, df, interval, tz_exchange): + # Original logic only considered latest split adjustment could be missing, but + # actually **any** split adjustment can be missing. So check all splits in df. 
+ # + # Improved logic looks for BIG daily price changes that closely match the + # **nearest future** stock split ratio. This indicates Yahoo failed to apply a new # stock split to old price data. # # There is a slight complication, because Yahoo does another stupid thing. @@ -1190,22 +1198,28 @@ def _fix_bad_stock_split(self, df, interval, tz_exchange): if not interday: return df - # Find the most recent stock split - df = df.sort_index(ascending=False) + df = df.sort_index() # scan splits oldest -> newest split_f = df['Stock Splits'].to_numpy() != 0 if not split_f.any(): logger.debug('price-repair-split: No splits in data') return df - most_recent_split_day = df.index[split_f].max() - split = df.loc[most_recent_split_day, 'Stock Splits'] - if most_recent_split_day == df.index[0]: - logger.info( - "price-repair-split: Need 1+ day of price data after split to determine true price. Won't repair") - return df - logger.debug(f'price-repair-split: Most recent split = {split:.4f} @ {most_recent_split_day.date()}') + for split_idx in np.where(split_f)[0]: + split_dt = df.index[split_idx] + split = df.loc[split_dt, 'Stock Splits'] + if split_dt == df.index[0]: + continue - return self._fix_prices_sudden_change(df, interval, tz_exchange, split, correct_volume=True) + cutoff_idx = min(df.shape[0], split_idx+1) # add one row after to detect big change + df_pre_split = df.iloc[0:cutoff_idx+1] + + df_pre_split_repaired = self._fix_prices_sudden_change(df_pre_split, interval, tz_exchange, split, correct_volume=True) + # Merge back in: + if cutoff_idx == df.shape[0]-1: + df = df_pre_split_repaired + else: + df = pd.concat([df_pre_split_repaired.sort_index(), df.iloc[cutoff_idx+1:]]) + return df @utils.log_indent_decorator def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_volume=False): @@ -1302,10 +1316,12 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v # average change _1d_change_minx = np.average(_1d_change_x, 
axis=1) else: - # change nearest to 1.0 - diff = np.abs(_1d_change_x - 1.0) - j_indices = np.argmin(diff, axis=1) - _1d_change_minx = _1d_change_x[np.arange(n), j_indices] + # # change nearest to 1.0 + # diff = np.abs(_1d_change_x - 1.0) + # j_indices = np.argmin(diff, axis=1) + # _1d_change_minx = _1d_change_x[np.arange(n), j_indices] + # Still sensitive to extreme-low low. Try median: + _1d_change_minx = np.median(_1d_change_x, axis=1) f_na = np.isnan(_1d_change_minx) if f_na.any(): # Possible if data was too old for reconstruction. From f3c9f9962d8cef7c51d738be8e821e97c04e0f5c Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Sun, 19 May 2024 14:57:05 +0100 Subject: [PATCH 14/15] Fix tests ; Fine-tune split repair ; Fix UTC warning --- tests/__init__.py | 0 tests/data/AV-L-1wk-bad-stock-split-fixed.csv | 46 +++++++++---------- tests/test_prices.py | 17 ++----- tests/test_ticker.py | 26 +++++------ yfinance/scrapers/history.py | 22 +++++++-- yfinance/scrapers/quote.py | 6 +-- yfinance/ticker.py | 3 +- 7 files changed, 64 insertions(+), 56 deletions(-) create mode 100644 tests/__init__.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/AV-L-1wk-bad-stock-split-fixed.csv b/tests/data/AV-L-1wk-bad-stock-split-fixed.csv index 95c25a48a..a99eb118b 100644 --- a/tests/data/AV-L-1wk-bad-stock-split-fixed.csv +++ b/tests/data/AV-L-1wk-bad-stock-split-fixed.csv @@ -1,27 +1,27 @@ Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits -2021-12-13 00:00:00+00:00,393.999975585938,406.6,391.4,402.899916992188,291.232287597656,62714764.4736842,0,0 -2021-12-20 00:00:00+00:00,393.999975585938,412.199990234375,392.502983398438,409.899997558594,296.292243652344,46596651.3157895,0,0 -2021-12-27 00:00:00+00:00,409.899997558594,416.550971679688,408.387001953125,410.4,296.653642578125,10818482.8947368,0,0 -2022-01-03 
00:00:00+00:00,410.4,432.199995117188,410.4,432.099985351563,312.339265136719,44427327.6315789,0,0 -2022-01-10 00:00:00+00:00,431.3,439.199982910156,429.099970703125,436.099912109375,315.230618896484,29091400,0,0 -2022-01-17 00:00:00+00:00,437.999912109375,445.199965820313,426.999997558594,431.999975585938,312.267017822266,43787351.3157895,0,0 -2022-01-24 00:00:00+00:00,430.099975585938,440.999973144531,420.999968261719,433.499982910156,313.351237792969,58487296.0526316,0,0 -2022-01-31 00:00:00+00:00,436.199968261719,443.049987792969,432.099985351563,435.199916992188,314.580045166016,43335806.5789474,0,0 -2022-02-07 00:00:00+00:00,437.899995117188,448.799992675781,436.051994628906,444.39998046875,321.230207519531,39644061.8421053,0,0 -2022-02-14 00:00:00+00:00,437.699975585938,441.999978027344,426.699968261719,432.199995117188,312.411558837891,49972693.4210526,0,0 -2022-02-21 00:00:00+00:00,435.499992675781,438.476999511719,408.29998046875,423.399970703125,306.050571289063,65719596.0526316,0,0 -2022-02-28 00:00:00+00:00,415.099995117188,427.999909667969,386.199932861328,386.799945068359,279.594578857422,94057936.8421053,4.1875,0 -2022-03-07 00:00:00+00:00,374.999952392578,417.299978027344,361.101981201172,409.599968261719,298.389248046875,71269101.3157895,0,0 -2022-03-14 00:00:00+00:00,413.099985351563,426.699968261719,408.899992675781,422.399965820313,307.713929443359,55431927.6315789,0,0 -2022-03-21 00:00:00+00:00,422.699995117188,442.7,422.399965820313,437.799985351563,318.932696533203,39896352.6315789,0,0 -2022-03-28 00:00:00+01:00,442.49998046875,460.999978027344,440.097983398438,444.6,323.886403808594,56413515.7894737,0,0 -2022-04-04 00:00:00+01:00,439.699985351563,445.399985351563,421.999973144531,425.799973144531,310.190817871094,49415836.8421053,19.342106,0 -2022-04-11 00:00:00+01:00,425.39998046875,435.599909667969,420.799995117188,434.299968261719,327.211427001953,29875081.5789474,0,0 -2022-04-18 
00:00:00+01:00,434.299968261719,447.799987792969,433.599992675781,437.799985351563,329.848419189453,49288272.3684211,0,0 -2022-04-25 00:00:00+01:00,430.699987792969,438.799990234375,423.999982910156,433.299916992188,326.457967529297,44656776.3157895,0,0 -2022-05-02 00:00:00+01:00,433.299916992188,450.999975585938,414.499982910156,414.899975585938,312.595018310547,29538167.1052632,0,0 -2022-05-09 00:00:00+01:00,413.199995117188,417.449992675781,368.282923583984,408.199970703125,307.547099609375,73989611.8421053,0,0 -2022-05-16 00:00:00+01:00,384,423.600006103516,384,412.100006103516,310.485473632813,81938261,101.69,0.76 +2021-12-13 00:00:00+00:00,518.421020507813,535,515,530.131469726563,383.200378417969,47663221,0,0 +2021-12-20 00:00:00+00:00,518.421020507813,542.368408203125,516.451293945313,539.342102050781,389.858215332031,35413455,0,0 +2021-12-27 00:00:00+00:00,539.342102050781,548.093383789063,537.351318359375,540,390.333740234375,8222047,0,0 +2022-01-03 00:00:00+00:00,540,568.684204101563,540,568.552612304688,410.972717285156,33764769,0,0 +2022-01-10 00:00:00+00:00,567.5,577.894714355469,564.605224609375,573.815673828125,414.777130126953,22109464,0,0 +2022-01-17 00:00:00+00:00,576.315673828125,585.789428710938,561.842102050781,568.421020507813,410.877655029297,33278387,0,0 +2022-01-24 00:00:00+00:00,565.921020507813,580.263122558594,553.947326660156,570.394714355469,412.304260253906,44450345,0,0 +2022-01-31 00:00:00+00:00,573.947326660156,582.960510253906,568.552612304688,572.631469726563,413.921112060547,32935213,0,0 +2022-02-07 00:00:00+00:00,576.184204101563,590.526306152344,573.752624511719,584.73681640625,422.671325683594,30129487,0,0 +2022-02-14 00:00:00+00:00,575.921020507813,581.578918457031,561.447326660156,568.684204101563,411.067840576172,37979247,0,0 +2022-02-21 00:00:00+00:00,573.026306152344,576.943420410156,537.23681640625,557.105224609375,402.698120117188,49946893,0,0 +2022-02-28 
00:00:00+00:00,546.184204101563,563.157775878906,508.157806396484,508.947296142578,367.887603759766,71484032,4.1875,0 +2022-03-07 00:00:00+00:00,493.420989990234,549.078918457031,475.134185791016,538.947326660156,392.617431640625,54164517,0,0 +2022-03-14 00:00:00+00:00,543.552612304688,561.447326660156,538.026306152344,555.789428710938,404.886749267578,42128265,0,0 +2022-03-21 00:00:00+00:00,556.184204101563,582.5,555.789428710938,576.052612304688,419.648284912109,30321228,0,0 +2022-03-28 00:00:00+01:00,582.23681640625,606.578918457031,579.076293945313,585,426.166320800781,42874272,0,0 +2022-04-04 00:00:00+01:00,578.552612304688,586.052612304688,555.263122558594,560.263122558594,408.145812988281,37556036,19.342106,0 +2022-04-11 00:00:00+01:00,559.73681640625,573.157775878906,553.684204101563,571.447326660156,430.541351318359,22705062,0,0 +2022-04-18 00:00:00+01:00,571.447326660156,589.210510253906,570.526306152344,576.052612304688,434.011077880859,37459087,0,0 +2022-04-25 00:00:00+01:00,566.710510253906,577.368408203125,557.894714355469,570.131469726563,429.549957275391,33939150,0,0 +2022-05-02 00:00:00+01:00,570.131469726563,593.421020507813,545.394714355469,545.921020507813,411.309234619141,22449007,0,0 +2022-05-09 00:00:00+01:00,543.684204101563,549.276306152344,484.582794189453,537.105224609375,404.667236328125,56232105,0,0 +2022-05-16 00:00:00+01:00,505.263157894737,557.368429083573,505.263157894737,542.236850136205,408.533517937911,62273078.36,101.69,0.76 2022-05-23 00:00:00+01:00,416.100006103516,442.399993896484,341.915008544922,440.899993896484,409.764678955078,45432941,0,0 2022-05-30 00:00:00+01:00,442.700012207031,444.200012207031,426.600006103516,428.700012207031,398.426239013672,37906659,0,0 2022-06-06 00:00:00+01:00,425.299987792969,434.010009765625,405.200012207031,405.399993896484,376.771606445313,40648810,0,0 diff --git a/tests/test_prices.py b/tests/test_prices.py index 46792113b..18a063dab 100644 --- a/tests/test_prices.py +++ 
b/tests/test_prices.py @@ -359,13 +359,6 @@ def test_monthlyWithEvents2(self): dfd_divs = dfd[dfd['Dividends'] != 0] self.assertEqual(dfm_divs.shape[0], dfd_divs.shape[0]) - dfm = yf.Ticker("F").history(period="50mo", interval="1mo") - dfd = yf.Ticker("F").history(period="50mo", interval="1d") - dfd = dfd[dfd.index > dfm.index[0]] - dfm_divs = dfm[dfm['Dividends'] != 0] - dfd_divs = dfd[dfd['Dividends'] != 0] - self.assertEqual(dfm_divs.shape[0], dfd_divs.shape[0]) - def test_tz_dst_ambiguous(self): # Reproduce issue #1100 try: @@ -791,7 +784,7 @@ def test_repair_zeroes_hourly(self): tz_exchange = dat.fast_info["timezone"] hist = dat._lazy_load_price_history() - correct_df = hist.history(period="1wk", interval="1h", auto_adjust=False, repair=True) + correct_df = hist.history(period="5d", interval="1h", auto_adjust=False, repair=True) df_bad = correct_df.copy() bad_idx = correct_df.index[10] @@ -820,7 +813,7 @@ def test_repair_zeroes_hourly(self): self.assertTrue("Repaired?" in repaired_df.columns) self.assertFalse(repaired_df["Repaired?"].isna().any()) - def test_repair_bad_stock_split(self): + def test_repair_bad_stock_splits(self): # Stocks that split in 2022 but no problems in Yahoo data, # so repair should change nothing good_tkrs = ['AMZN', 'DXCM', 'FTNT', 'GOOG', 'GME', 'PANW', 'SHOP', 'TSLA'] @@ -836,7 +829,7 @@ def test_repair_bad_stock_split(self): _dp = os.path.dirname(__file__) df_good = dat.history(start='2020-01-01', end=_dt.date.today(), interval=interval, auto_adjust=False) - repaired_df = hist._fix_bad_stock_split(df_good, interval, tz_exchange) + repaired_df = hist._fix_bad_stock_splits(df_good, interval, tz_exchange) # Expect no change from repair df_good = df_good.sort_index() @@ -867,7 +860,7 @@ def test_repair_bad_stock_split(self): df_bad = _pd.read_csv(fp, index_col="Date") df_bad.index = _pd.to_datetime(df_bad.index, utc=True) - repaired_df = hist._fix_bad_stock_split(df_bad, "1d", tz_exchange) + repaired_df = 
hist._fix_bad_stock_splits(df_bad, "1d", tz_exchange) fp = os.path.join(_dp, "data", tkr.replace('.','-')+'-'+interval+"-bad-stock-split-fixed.csv") correct_df = _pd.read_csv(fp, index_col="Date") @@ -902,7 +895,7 @@ def test_repair_bad_stock_split(self): _dp = os.path.dirname(__file__) df_good = hist.history(start='2020-11-30', end='2021-04-01', interval=interval, auto_adjust=False) - repaired_df = hist._fix_bad_stock_split(df_good, interval, tz_exchange) + repaired_df = hist._fix_bad_stock_splits(df_good, interval, tz_exchange) # Expect no change from repair df_good = df_good.sort_index() diff --git a/tests/test_ticker.py b/tests/test_ticker.py index 0b1343a7c..244834d4f 100644 --- a/tests/test_ticker.py +++ b/tests/test_ticker.py @@ -12,7 +12,7 @@ from .context import yfinance as yf from .context import session_gbl -from yfinance.exceptions import YFChartError, YFInvalidPeriodError, YFNotImplementedError, YFPricesMissingError, YFTickerMissingError, YFTzMissingError +from yfinance.exceptions import YFChartError, YFInvalidPeriodError, YFNotImplementedError, YFTickerMissingError, YFTzMissingError import unittest @@ -100,13 +100,13 @@ def test_badTicker(self): tkr = "DJI" # typo of "^DJI" dat = yf.Ticker(tkr, session=self.session) - dat.history(period="1wk") + dat.history(period="5d") dat.history(start="2022-01-01") dat.history(start="2022-01-01", end="2022-03-01") - yf.download([tkr], period="1wk", threads=False, ignore_tz=False) - yf.download([tkr], period="1wk", threads=True, ignore_tz=False) - yf.download([tkr], period="1wk", threads=False, ignore_tz=True) - yf.download([tkr], period="1wk", threads=True, ignore_tz=True) + yf.download([tkr], period="5d", threads=False, ignore_tz=False) + yf.download([tkr], period="5d", threads=True, ignore_tz=False) + yf.download([tkr], period="5d", threads=False, ignore_tz=True) + yf.download([tkr], period="5d", threads=True, ignore_tz=True) for k in dat.fast_info: dat.fast_info[k] @@ -144,7 +144,7 @@ def 
test_prices_missing(self): # META call option, 2024 April 26th @ strike of 180000 tkr = 'META240426C00180000' dat = yf.Ticker(tkr, session=self.session) - with self.assertRaises(YFPricesMissingError): + with self.assertRaises(YFChartError): dat.history(period="5d", interval="1m", raise_errors=True) def test_ticker_missing(self): @@ -162,13 +162,13 @@ def test_goodTicker(self): for tkr in tkrs: dat = yf.Ticker(tkr, session=self.session) - dat.history(period="1wk") + dat.history(period="5d") dat.history(start="2022-01-01") dat.history(start="2022-01-01", end="2022-03-01") - yf.download([tkr], period="1wk", threads=False, ignore_tz=False) - yf.download([tkr], period="1wk", threads=True, ignore_tz=False) - yf.download([tkr], period="1wk", threads=False, ignore_tz=True) - yf.download([tkr], period="1wk", threads=True, ignore_tz=True) + yf.download([tkr], period="5d", threads=False, ignore_tz=False) + yf.download([tkr], period="5d", threads=True, ignore_tz=False) + yf.download([tkr], period="5d", threads=False, ignore_tz=True) + yf.download([tkr], period="5d", threads=True, ignore_tz=True) for k in dat.fast_info: dat.fast_info[k] @@ -182,7 +182,7 @@ def test_goodTicker_withProxy(self): dat._fetch_ticker_tz(proxy=None, timeout=5) dat._get_ticker_tz(proxy=None, timeout=5) - dat.history(period="1wk") + dat.history(period="5d") for attribute_name, attribute_type in ticker_attributes: assert_attribute_type(self, dat, attribute_name, attribute_type) diff --git a/yfinance/scrapers/history.py b/yfinance/scrapers/history.py index f7c01f147..2007427b3 100644 --- a/yfinance/scrapers/history.py +++ b/yfinance/scrapers/history.py @@ -1204,14 +1204,25 @@ def _fix_bad_stock_splits(self, df, interval, tz_exchange): logger.debug('price-repair-split: No splits in data') return df + logger.debug(f'price-repair-split: Splits: {str(df['Stock Splits'][split_f].to_dict())}') + + if not 'Repaired?' 
in df.columns: + df['Repaired?'] = False for split_idx in np.where(split_f)[0]: split_dt = df.index[split_idx] split = df.loc[split_dt, 'Stock Splits'] if split_dt == df.index[0]: continue - cutoff_idx = min(df.shape[0], split_idx+1) # add one row after to detect big change + # Add on a week: + if interval in ['1wk', '1mo', '3mo']: + split_idx += 1 + else: + split_idx += 5 + cutoff_idx = min(df.shape[0], split_idx) # add one row after to detect big change df_pre_split = df.iloc[0:cutoff_idx+1] + logger.debug(f'price-repair-split: split_idx={split_idx} split_dt={split_dt}') + logger.debug(f'price-repair-split: df dt range: {df_pre_split.index[0].date()} -> {df_pre_split.index[-1].date()}') df_pre_split_repaired = self._fix_prices_sudden_change(df_pre_split, interval, tz_exchange, split, correct_volume=True) # Merge back in: @@ -1240,7 +1251,7 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v # start_min = 1 year before oldest split f = df['Stock Splits'].to_numpy() != 0.0 start_min = (df.index[f].min() - _dateutil.relativedelta.relativedelta(years=1)).date() - logger.debug(f'price-repair-split: start_min={start_min}') + logger.debug(f'price-repair-split: start_min={start_min} change={change}') OHLC = ['Open', 'High', 'Low', 'Close'] @@ -1438,8 +1449,13 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v # if logger.isEnabledFor(logging.DEBUG): # df_debug['i'] = list(range(0, df_debug.shape[0])) # df_debug['i_rev'] = df_debug.shape[0]-1 - df_debug['i'] + # if correct_columns_individually: + # f_change = df_debug[[c+'_f_down' for c in debug_cols]].any(axis=1) | df_debug[[c+'_f_up' for c in debug_cols]].any(axis=1) + # else: + # f_change = df_debug['f_down'] | df_debug['f_up'] + # f_change = f_change | np.roll(f_change, -1) | np.roll(f_change, 1) | np.roll(f_change, -2) | np.roll(f_change, 2) # with pd.option_context('display.max_rows', None, 'display.max_columns', 10, 'display.width', 1000): # more 
options can be specified also - # logger.debug(f"price-repair-split: my workings:" + '\n' + str(df_debug)) + # logger.debug(f"price-repair-split: my workings:" + '\n' + str(df_debug[f_change])) def map_signals_to_ranges(f, f_up, f_down): # Ensure 0th element is False, because True is nonsense diff --git a/yfinance/scrapers/quote.py b/yfinance/scrapers/quote.py index e2e7ac909..1b36dada9 100644 --- a/yfinance/scrapers/quote.py +++ b/yfinance/scrapers/quote.py @@ -181,7 +181,7 @@ def toJSON(self, indent=4): def _get_1y_prices(self, fullDaysOnly=False): if self._prices_1y is None: - self._prices_1y = self._tkr.history(period="380d", auto_adjust=False, keepna=True, proxy=self.proxy) + self._prices_1y = self._tkr.history(period="1y", auto_adjust=False, keepna=True, proxy=self.proxy) self._md = self._tkr.get_history_metadata(proxy=self.proxy) try: ctp = self._md["currentTradingPeriod"] @@ -207,12 +207,12 @@ def _get_1y_prices(self, fullDaysOnly=False): def _get_1wk_1h_prepost_prices(self): if self._prices_1wk_1h_prepost is None: - self._prices_1wk_1h_prepost = self._tkr.history(period="1wk", interval="1h", auto_adjust=False, prepost=True, proxy=self.proxy) + self._prices_1wk_1h_prepost = self._tkr.history(period="5d", interval="1h", auto_adjust=False, prepost=True, proxy=self.proxy) return self._prices_1wk_1h_prepost def _get_1wk_1h_reg_prices(self): if self._prices_1wk_1h_reg is None: - self._prices_1wk_1h_reg = self._tkr.history(period="1wk", interval="1h", auto_adjust=False, prepost=False, proxy=self.proxy) + self._prices_1wk_1h_reg = self._tkr.history(period="5d", interval="1h", auto_adjust=False, prepost=False, proxy=self.proxy) return self._prices_1wk_1h_reg def _get_exchange_metadata(self): diff --git a/yfinance/ticker.py b/yfinance/ticker.py index 075f457d5..438733684 100644 --- a/yfinance/ticker.py +++ b/yfinance/ticker.py @@ -48,8 +48,7 @@ def _download_options(self, date=None): r = self._data.get(url=url, proxy=self.proxy).json() if len(r.get('optionChain', 
{}).get('result', [])) > 0: for exp in r['optionChain']['result'][0]['expirationDates']: - self._expirations[_datetime.datetime.utcfromtimestamp( - exp).strftime('%Y-%m-%d')] = exp + self._expirations[_pd.Timestamp(exp, unit='s').strftime('%Y-%m-%d')] = exp self._underlying = r['optionChain']['result'][0].get('quote', {}) From fe00fd5152d2a6957659d91de83ba57bfb35e6db Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Sun, 19 May 2024 15:09:57 +0100 Subject: [PATCH 15/15] Ruff fixes --- yfinance/scrapers/history.py | 2 +- yfinance/ticker.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/yfinance/scrapers/history.py b/yfinance/scrapers/history.py index 2007427b3..6a6116205 100644 --- a/yfinance/scrapers/history.py +++ b/yfinance/scrapers/history.py @@ -1206,7 +1206,7 @@ def _fix_bad_stock_splits(self, df, interval, tz_exchange): logger.debug(f'price-repair-split: Splits: {str(df['Stock Splits'][split_f].to_dict())}') - if not 'Repaired?' in df.columns: + if 'Repaired?' not in df.columns: df['Repaired?'] = False for split_idx in np.where(split_f)[0]: split_dt = df.index[split_idx] diff --git a/yfinance/ticker.py b/yfinance/ticker.py index 438733684..837739700 100644 --- a/yfinance/ticker.py +++ b/yfinance/ticker.py @@ -21,7 +21,6 @@ from __future__ import print_function -import datetime as _datetime from collections import namedtuple as _namedtuple import pandas as _pd