Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sync dev -> main #1971

Merged
merged 24 commits into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
19f9760
Merge pull request #1946 from ranaroussi/main
ValueRaider May 20, 2024
ead2b89
Implement Sustainibility Fetching
Darksinian Jun 9, 2024
2b1dd86
Merge pull request #1959 from MohamedAlaa201/mohamed/sustainibillity_…
ValueRaider Jun 13, 2024
17b4518
Add pull of Loss Adjustment Expense data in income statement
vittoboa Jun 20, 2024
2d5978e
Merge pull request #1965 from vittoboa/add-loss-adjustment-expense
ValueRaider Jun 21, 2024
a42a5b7
fixed history fetching when period='max'
SnowCheetos Jun 25, 2024
a9f6c33
history metadata: Fix '1wk is invalid' & repeated calls
ValueRaider Jun 28, 2024
6be7cc3
Merge pull request #1970 from ranaroussi/fix/history-metadata
ValueRaider Jun 28, 2024
b1d56ac
directly subtracted seconds, kept 99 years impl as on main branch
SnowCheetos Jul 3, 2024
57dac67
Deprecate 'Ticker.earnings'
ValueRaider Jul 5, 2024
4160fec
Clarify max period when interval=60m/1h. Tidy.
ValueRaider Jul 6, 2024
9c89308
Merge pull request #1967 from SnowCheetos/preiod_max_fix
ValueRaider Jul 6, 2024
742cc85
Prices: fix some Pandas deprecation warnings
ValueRaider Jul 13, 2024
2e48495
Fix deprecation warnings not printing. Remove deprecated info code.
ValueRaider Jul 13, 2024
a73e974
Merge pull request #1977 from ranaroussi/fix/earnings
ValueRaider Jul 13, 2024
d219e84
const.py addition
rhwvu Jul 15, 2024
b0e9e91
Merge pull request #1985 from rhwvu/main
ValueRaider Jul 15, 2024
96279b9
Merge pull request #1981 from ranaroussi/fix/pandas-warnings
ValueRaider Jul 15, 2024
0681a77
Tests small fixes + fix another Pandas warning
ValueRaider Jul 15, 2024
c894b9e
Price repair: fix 2x syntax errors
ValueRaider Jul 18, 2024
d948db7
Price repair zeroes: improve for 1d+ data
ValueRaider Jul 18, 2024
862ef7c
Merge pull request #1990 from ranaroussi/feature/price-repair-zeroes-…
ValueRaider Jul 18, 2024
d024dbb
Merge pull request #1989 from ranaroussi/fix/price-repair-syntax-erro…
ValueRaider Jul 18, 2024
7ca6b10
Fix typo in merged branch feature/price-repair-zeroes-improve
ValueRaider Jul 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ build/
*.html
*.css
*.png
test.ipynb

# Environments
.env
Expand Down
2 changes: 1 addition & 1 deletion tests/test_prices.py
Original file line number Diff line number Diff line change
Expand Up @@ -864,7 +864,7 @@ def test_repair_bad_stock_splits(self):

fp = os.path.join(_dp, "data", tkr.replace('.','-')+'-'+interval+"-bad-stock-split-fixed.csv")
correct_df = _pd.read_csv(fp, index_col="Date")
correct_df.index = _pd.to_datetime(correct_df.index)
correct_df.index = _pd.to_datetime(correct_df.index, utc=True)

repaired_df = repaired_df.sort_index()
correct_df = correct_df.sort_index()
Expand Down
34 changes: 6 additions & 28 deletions tests/test_ticker.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@
("recommendations", Union[pd.DataFrame, dict]),
("recommendations_summary", Union[pd.DataFrame, dict]),
("upgrades_downgrades", Union[pd.DataFrame, dict]),
("earnings", pd.DataFrame),
("quarterly_earnings", pd.DataFrame),
("quarterly_cashflow", pd.DataFrame),
("cashflow", pd.DataFrame),
("quarterly_balance_sheet", pd.DataFrame),
Expand Down Expand Up @@ -114,9 +112,6 @@ def test_badTicker(self):
for attribute_name, attribute_type in ticker_attributes:
assert_attribute_type(self, dat, attribute_name, attribute_type)

with self.assertRaises(YFNotImplementedError):
assert isinstance(dat.earnings, pd.Series)
assert dat.earnings.empty
assert isinstance(dat.dividends, pd.Series)
assert dat.dividends.empty
assert isinstance(dat.splits, pd.Series)
Expand Down Expand Up @@ -309,22 +304,6 @@ def test_earnings_dates_with_limit(self):

# Below will fail because not ported to Yahoo API

# def test_earnings(self):
# data = self.ticker.earnings
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")

# data_cached = self.ticker.earnings
# self.assertIs(data, data_cached, "data not cached")

# def test_quarterly_earnings(self):
# data = self.ticker.quarterly_earnings
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")

# data_cached = self.ticker.quarterly_earnings
# self.assertIs(data, data_cached, "data not cached")

# def test_earnings_forecasts(self):
# data = self.ticker.earnings_forecasts
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
Expand Down Expand Up @@ -707,15 +686,14 @@ def test_calendar(self):
data_cached = self.ticker.calendar
self.assertIs(data, data_cached, "data not cached")

# Below will fail because not ported to Yahoo API

# def test_sustainability(self):
# data = self.ticker.sustainability
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
def test_sustainability(self):
data = self.ticker.sustainability
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")

# data_cached = self.ticker.sustainability
# self.assertIs(data, data_cached, "data not cached")
data_cached = self.ticker.sustainability
self.assertIs(data, data_cached, "data not cached")

# def test_shares(self):
# data = self.ticker.shares
Expand Down
20 changes: 3 additions & 17 deletions yfinance/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,7 @@
__version__ = version.version
__author__ = "Ran Aroussi"

import warnings
warnings.filterwarnings('default', category=DeprecationWarning, module='^yfinance')

def pdr_override():
"""
make pandas datareader optional
otherwise can be called via fix_yahoo_finance.download(...)
"""
from .utils import print_once
print_once("yfinance: pandas_datareader support is deprecated & semi-broken so will be removed in a future verison. Just use yfinance.")
try:
import pandas_datareader
pandas_datareader.data.get_data_yahoo = download
pandas_datareader.data.get_data_yahoo_actions = download
pandas_datareader.data.DataReader = download
except Exception:
pass


__all__ = ['download', 'Ticker', 'Tickers', 'pdr_override', 'enable_debug_mode', 'set_tz_cache_location']
__all__ = ['download', 'Ticker', 'Tickers', 'enable_debug_mode', 'set_tz_cache_location']
4 changes: 3 additions & 1 deletion yfinance/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def get_fast_info(self, proxy=None):

@property
def basic_info(self):
warnings.warn("'Ticker.basic_info' is renamed to 'Ticker.fast_info', hopefully purpose is clearer", DeprecationWarning)
warnings.warn("'Ticker.basic_info' is deprecated and will be removed in future, Switch to 'Ticker.fast_info'", DeprecationWarning)
return self.fast_info

def get_sustainability(self, proxy=None, as_dict=False):
Expand Down Expand Up @@ -289,6 +289,8 @@ def get_earnings(self, proxy=None, as_dict=False, freq="yearly"):
Default is None
"""
self._fundamentals.proxy = proxy or self.proxy
if self._fundamentals.earnings is None:
return None
data = self._fundamentals.earnings[freq]
if as_dict:
dict_data = data.to_dict()
Expand Down
7 changes: 5 additions & 2 deletions yfinance/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@
"AmortizationOfIntangiblesIncomeStatement", "DepreciationIncomeStatement", "ResearchAndDevelopment",
"SellingGeneralAndAdministration", "SellingAndMarketingExpense", "GeneralAndAdministrativeExpense",
"OtherGandA", "InsuranceAndClaims", "RentAndLandingFees", "SalariesAndWages", "GrossProfit",
"CostOfRevenue", "TotalRevenue", "ExciseTaxes", "OperatingRevenue"],
"CostOfRevenue", "TotalRevenue", "ExciseTaxes", "OperatingRevenue", "LossAdjustmentExpense",
"NetPolicyholderBenefitsAndClaims", "PolicyholderBenefitsGross", "PolicyholderBenefitsCeded",
"OccupancyAndEquipment", "ProfessionalExpenseAndContractServicesExpense", "OtherNonInterestExpense"],
'balance-sheet': ["TreasurySharesNumber", "PreferredSharesNumber", "OrdinarySharesNumber", "ShareIssued", "NetDebt",
"TotalDebt", "TangibleBookValue", "InvestedCapital", "WorkingCapital", "NetTangibleAssets",
"CapitalLeaseObligations", "CommonStockEquity", "PreferredStockEquity", "TotalCapitalization",
Expand Down Expand Up @@ -74,7 +76,8 @@
"DuefromRelatedPartiesCurrent", "TaxesReceivable", "AccruedInterestReceivable", "NotesReceivable",
"LoansReceivable", "AccountsReceivable", "AllowanceForDoubtfulAccountsReceivable",
"GrossAccountsReceivable", "CashCashEquivalentsAndShortTermInvestments",
"OtherShortTermInvestments", "CashAndCashEquivalents", "CashEquivalents", "CashFinancial"],
"OtherShortTermInvestments", "CashAndCashEquivalents", "CashEquivalents", "CashFinancial",
"CashCashEquivalentsAndFederalFundsSold"],
'cash-flow': ["ForeignSales", "DomesticSales", "AdjustedGeographySegmentData", "FreeCashFlow",
"RepurchaseOfCapitalStock", "RepaymentOfDebt", "IssuanceOfDebt", "IssuanceOfCapitalStock",
"CapitalExpenditure", "InterestPaidSupplementalData", "IncomeTaxPaidSupplementalData",
Expand Down
7 changes: 3 additions & 4 deletions yfinance/scrapers/fundamentals.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import datetime
import json
import warnings

import pandas as pd

from yfinance import utils, const
from yfinance.data import YfData
from yfinance.exceptions import YFException, YFNotImplementedError


class Fundamentals:

def __init__(self, data: YfData, symbol: str, proxy=None):
Expand All @@ -30,9 +30,8 @@ def financials(self) -> "Financials":

@property
def earnings(self) -> dict:
if self._earnings is None:
raise YFNotImplementedError('earnings')
return self._earnings
warnings.warn("'Ticker.earnings' is deprecated as not available via API. Look for \"Net Income\" in Ticker.income_stmt.", DeprecationWarning)
return None

@property
def shares(self) -> pd.DataFrame:
Expand Down
49 changes: 38 additions & 11 deletions yfinance/scrapers/history.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,13 @@ def history(self, period="1mo", interval="1d",
end = utils._parse_user_dt(end, tz)
if start is None:
if interval == "1m":
start = end - 604800 # Subtract 7 days
start = end - 604800 # 7 days
elif interval in ("5m", "15m", "30m", "90m"):
start = end - 5184000 # 60 days
elif interval in ("1h", '60m'):
start = end - 63072000 # 730 days
else:
max_start_datetime = pd.Timestamp.utcnow().floor("D") - _datetime.timedelta(days=99 * 365)
start = int(max_start_datetime.timestamp())
start = end - 3122064000 # 99 years
else:
start = utils._parse_user_dt(start, tz)
params = {"period1": start, "period2": end}
Expand Down Expand Up @@ -236,7 +239,7 @@ def history(self, period="1mo", interval="1d",
# 2) fix weird bug with Yahoo! - returning 60m for 30m bars
if interval.lower() == "30m":
logger.debug(f'{self.ticker}: resampling 30m OHLC from 15m')
quotes2 = quotes.resample('30T')
quotes2 = quotes.resample('30min')
quotes = pd.DataFrame(index=quotes2.last().index, data={
'Open': quotes2['Open'].first(),
'High': quotes2['High'].max(),
Expand Down Expand Up @@ -265,6 +268,7 @@ def history(self, period="1mo", interval="1d",
tps = self._history_metadata["tradingPeriods"]
if not isinstance(tps, pd.DataFrame):
self._history_metadata = utils.format_history_metadata(self._history_metadata, tradingPeriodsOnly=True)
self._history_metadata_formatted = True
tps = self._history_metadata["tradingPeriods"]
quotes = utils.fix_Yahoo_returning_prepost_unrequested(quotes, params["interval"], tps)
logger.debug(f'{self.ticker}: OHLC after cleaning: {quotes.index[0]} -> {quotes.index[-1]}')
Expand Down Expand Up @@ -391,7 +395,7 @@ def history(self, period="1mo", interval="1d",
def get_history_metadata(self, proxy=None) -> dict:
if self._history_metadata is None:
# Request intraday data, because then Yahoo returns exchange schedule.
self.history(period="1wk", interval="1h", prepost=True, proxy=proxy)
self.history(period="5d", interval="1h", prepost=True, proxy=proxy)

if self._history_metadata_formatted is False:
self._history_metadata = utils.format_history_metadata(self._history_metadata)
Expand Down Expand Up @@ -863,6 +867,7 @@ def _fix_unit_random_mixups(self, df, interval, tz_exchange, prepost):
df_orig = df[~f_zeroes] # all row slicing must be applied to both df and df2
else:
df2_zeroes = None
df_orig = df
if df2.shape[0] <= 1:
logger.info("price-repair-100x: Insufficient good data for detecting 100x price errors")
if "Repaired?" not in df.columns:
Expand Down Expand Up @@ -1025,6 +1030,10 @@ def _fix_zeroes(self, df, interval, tz_exchange, prepost):
f_zero_or_nan_ignore = np.isin(f_prices_bad.index.date, dts)
df2_reserve = df2[f_zero_or_nan_ignore]
df2 = df2[~f_zero_or_nan_ignore]
if df2.empty:
# No good data
return df
df2 = df2.copy()
f_prices_bad = (df2[price_cols] == 0.0) | df2[price_cols].isna()

f_change = df2["High"].to_numpy() != df2["Low"].to_numpy()
Expand All @@ -1033,7 +1042,19 @@ def _fix_zeroes(self, df, interval, tz_exchange, prepost):
f_vol_bad = None
else:
f_high_low_good = (~df2["High"].isna().to_numpy()) & (~df2["Low"].isna().to_numpy())
f_vol_bad = (df2["Volume"] == 0).to_numpy() & f_high_low_good & f_change
f_vol_zero = (df2["Volume"] == 0).to_numpy()
f_vol_bad = f_vol_zero & f_high_low_good & f_change
# ^ intra-interval price changed without volume, bad

if not intraday:
# Interday data: if close changes between intervals with volume=0 then volume is wrong.
# Possibly can repair with intraday, but usually Yahoo does not have the volume.
close_diff = df2['Close'].diff()
close_diff.iloc[0] = 0
close_chg_pct_abs = np.abs(close_diff / df2['Close'])
f_bad_price_chg = (close_chg_pct_abs > 0.05).to_numpy() & f_vol_zero
f_bad_price_chg = f_bad_price_chg & (~f_vol_bad) # exclude where already know volume is bad
f_vol_bad = f_vol_bad | f_bad_price_chg

# If stock split occurred, then trading must have happened.
# I should probably rename the function, because prices aren't zero ...
Expand Down Expand Up @@ -1068,7 +1089,8 @@ def _fix_zeroes(self, df, interval, tz_exchange, prepost):
for i in range(len(price_cols)):
c = price_cols[i]
df2.loc[f_prices_bad[:, i], c] = tag
df2.loc[f_vol_bad, "Volume"] = tag
if f_vol_bad is not None:
df2.loc[f_vol_bad, "Volume"] = tag
# If volume=0 or NaN for bad prices, then tag volume for repair
f_vol_zero_or_nan = (df2["Volume"].to_numpy() == 0) | (df2["Volume"].isna().to_numpy())
df2.loc[f_prices_bad.any(axis=1) & f_vol_zero_or_nan, "Volume"] = tag
Expand Down Expand Up @@ -1229,7 +1251,11 @@ def _fix_bad_stock_splits(self, df, interval, tz_exchange):
if cutoff_idx == df.shape[0]-1:
df = df_pre_split_repaired
else:
df = pd.concat([df_pre_split_repaired.sort_index(), df.iloc[cutoff_idx+1:]])
df_post_cutoff = df.iloc[cutoff_idx+1:]
if df_post_cutoff.empty:
df = df_pre_split_repaired.sort_index()
else:
df = pd.concat([df_pre_split_repaired.sort_index(), df_post_cutoff])
return df

@utils.log_indent_decorator
Expand Down Expand Up @@ -1594,9 +1620,9 @@ def map_signals_to_ranges(f, f_up, f_down):
f_open_and_closed_fixed = f_open_fixed & f_close_fixed
f_open_xor_closed_fixed = np.logical_xor(f_open_fixed, f_close_fixed)
if f_open_and_closed_fixed.any():
df2.loc[f_open_and_closed_fixed, "Volume"] *= m_rcp
df2.loc[f_open_and_closed_fixed, "Volume"] = (df2.loc[f_open_and_closed_fixed, "Volume"] * m_rcp).round().astype('int')
if f_open_xor_closed_fixed.any():
df2.loc[f_open_xor_closed_fixed, "Volume"] *= 0.5 * m_rcp
df2.loc[f_open_xor_closed_fixed, "Volume"] = (df2.loc[f_open_xor_closed_fixed, "Volume"] * 0.5 * m_rcp).round().astype('int')

df2.loc[f_corrected, 'Repaired?'] = True

Expand Down Expand Up @@ -1649,7 +1675,8 @@ def map_signals_to_ranges(f, f_up, f_down):
for c in ['Open', 'High', 'Low', 'Close', 'Adj Close']:
df2.iloc[r[0]:r[1], df2.columns.get_loc(c)] *= m
if correct_volume:
df2.iloc[r[0]:r[1], df2.columns.get_loc("Volume")] *= m_rcp
col_loc = df2.columns.get_loc("Volume")
df2.iloc[r[0]:r[1], col_loc] = (df2.iloc[r[0]:r[1], col_loc] * m_rcp).round().astype('int')
df2.iloc[r[0]:r[1], df2.columns.get_loc('Repaired?')] = True
if r[0] == r[1] - 1:
if interday:
Expand Down
65 changes: 10 additions & 55 deletions yfinance/scrapers/quote.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import datetime
import json
import warnings
from collections.abc import MutableMapping

import numpy as _np
import pandas as pd
Expand All @@ -10,7 +8,7 @@
from yfinance import utils
from yfinance.data import YfData
from yfinance.const import quote_summary_valid_modules, _BASE_URL_
from yfinance.exceptions import YFNotImplementedError, YFDataException, YFException
from yfinance.exceptions import YFDataException, YFException

info_retired_keys_price = {"currentPrice", "dayHigh", "dayLow", "open", "previousClose", "volume", "volume24Hr"}
info_retired_keys_price.update({"regularMarket"+s for s in ["DayHigh", "DayLow", "Open", "PreviousClose", "Price", "Volume"]})
Expand All @@ -25,57 +23,6 @@
_QUOTE_SUMMARY_URL_ = f"{_BASE_URL_}/v10/finance/quoteSummary"


class InfoDictWrapper(MutableMapping):
""" Simple wrapper around info dict, intercepting 'gets' to
print how-to-migrate messages for specific keys. Requires
override dict API"""

def __init__(self, info):
self.info = info

def keys(self):
return self.info.keys()

def __str__(self):
return self.info.__str__()

def __repr__(self):
return self.info.__repr__()

def __contains__(self, k):
return k in self.info.keys()

def __getitem__(self, k):
if k in info_retired_keys_price:
warnings.warn(f"Price data removed from info (key='{k}'). Use Ticker.fast_info or history() instead", DeprecationWarning)
return None
elif k in info_retired_keys_exchange:
warnings.warn(f"Exchange data removed from info (key='{k}'). Use Ticker.fast_info or Ticker.get_history_metadata() instead", DeprecationWarning)
return None
elif k in info_retired_keys_marketCap:
warnings.warn(f"Market cap removed from info (key='{k}'). Use Ticker.fast_info instead", DeprecationWarning)
return None
elif k in info_retired_keys_symbol:
warnings.warn(f"Symbol removed from info (key='{k}'). You know this already", DeprecationWarning)
return None
return self.info[self._keytransform(k)]

def __setitem__(self, k, value):
self.info[self._keytransform(k)] = value

def __delitem__(self, k):
del self.info[self._keytransform(k)]

def __iter__(self):
return iter(self.info)

def __len__(self):
return len(self.info)

def _keytransform(self, k):
return k


class FastInfo:
# Contain small subset of info[] items that can be fetched faster elsewhere.
# Imitates a dict.
Expand Down Expand Up @@ -565,7 +512,15 @@ def info(self) -> dict:
@property
def sustainability(self) -> pd.DataFrame:
if self._sustainability is None:
raise YFNotImplementedError('sustainability')
result = self._fetch(self.proxy, modules=['esgScores'])
if result is None:
self._sustainability = pd.DataFrame()
else:
try:
data = result["quoteSummary"]["result"][0]
except (KeyError, IndexError):
raise YFDataException(f"Failed to parse json response from Yahoo Finance: {result}")
self._sustainability = pd.DataFrame(data)
return self._sustainability

@property
Expand Down
Loading