460 lines
14 KiB
Python
460 lines
14 KiB
Python
# -*- coding: utf-8 -*-
|
|
# flake8: noqa: E501
|
|
# pylint: disable=C0301
|
|
"""Data fetching tools backed by the unified provider router."""
|
|
import datetime
|
|
import os
|
|
|
|
import httpx
|
|
import pandas as pd
|
|
import pandas_market_calendars as mcal
|
|
from backend.data.provider_utils import normalize_symbol
|
|
|
|
from backend.data.cache import get_cache
|
|
from backend.data.provider_router import get_provider_router
|
|
from shared.schema import (
|
|
CompanyNews,
|
|
FinancialMetrics,
|
|
InsiderTrade,
|
|
LineItem,
|
|
Price,
|
|
)
|
|
from backend.utils.settlement import logger
|
|
|
|
# Global cache instance
|
|
_cache = get_cache()
|
|
_router = get_provider_router()
|
|
|
|
|
|
def _service_name() -> str:
|
|
return str(os.getenv("SERVICE_NAME", "")).strip().lower()
|
|
|
|
|
|
def _trading_service_url() -> str | None:
    """Base URL of the trading service, or None.

    Returns None when TRADING_SERVICE_URL is unset/blank, or when this
    process *is* the trading service (to avoid calling ourselves).
    """
    url = str(os.getenv("TRADING_SERVICE_URL", "")).strip().rstrip("/")
    if url and _service_name() != "trading_service":
        return url
    return None
|
|
|
|
|
|
def _news_service_url() -> str | None:
    """Base URL of the news service, or None.

    Returns None when NEWS_SERVICE_URL is unset/blank, or when this
    process *is* the news service (to avoid calling ourselves).
    """
    url = str(os.getenv("NEWS_SERVICE_URL", "")).strip().rstrip("/")
    if url and _service_name() != "news_service":
        return url
    return None
|
|
|
|
|
|
def _service_get_json(base_url: str, path: str, *, params: dict[str, object]) -> dict:
    """GET ``path`` from ``base_url`` and return the decoded JSON body.

    Uses a short-lived client with a 30s timeout. Raises
    ``httpx.HTTPStatusError`` for non-2xx responses.
    """
    with httpx.Client(base_url=base_url, timeout=30.0) as http:
        resp = http.get(path, params=params)
        resp.raise_for_status()
        return resp.json()
|
|
|
|
|
|
def get_last_tradeday(date: str) -> str:
    """
    Get the previous trading day for the specified date.

    Uses the NYSE calendar; supports both pandas_market_calendars
    (``valid_days``) and exchange_calendars (``sessions_in_range``)
    style calendar objects.

    Args:
        date: Date string (YYYY-MM-DD)

    Returns:
        Previous trading day date string (YYYY-MM-DD). If ``date`` is
        not itself a trading day, the nearest earlier trading day is
        returned. Falls back to the previous calendar day when no
        calendar data is available.
    """
    current_date = datetime.datetime.strptime(date, "%Y-%m-%d")
    calendar = mcal.get_calendar("NYSE")

    if calendar is not None:
        # Look back 90 calendar days to capture enough trading sessions.
        start_search = current_date - datetime.timedelta(days=90)

        if hasattr(calendar, "valid_days"):
            # pandas_market_calendars API
            trading_dates = calendar.valid_days(
                start_date=start_search.strftime("%Y-%m-%d"),
                end_date=current_date.strftime("%Y-%m-%d"),
            )
        else:
            # exchange_calendars API
            trading_dates = calendar.sessions_in_range(
                start_search.strftime("%Y-%m-%d"),
                current_date.strftime("%Y-%m-%d"),
            )

        trading_dates_list = [
            pd.Timestamp(d).strftime("%Y-%m-%d") for d in trading_dates
        ]

        if date in trading_dates_list:
            idx = trading_dates_list.index(date)
            if idx > 0:
                # Current date is a trading day: return the session before it.
                return trading_dates_list[idx - 1]
            # First session in the window: widen the search recursively.
            prev_date = current_date - datetime.timedelta(days=1)
            return get_last_tradeday(prev_date.strftime("%Y-%m-%d"))
        if trading_dates_list:
            # Not a trading day: return the nearest earlier session.
            return trading_dates_list[-1]

    # Fallback when no calendar is available or the window held no sessions.
    # BUG FIX: the original returned ``prev_date.strftime(...)`` here, but
    # ``prev_date`` was only bound inside the idx == 0 branch above, so this
    # path raised NameError.
    return (current_date - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
|
|
|
|
|
|
def get_prices(
    ticker: str,
    start_date: str,
    end_date: str,
) -> list[Price]:
    """
    Fetch daily price data.

    Lookup order: local cache (per configured price source), then the
    trading service when one is configured, then the provider router.
    Successful router results are written back to the cache.

    Args:
        ticker: Stock ticker symbol
        start_date: Start date (YYYY-MM-DD)
        end_date: End date (YYYY-MM-DD)

    Returns:
        list[Price]: Price objects (empty on failure or no data)
    """
    ticker = normalize_symbol(ticker)
    if not ticker:
        return []

    # Serve from cache if any configured price source has a hit.
    for src in _router.price_sources():
        if hit := _cache.get_prices(f"{ticker}_{start_date}_{end_date}_{src}"):
            return [Price(**row) for row in hit]

    # Delegate to the trading service when configured (best-effort).
    base_url = _trading_service_url()
    if base_url:
        try:
            body = _service_get_json(
                base_url,
                "/api/prices",
                params={
                    "ticker": ticker,
                    "start_date": start_date,
                    "end_date": end_date,
                },
            )
            remote = [Price(**row) for row in body.get("prices", [])]
            if remote:
                return remote
        except Exception as exc:
            logger.info("Trading service price lookup failed for %s: %s", ticker, exc)

    # Fall back to the provider router.
    try:
        prices, source = _router.get_prices(ticker, start_date, end_date)
    except Exception as exc:
        logger.info("Price lookup failed for %s: %s", ticker, exc)
        return []

    if not prices:
        return []

    # Cache under the source that actually served the data.
    _cache.set_prices(
        f"{ticker}_{start_date}_{end_date}_{source}",
        [p.model_dump() for p in prices],
    )
    return prices
|
|
|
|
|
|
def get_financial_metrics(
    ticker: str,
    end_date: str,
    period: str = "ttm",
    limit: int = 10,
) -> list[FinancialMetrics]:
    """
    Fetch financial metrics.

    Lookup order: local cache (per configured API source), then the
    trading service when one is configured, then the provider router.
    Successful router results are written back to the cache.

    Args:
        ticker: Stock ticker symbol
        end_date: End date (YYYY-MM-DD)
        period: Reporting period type (default: "ttm")
        limit: Maximum number of records to fetch

    Returns:
        list[FinancialMetrics]: Metrics (empty on failure or no data)
    """
    ticker = normalize_symbol(ticker)
    if not ticker:
        return []

    # Serve from cache if any configured API source has a hit.
    for src in _router.api_sources():
        key = f"{ticker}_{period}_{end_date}_{limit}_{src}"
        if hit := _cache.get_financial_metrics(key):
            return [FinancialMetrics(**row) for row in hit]

    # Delegate to the trading service when configured (best-effort).
    base_url = _trading_service_url()
    if base_url:
        try:
            body = _service_get_json(
                base_url,
                "/api/financials",
                params={
                    "ticker": ticker,
                    "end_date": end_date,
                    "period": period,
                    "limit": limit,
                },
            )
            remote = [
                FinancialMetrics(**row)
                for row in body.get("financial_metrics", [])
            ]
            if remote:
                return remote
        except Exception as exc:
            logger.info("Trading service financial lookup failed for %s: %s", ticker, exc)

    # Fall back to the provider router.
    try:
        metrics, source = _router.get_financial_metrics(
            ticker=ticker,
            end_date=end_date,
            period=period,
            limit=limit,
        )
    except Exception as exc:
        logger.info("Financial metrics lookup failed for %s: %s", ticker, exc)
        return []

    if not metrics:
        return []

    # Cache under the source that actually served the data.
    _cache.set_financial_metrics(
        f"{ticker}_{period}_{end_date}_{limit}_{source}",
        [m.model_dump() for m in metrics],
    )
    return metrics
|
|
|
|
def search_line_items(
    ticker: str,
    line_items: list[str],
    end_date: str,
    period: str = "ttm",
    limit: int = 10,
) -> list[LineItem]:
    """
    Fetch line items, preferring the trading service when configured,
    otherwise the provider router.

    Returns an empty list on any error to allow graceful degradation.

    Args:
        ticker: Stock ticker symbol
        line_items: Names of the line items to search for
        end_date: End date (YYYY-MM-DD)
        period: Period type (default: "ttm")
        limit: Number of records to fetch

    Returns:
        list[LineItem]: Matching line items (empty on error)
    """
    try:
        ticker = normalize_symbol(ticker)
        if not ticker:
            return []

        service_url = _trading_service_url()
        if service_url:
            payload = _service_get_json(
                service_url,
                "/api/line-items",
                params={
                    "ticker": ticker,
                    "line_items": line_items,
                    "end_date": end_date,
                    "period": period,
                    "limit": limit,
                },
            )
            return [LineItem(**item) for item in payload.get("search_results", [])]

        return _router.search_line_items(
            ticker=ticker,
            line_items=line_items,
            end_date=end_date,
            period=period,
            limit=limit,
        )
    except Exception as e:
        # FIX: lazy %-style args instead of an eager f-string, matching the
        # logging convention used everywhere else in this module.
        logger.info(
            "Warning: Exception while fetching line items for %s: %s", ticker, e,
        )
        return []
|
|
|
|
def get_insider_trades(
    ticker: str,
    end_date: str,
    start_date: str | None = None,
    limit: int = 1000,
) -> list[InsiderTrade]:
    """Fetch insider trades: cache first, then trading service, then router.

    Successful router results are written back to the cache. Returns an
    empty list on failure or when no data is available.
    """
    ticker = normalize_symbol(ticker)
    if not ticker:
        return []

    # Serve from cache if any configured API source has a hit.
    for src in _router.api_sources():
        key = f"{ticker}_{start_date or 'none'}_{end_date}_{limit}_{src}"
        if hit := _cache.get_insider_trades(key):
            return [InsiderTrade(**row) for row in hit]

    # Delegate to the trading service when configured (best-effort).
    base_url = _trading_service_url()
    if base_url:
        try:
            query: dict[str, object] = {
                "ticker": ticker,
                "end_date": end_date,
                "limit": limit,
            }
            if start_date:
                query["start_date"] = start_date
            body = _service_get_json(base_url, "/api/insider-trades", params=query)
            remote = [InsiderTrade(**row) for row in body.get("insider_trades", [])]
            if remote:
                return remote
        except Exception as exc:
            logger.info("Trading service insider lookup failed for %s: %s", ticker, exc)

    # Fall back to the provider router.
    try:
        trades, source = _router.get_insider_trades(
            ticker=ticker,
            end_date=end_date,
            start_date=start_date,
            limit=limit,
        )
    except Exception as exc:
        logger.info("Insider trades lookup failed for %s: %s", ticker, exc)
        return []

    if not trades:
        return []

    # Cache under the source that actually served the data.
    key = f"{ticker}_{start_date or 'none'}_{end_date}_{limit}_{source}"
    _cache.set_insider_trades(key, [t.model_dump() for t in trades])
    return trades
|
|
|
|
def get_company_news(
    ticker: str,
    end_date: str,
    start_date: str | None = None,
    limit: int = 1000,
) -> list[CompanyNews]:
    """Fetch company news.

    Lookup order: local cache (per configured API source), trading
    service, news service ("enriched" news), then the provider router.
    Successful router results are written back to the cache.
    """
    ticker = normalize_symbol(ticker)
    if not ticker:
        return []

    # Serve from cache if any configured API source has a hit.
    for src in _router.api_sources():
        key = f"{ticker}_{start_date or 'none'}_{end_date}_{limit}_{src}"
        if hit := _cache.get_company_news(key):
            return [CompanyNews(**row) for row in hit]

    def _fetch_from_service(base_url: str, path: str, fail_msg: str) -> list[CompanyNews]:
        # Best-effort fetch from a sidecar service; empty list on any failure.
        query: dict[str, object] = {
            "ticker": ticker,
            "end_date": end_date,
            "limit": limit,
        }
        if start_date:
            query["start_date"] = start_date
        try:
            body = _service_get_json(base_url, path, params=query)
            return [CompanyNews(**row) for row in body.get("news", [])]
        except Exception as exc:
            logger.info(fail_msg, ticker, exc)
            return []

    trading_url = _trading_service_url()
    if trading_url:
        found = _fetch_from_service(
            trading_url,
            "/api/news",
            "Trading service news lookup failed for %s: %s",
        )
        if found:
            return found

    news_url = _news_service_url()
    if news_url:
        found = _fetch_from_service(
            news_url,
            "/api/enriched-news",
            "News service lookup failed for %s: %s",
        )
        if found:
            return found

    # Fall back to the provider router.
    try:
        all_news, source = _router.get_company_news(
            ticker=ticker,
            end_date=end_date,
            start_date=start_date,
            limit=limit,
        )
    except Exception as exc:
        logger.info("Company news lookup failed for %s: %s", ticker, exc)
        return []

    if not all_news:
        return []

    # Cache under the source that actually served the data.
    key = f"{ticker}_{start_date or 'none'}_{end_date}_{limit}_{source}"
    _cache.set_company_news(key, [item.model_dump() for item in all_news])
    return all_news
|
|
|
|
def get_market_cap(ticker: str, end_date: str) -> float | None:
    """Fetch market cap for ``ticker`` as of ``end_date``.

    Tries the trading service first (when configured), then the provider
    router with a cache-aware metrics lookup. Returns None when the
    symbol is invalid or every lookup fails.

    Note: per the original docstring, Finnhub values are converted from
    millions (presumably inside the router — not visible here).
    """
    ticker = normalize_symbol(ticker)
    if not ticker:
        return None

    service_url = _trading_service_url()
    if service_url:
        try:
            payload = _service_get_json(
                service_url,
                "/api/market-cap",
                params={"ticker": ticker, "end_date": end_date},
            )
            value = payload.get("market_cap")
            # NOTE(review): a successful response with a null/absent
            # market_cap returns None here WITHOUT falling through to the
            # router below — confirm that short-circuit is intended.
            return float(value) if value is not None else None
        except Exception as exc:
            logger.info("Trading service market-cap lookup failed for %s: %s", ticker, exc)

    def _metrics_lookup(symbol: str, date: str):
        # Cache-aware metrics provider handed to the router: serve ttm
        # metrics from cache when possible, otherwise fetch via the router.
        # Returns (metrics, source) in both paths.
        for source in _router.api_sources():
            # Key shape matches get_financial_metrics(period="ttm", limit=10).
            cache_key = f"{symbol}_ttm_{date}_10_{source}"
            if cached_data := _cache.get_financial_metrics(cache_key):
                return [FinancialMetrics(**metric) for metric in cached_data], source
        return _router.get_financial_metrics(
            ticker=symbol,
            end_date=date,
            period="ttm",
            limit=10,
        )

    try:
        market_cap, _ = _router.get_market_cap(
            ticker=ticker,
            end_date=end_date,
            metrics_lookup=_metrics_lookup,
        )
    except Exception as exc:
        logger.info("Market cap lookup failed for %s: %s", ticker, exc)
        return None
    return market_cap
|
|
|
|
|
|
def prices_to_df(prices: list[Price]) -> pd.DataFrame:
|
|
"""Convert prices to a DataFrame."""
|
|
df = pd.DataFrame([p.model_dump() for p in prices])
|
|
df["Date"] = pd.to_datetime(df["time"])
|
|
df.set_index("Date", inplace=True)
|
|
numeric_cols = ["open", "close", "high", "low", "volume"]
|
|
for col in numeric_cols:
|
|
df[col] = pd.to_numeric(df[col], errors="coerce")
|
|
df.sort_index(inplace=True)
|
|
return df
|