# Source: evotraders/backend/tools/data_tools.py
# Snapshot: 2026-03-30 17:46:44 +08:00 (460 lines, 14 KiB, Python)
# -*- coding: utf-8 -*-
# flake8: noqa: E501
# pylint: disable=C0301
"""Data fetching tools backed by the unified provider router."""
import datetime
import os
import httpx
import pandas as pd
import pandas_market_calendars as mcal
from backend.data.provider_utils import normalize_symbol
from backend.data.cache import get_cache
from backend.data.provider_router import get_provider_router
from shared.schema import (
CompanyNews,
FinancialMetrics,
InsiderTrade,
LineItem,
Price,
)
from backend.utils.settlement import logger
# Global cache instance shared by every fetcher in this module.
_cache = get_cache()
# Unified provider router: decides which upstream data sources to try.
_router = get_provider_router()
def _service_name() -> str:
    """Return this process's service name (``SERVICE_NAME``), trimmed and lowercased."""
    raw = os.getenv("SERVICE_NAME", "")
    return str(raw).strip().lower()
def _trading_service_url() -> str | None:
    """Base URL of the trading service, or ``None`` when unset or when this
    process *is* the trading service (avoids calling ourselves)."""
    url = str(os.getenv("TRADING_SERVICE_URL", "")).strip().rstrip("/")
    if url and _service_name() != "trading_service":
        return url
    return None
def _news_service_url() -> str | None:
    """Base URL of the news service, or ``None`` when unset or when this
    process *is* the news service (avoids calling ourselves)."""
    url = str(os.getenv("NEWS_SERVICE_URL", "")).strip().rstrip("/")
    if url and _service_name() != "news_service":
        return url
    return None
def _service_get_json(base_url: str, path: str, *, params: dict[str, object]) -> dict:
    """GET ``path`` from ``base_url`` and return the decoded JSON body.

    Uses a fresh client with a 30s timeout per call; raises
    ``httpx.HTTPStatusError`` for non-2xx responses.
    """
    client = httpx.Client(base_url=base_url, timeout=30.0)
    try:
        response = client.get(path, params=params)
        response.raise_for_status()
        return response.json()
    finally:
        client.close()
def get_last_tradeday(date: str) -> str:
    """
    Get the previous trading day for the specified date.

    Looks back up to 90 calendar days on the NYSE calendar. Falls back to the
    previous calendar day when no calendar/session data is available.

    Args:
        date: Date string (YYYY-MM-DD)
    Returns:
        Previous trading day date string (YYYY-MM-DD)
    """
    current_date = datetime.datetime.strptime(date, "%Y-%m-%d")
    # Fallback answer: the previous calendar day. The original code referenced
    # an unbound ``prev_date`` on the no-sessions path, raising NameError.
    prev_day = (current_date - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
    _NYSE_CALENDAR = mcal.get_calendar("NYSE")
    if _NYSE_CALENDAR is None:
        return prev_day
    # Go back 90 days from current date to capture enough trading days.
    start_search = current_date - datetime.timedelta(days=90)
    if hasattr(_NYSE_CALENDAR, "valid_days"):
        # pandas_market_calendars API
        trading_dates = _NYSE_CALENDAR.valid_days(
            start_date=start_search.strftime("%Y-%m-%d"),
            end_date=current_date.strftime("%Y-%m-%d"),
        )
    else:
        # exchange_calendars API
        trading_dates = _NYSE_CALENDAR.sessions_in_range(
            start_search.strftime("%Y-%m-%d"),
            current_date.strftime("%Y-%m-%d"),
        )
    # Normalize sessions to YYYY-MM-DD strings.
    trading_dates_list = [
        pd.Timestamp(d).strftime("%Y-%m-%d") for d in trading_dates
    ]
    if date in trading_dates_list:
        # Current date is a trading day: return the session before it.
        idx = trading_dates_list.index(date)
        if idx > 0:
            return trading_dates_list[idx - 1]
        # First session in the window: search again from the prior day.
        return get_last_tradeday(prev_day)
    # Not a trading day: the nearest earlier session, if any.
    if trading_dates_list:
        return trading_dates_list[-1]
    # No sessions found in the window (degenerate case): previous calendar day.
    return prev_day
def get_prices(
    ticker: str,
    start_date: str,
    end_date: str,
) -> list[Price]:
    """
    Fetch daily price data for ``ticker`` between ``start_date`` and ``end_date``.

    Resolution order: per-source cache, the remote trading service (when
    configured), then the provider router. Fresh router results are cached.

    Args:
        ticker: Stock ticker symbol
        start_date: Start date (YYYY-MM-DD)
        end_date: End date (YYYY-MM-DD)
    Returns:
        list[Price]: List of Price objects (empty on failure)
    """
    ticker = normalize_symbol(ticker)
    if not ticker:
        return []
    # Probe each known price source's cache slot first.
    for source in _router.price_sources():
        hit = _cache.get_prices(f"{ticker}_{start_date}_{end_date}_{source}")
        if hit:
            return [Price(**row) for row in hit]
    # Best-effort delegation to the trading service, if one is configured.
    service_url = _trading_service_url()
    if service_url:
        try:
            payload = _service_get_json(
                service_url,
                "/api/prices",
                params={
                    "ticker": ticker,
                    "start_date": start_date,
                    "end_date": end_date,
                },
            )
            remote = [Price(**row) for row in payload.get("prices", [])]
            if remote:
                return remote
        except Exception as exc:
            logger.info("Trading service price lookup failed for %s: %s", ticker, exc)
    # Last resort: the provider router itself.
    try:
        prices, data_source = _router.get_prices(ticker, start_date, end_date)
    except Exception as exc:
        logger.info("Price lookup failed for %s: %s", ticker, exc)
        return []
    if not prices:
        return []
    # Cache under the source that actually served the data.
    _cache.set_prices(
        f"{ticker}_{start_date}_{end_date}_{data_source}",
        [p.model_dump() for p in prices],
    )
    return prices
def get_financial_metrics(
    ticker: str,
    end_date: str,
    period: str = "ttm",
    limit: int = 10,
) -> list[FinancialMetrics]:
    """
    Fetch financial metrics for ``ticker`` as of ``end_date``.

    Resolution order: per-source cache, the remote trading service (when
    configured), then the provider router. Fresh router results are cached.

    Args:
        ticker: Stock ticker symbol
        end_date: End date (YYYY-MM-DD)
        period: Period type (default: "ttm")
        limit: Number of records to fetch
    Returns:
        list[FinancialMetrics]: List of financial metrics (empty on failure)
    """
    ticker = normalize_symbol(ticker)
    if not ticker:
        return []
    # Probe each API source's cache slot first.
    for source in _router.api_sources():
        hit = _cache.get_financial_metrics(
            f"{ticker}_{period}_{end_date}_{limit}_{source}"
        )
        if hit:
            return [FinancialMetrics(**row) for row in hit]
    # Best-effort delegation to the trading service, if one is configured.
    base_url = _trading_service_url()
    if base_url:
        try:
            payload = _service_get_json(
                base_url,
                "/api/financials",
                params={
                    "ticker": ticker,
                    "end_date": end_date,
                    "period": period,
                    "limit": limit,
                },
            )
            remote = [
                FinancialMetrics(**row)
                for row in payload.get("financial_metrics", [])
            ]
            if remote:
                return remote
        except Exception as exc:
            logger.info("Trading service financial lookup failed for %s: %s", ticker, exc)
    # Last resort: the provider router itself.
    try:
        metrics, data_source = _router.get_financial_metrics(
            ticker=ticker,
            end_date=end_date,
            period=period,
            limit=limit,
        )
    except Exception as exc:
        logger.info("Financial metrics lookup failed for %s: %s", ticker, exc)
        return []
    if not metrics:
        return []
    # Cache under the source that actually served the data.
    _cache.set_financial_metrics(
        f"{ticker}_{period}_{end_date}_{limit}_{data_source}",
        [m.model_dump() for m in metrics],
    )
    return metrics
def search_line_items(
    ticker: str,
    line_items: list[str],
    end_date: str,
    period: str = "ttm",
    limit: int = 10,
) -> list[LineItem]:
    """
    Fetch the requested line items for ``ticker``.

    Delegates to the trading service when one is configured, otherwise to the
    provider router (Financial Datasets is the only supported source there).
    Any exception is logged and swallowed so callers degrade gracefully.
    """
    try:
        ticker = normalize_symbol(ticker)
        if not ticker:
            return []
        base_url = _trading_service_url()
        if base_url is None:
            # No service configured: ask the router directly.
            return _router.search_line_items(
                ticker=ticker,
                line_items=line_items,
                end_date=end_date,
                period=period,
                limit=limit,
            )
        payload = _service_get_json(
            base_url,
            "/api/line-items",
            params={
                "ticker": ticker,
                "line_items": line_items,
                "end_date": end_date,
                "period": period,
                "limit": limit,
            },
        )
        return [LineItem(**row) for row in payload.get("search_results", [])]
    except Exception as e:
        logger.info(
            f"Warning: Exception while fetching line items for {ticker}: {str(e)}",
        )
        return []
def get_insider_trades(
    ticker: str,
    end_date: str,
    start_date: str | None = None,
    limit: int = 1000,
) -> list[InsiderTrade]:
    """Fetch insider trades: per-source cache first, then the trading
    service (when configured), then the provider router; router results
    are written back to the cache. Returns [] on failure."""
    ticker = normalize_symbol(ticker)
    if not ticker:
        return []
    # Probe each API source's cache slot first.
    for source in _router.api_sources():
        hit = _cache.get_insider_trades(
            f"{ticker}_{start_date or 'none'}_{end_date}_{limit}_{source}"
        )
        if hit:
            return [InsiderTrade(**row) for row in hit]
    # Best-effort delegation to the trading service, if one is configured.
    base_url = _trading_service_url()
    if base_url:
        try:
            query: dict[str, object] = {
                "ticker": ticker,
                "end_date": end_date,
                "limit": limit,
            }
            if start_date:
                query["start_date"] = start_date
            payload = _service_get_json(
                base_url,
                "/api/insider-trades",
                params=query,
            )
            remote = [
                InsiderTrade(**row)
                for row in payload.get("insider_trades", [])
            ]
            if remote:
                return remote
        except Exception as exc:
            logger.info("Trading service insider lookup failed for %s: %s", ticker, exc)
    # Last resort: the provider router itself.
    try:
        trades, data_source = _router.get_insider_trades(
            ticker=ticker,
            end_date=end_date,
            start_date=start_date,
            limit=limit,
        )
    except Exception as exc:
        logger.info("Insider trades lookup failed for %s: %s", ticker, exc)
        return []
    if not trades:
        return []
    # Cache under the source that actually served the data.
    _cache.set_insider_trades(
        f"{ticker}_{start_date or 'none'}_{end_date}_{limit}_{data_source}",
        [t.model_dump() for t in trades],
    )
    return trades
def get_company_news(
    ticker: str,
    end_date: str,
    start_date: str | None = None,
    limit: int = 1000,
) -> list[CompanyNews]:
    """Fetch company news: per-source cache first, then the trading service
    and the news service (in that order, when configured), then the provider
    router; router results are written back to the cache. Returns [] on
    failure."""
    ticker = normalize_symbol(ticker)
    if not ticker:
        return []
    # Probe each API source's cache slot first.
    for source in _router.api_sources():
        hit = _cache.get_company_news(
            f"{ticker}_{start_date or 'none'}_{end_date}_{limit}_{source}"
        )
        if hit:
            return [CompanyNews(**row) for row in hit]
    # Try the trading service, then the news service, with identical request
    # shapes; each attempt is best-effort and logged on failure.
    service_candidates = (
        (
            _trading_service_url(),
            "/api/news",
            "Trading service news lookup failed for %s: %s",
        ),
        (
            _news_service_url(),
            "/api/enriched-news",
            "News service lookup failed for %s: %s",
        ),
    )
    for base_url, path, failure_msg in service_candidates:
        if not base_url:
            continue
        try:
            query: dict[str, object] = {
                "ticker": ticker,
                "end_date": end_date,
                "limit": limit,
            }
            if start_date:
                query["start_date"] = start_date
            payload = _service_get_json(base_url, path, params=query)
            remote = [CompanyNews(**row) for row in payload.get("news", [])]
            if remote:
                return remote
        except Exception as exc:
            logger.info(failure_msg, ticker, exc)
    # Last resort: the provider router itself.
    try:
        news_items, data_source = _router.get_company_news(
            ticker=ticker,
            end_date=end_date,
            start_date=start_date,
            limit=limit,
        )
    except Exception as exc:
        logger.info("Company news lookup failed for %s: %s", ticker, exc)
        return []
    if not news_items:
        return []
    # Cache under the source that actually served the data.
    _cache.set_company_news(
        f"{ticker}_{start_date or 'none'}_{end_date}_{limit}_{data_source}",
        [n.model_dump() for n in news_items],
    )
    return news_items
def get_market_cap(ticker: str, end_date: str) -> float | None:
    """Fetch market cap from the API. Finnhub values are converted from millions.

    Tries the trading service first; a successful service response is final
    (even a null market cap returns None). Otherwise falls back to the
    provider router, feeding it a cache-aware financial-metrics lookup.
    """
    ticker = normalize_symbol(ticker)
    if not ticker:
        return None
    base_url = _trading_service_url()
    if base_url:
        try:
            payload = _service_get_json(
                base_url,
                "/api/market-cap",
                params={"ticker": ticker, "end_date": end_date},
            )
            raw = payload.get("market_cap")
            # A service answer is authoritative, even when it is null.
            return None if raw is None else float(raw)
        except Exception as exc:
            logger.info("Trading service market-cap lookup failed for %s: %s", ticker, exc)

    def _cached_metrics(symbol: str, date: str):
        # Serve ttm metrics from any source's cache before hitting the router.
        for source in _router.api_sources():
            rows = _cache.get_financial_metrics(f"{symbol}_ttm_{date}_10_{source}")
            if rows:
                return [FinancialMetrics(**row) for row in rows], source
        return _router.get_financial_metrics(
            ticker=symbol,
            end_date=date,
            period="ttm",
            limit=10,
        )

    try:
        market_cap, _ = _router.get_market_cap(
            ticker=ticker,
            end_date=end_date,
            metrics_lookup=_cached_metrics,
        )
    except Exception as exc:
        logger.info("Market cap lookup failed for %s: %s", ticker, exc)
        return None
    return market_cap
def prices_to_df(prices: "list[Price]") -> pd.DataFrame:
    """Convert a list of Price objects to a Date-indexed DataFrame.

    Args:
        prices: Price records; each must expose ``model_dump()`` yielding a
            mapping with a ``time`` field plus OHLCV fields.
    Returns:
        pd.DataFrame indexed by ``Date`` (parsed from ``time``), sorted
        ascending, with ``open``/``close``/``high``/``low``/``volume``
        coerced to numeric (unparseable values become NaN). An empty input
        yields an empty Date-indexed frame with those columns, so downstream
        column access does not raise.
    """
    numeric_cols = ["open", "close", "high", "low", "volume"]
    if not prices:
        # Guard: an empty DataFrame has no "time" column, and indexing it
        # would raise KeyError. Return an empty, correctly-shaped frame.
        return pd.DataFrame(
            columns=numeric_cols,
            index=pd.DatetimeIndex([], name="Date"),
        )
    df = pd.DataFrame([p.model_dump() for p in prices])
    df["Date"] = pd.to_datetime(df["time"])
    df.set_index("Date", inplace=True)
    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    df.sort_index(inplace=True)
    return df