Initial commit of integrated agent system
5
backend/data/__init__.py
Normal file
@@ -0,0 +1,5 @@
# -*- coding: utf-8 -*-
from backend.data.historical_price_manager import HistoricalPriceManager
from backend.data.polling_price_manager import PollingPriceManager

__all__ = ["PollingPriceManager", "HistoricalPriceManager"]
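For reference, this re-export means downstream code can pull both managers straight from backend.data. A minimal sketch (the key value is a placeholder):

    from backend.data import HistoricalPriceManager, PollingPriceManager

    backtest_feed = HistoricalPriceManager()
    live_feed = PollingPriceManager(api_key="<finnhub-key>", provider="finnhub")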
107
backend/data/cache.py
Normal file
@@ -0,0 +1,107 @@
# -*- coding: utf-8 -*-

from typing import Any


class Cache:
    """In-memory cache for API responses."""

    def __init__(self):
        self._prices_cache = {}
        self._financial_metrics_cache = {}
        self._line_items_cache = {}
        self._insider_trades_cache = {}
        self._company_news_cache = {}

    def _merge_data(
        self,
        existing: list[dict] | None,
        new_data: list[dict],
        key_field: str,
    ) -> list[dict]:
        """Merge existing and new data, deduplicating on key_field."""
        if not existing:
            return new_data

        # Create a set of existing keys for O(1) lookup
        existing_keys = {item[key_field] for item in existing}

        # Only add items that don't exist yet
        merged = existing.copy()
        merged.extend(
            [
                item
                for item in new_data
                if item[key_field] not in existing_keys
            ],
        )
        return merged

    def get_prices(self, ticker: str) -> list[dict[str, Any]] | None:
        """Get cached price data if available."""
        return self._prices_cache.get(ticker)

    def set_prices(self, ticker: str, data: list[dict[str, Any]]):
        """Append new price data to cache."""
        self._prices_cache[ticker] = self._merge_data(
            self._prices_cache.get(ticker),
            data,
            key_field="time",
        )

    def get_financial_metrics(self, ticker: str) -> list[dict[str, Any]] | None:
        """Get cached financial metrics if available."""
        return self._financial_metrics_cache.get(ticker)

    def set_financial_metrics(self, ticker: str, data: list[dict[str, Any]]):
        """Append new financial metrics to cache."""
        self._financial_metrics_cache[ticker] = self._merge_data(
            self._financial_metrics_cache.get(ticker),
            data,
            key_field="report_period",
        )

    def get_line_items(self, ticker: str) -> list[dict[str, Any]] | None:
        """Get cached line items if available."""
        return self._line_items_cache.get(ticker)

    def set_line_items(self, ticker: str, data: list[dict[str, Any]]):
        """Append new line items to cache."""
        self._line_items_cache[ticker] = self._merge_data(
            self._line_items_cache.get(ticker),
            data,
            key_field="report_period",
        )

    def get_insider_trades(self, ticker: str) -> list[dict[str, Any]] | None:
        """Get cached insider trades if available."""
        return self._insider_trades_cache.get(ticker)

    def set_insider_trades(self, ticker: str, data: list[dict[str, Any]]):
        """Append new insider trades to cache."""
        self._insider_trades_cache[ticker] = self._merge_data(
            self._insider_trades_cache.get(ticker),
            data,
            key_field="filing_date",
        )  # Could also use transaction_date if preferred

    def get_company_news(self, ticker: str) -> list[dict[str, Any]] | None:
        """Get cached company news if available."""
        return self._company_news_cache.get(ticker)

    def set_company_news(self, ticker: str, data: list[dict[str, Any]]):
        """Append new company news to cache."""
        self._company_news_cache[ticker] = self._merge_data(
            self._company_news_cache.get(ticker),
            data,
            key_field="date",
        )


# Global cache instance
_cache = Cache()


def get_cache() -> Cache:
    """Get the global cache instance."""
    return _cache
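Note the merge semantics above: _merge_data keeps whatever is already cached and appends only rows whose key_field value is unseen, so repeated set_* calls over overlapping windows are idempotent and the first-cached row wins on conflicts. A minimal sketch of that behavior (values illustrative):

    cache = get_cache()
    cache.set_prices("AAPL", [{"time": "2024-01-02", "close": 185.6}])
    cache.set_prices("AAPL", [
        {"time": "2024-01-02", "close": 999.0},  # duplicate key: dropped
        {"time": "2024-01-03", "close": 184.3},  # new key: appended
    ])
    prices = cache.get_prices("AAPL")
    assert [p["time"] for p in prices] == ["2024-01-02", "2024-01-03"]
    assert prices[0]["close"] == 185.6  # first write wins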
253
backend/data/historical_price_manager.py
Normal file
@@ -0,0 +1,253 @@
# -*- coding: utf-8 -*-
"""
Historical Price Manager for backtest mode
"""
import logging
from datetime import datetime
from typing import Callable, Dict, List, Optional

import pandas as pd
from backend.data.market_store import MarketStore
from backend.data.provider_utils import normalize_symbol
from backend.data.provider_router import get_provider_router

logger = logging.getLogger(__name__)


class HistoricalPriceManager:
    """Provides historical prices for backtest mode"""

    def __init__(self):
        self.subscribed_symbols = []
        self.price_callbacks = []
        self._price_cache = {}
        self._current_date = None
        self.latest_prices = {}
        self.open_prices = {}
        self.close_prices = {}
        self.running = False
        self._router = get_provider_router()
        self._market_store = MarketStore()

    def subscribe(
        self,
        symbols: List[str],
    ):
        """Subscribe to symbols"""
        for symbol in symbols:
            symbol = normalize_symbol(symbol)
            if symbol not in self.subscribed_symbols:
                self.subscribed_symbols.append(symbol)

    def unsubscribe(self, symbols: List[str]):
        """Unsubscribe from symbols"""
        for symbol in symbols:
            symbol = normalize_symbol(symbol)
            if symbol in self.subscribed_symbols:
                self.subscribed_symbols.remove(symbol)
                self._price_cache.pop(symbol, None)

    def add_price_callback(self, callback: Callable):
        """Add price update callback"""
        self.price_callbacks.append(callback)

    def _load_from_csv(self, symbol: str) -> Optional[pd.DataFrame]:
        """Load price data from local CSV file."""
        try:
            df = self._router.load_local_price_frame(symbol)
            return df if not df.empty else None
        except Exception as e:
            logger.warning(f"Failed to load CSV for {symbol}: {e}")
            return None

    def _load_from_market_db(
        self,
        symbol: str,
        start_date: str,
        end_date: str,
    ) -> Optional[pd.DataFrame]:
        """Load price data from the long-lived market research database."""
        try:
            rows = self._market_store.get_ohlc(symbol, start_date, end_date)
            if not rows:
                return None
            df = pd.DataFrame(rows)
            if df.empty or "date" not in df.columns:
                return None
            df["Date"] = pd.to_datetime(df["date"])
            df.set_index("Date", inplace=True)
            df.sort_index(inplace=True)
            return df
        except Exception as e:
            logger.warning(f"Failed to load market DB data for {symbol}: {e}")
            return None

    def preload_data(self, start_date: str, end_date: str):
        """Preload historical data from market DB first, then local CSV."""
        logger.info(f"Preloading data: {start_date} to {end_date}")

        for symbol in self.subscribed_symbols:
            if symbol in self._price_cache:
                continue

            df = self._load_from_market_db(symbol, start_date, end_date)
            if df is not None and not df.empty:
                self._price_cache[symbol] = df
                logger.info(f"Loaded {symbol} from market DB: {len(df)} records")
                continue

            df = self._load_from_csv(symbol)
            if df is not None and not df.empty:
                self._price_cache[symbol] = df
                logger.info(f"Loaded {symbol} from CSV: {len(df)} records")
            else:
                logger.warning(f"No market DB or CSV data for {symbol}")

    def set_date(self, date: str):
        """Set current trading date and update prices"""
        self._current_date = date
        date_dt = pd.Timestamp(date)

        for symbol in self.subscribed_symbols:
            df = self._price_cache.get(symbol)
            if df is None or df.empty:
                # Keep previous prices if no data available
                logger.warning(f"No cached data for {symbol} on {date}")
                continue

            # Find exact date or closest earlier date
            if date_dt in df.index:
                row = df.loc[date_dt]
            else:
                valid_dates = df.index[df.index <= date_dt]
                if len(valid_dates) == 0:
                    logger.warning(f"No data for {symbol} on or before {date}")
                    continue
                row = df.loc[valid_dates[-1]]

            open_price = float(row["open"])
            close_price = float(row["close"])

            self.open_prices[symbol] = open_price
            self.close_prices[symbol] = close_price
            self.latest_prices[symbol] = open_price

            logger.debug(
                f"{symbol} @ {date}: open={open_price:.2f}, close={close_price:.2f}",  # noqa: E501
            )

    def emit_open_prices(self):
        """Emit open prices to callbacks"""
        if not self._current_date:
            return

        timestamp = int(
            datetime.strptime(self._current_date, "%Y-%m-%d").timestamp()
            * 1000,
        )

        for symbol in self.subscribed_symbols:
            price = self.open_prices.get(symbol)
            if price is None or price <= 0:
                logger.warning(f"Invalid open price for {symbol}: {price}")
                continue

            self.latest_prices[symbol] = price
            self._emit_price(symbol, price, timestamp)

    def emit_close_prices(self):
        """Emit close prices to callbacks"""
        if not self._current_date:
            return

        timestamp = int(
            datetime.strptime(self._current_date, "%Y-%m-%d").timestamp()
            * 1000,
        )
        timestamp += 23400000  # Add 6.5 hours

        for symbol in self.subscribed_symbols:
            price = self.close_prices.get(symbol)
            if price is None or price <= 0:
                logger.warning(f"Invalid close price for {symbol}: {price}")
                continue

            self.latest_prices[symbol] = price
            self._emit_price(symbol, price, timestamp)

    def _emit_price(self, symbol: str, price: float, timestamp: int):
        """Emit single price to callbacks"""
        open_price = self.open_prices.get(symbol, price)
        close_price = self.close_prices.get(symbol, price)
        ret = (
            ((price - open_price) / open_price) * 100 if open_price > 0 else 0
        )

        price_data = {
            "symbol": symbol,
            "price": price,
            "timestamp": timestamp,
            "open": open_price,
            "close": close_price,
            "high": max(open_price, close_price),
            "low": min(open_price, close_price),
            "ret": ret,
        }

        for callback in self.price_callbacks:
            try:
                callback(price_data)
            except Exception as e:
                logger.error(f"Callback error for {symbol}: {e}")

    def get_price_for_date(
        self,
        symbol: str,
        date: str,
        price_type: str = "close",
    ) -> Optional[float]:
        """Get price for a specific date"""
        df = self._price_cache.get(symbol)
        if df is None or df.empty:
            return self.latest_prices.get(symbol)

        date_dt = pd.Timestamp(date)
        if date_dt in df.index:
            return float(df.loc[date_dt, price_type])

        valid_dates = df.index[df.index <= date_dt]
        if len(valid_dates) == 0:
            return self.latest_prices.get(symbol)
        return float(df.loc[valid_dates[-1], price_type])

    def start(self):
        """Start manager"""
        self.running = True

    def stop(self):
        """Stop manager"""
        self.running = False

    def get_latest_price(self, symbol: str) -> Optional[float]:
        return self.latest_prices.get(symbol)

    def get_all_latest_prices(self) -> Dict[str, float]:
        return self.latest_prices.copy()

    def get_open_price(self, symbol: str) -> Optional[float]:
        # Return open price, fallback to latest if not set
        price = self.open_prices.get(symbol)
        if price is None or price <= 0:
            return self.latest_prices.get(symbol)
        return price

    def get_close_price(self, symbol: str) -> Optional[float]:
        # Return close price, fallback to latest if not set
        price = self.close_prices.get(symbol)
        if price is None or price <= 0:
            return self.latest_prices.get(symbol)
        return price

    def reset_open_prices(self):
        # Don't clear prices - keep them for continuity
        pass
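The intended driving sequence for a backtest session is subscribe, preload once, then per trading day: set_date, emit_open_prices, emit_close_prices. A minimal driver sketch (ticker and dates are illustrative):

    manager = HistoricalPriceManager()
    manager.subscribe(["AAPL"])
    manager.add_price_callback(lambda tick: print(tick["symbol"], tick["price"]))
    manager.preload_data("2024-01-01", "2024-03-31")
    manager.start()
    for date in ["2024-01-02", "2024-01-03"]:
        manager.set_date(date)
        manager.emit_open_prices()   # callbacks see the 09:30 open
        manager.emit_close_prices()  # then the close, 6.5h later in tick time
    manager.stop()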
299
backend/data/market_ingest.py
Normal file
@@ -0,0 +1,299 @@
# -*- coding: utf-8 -*-
"""Ingest Polygon market data into the long-lived research warehouse."""

from __future__ import annotations

from datetime import datetime, timedelta, timezone
from typing import Any, Iterable

from backend.data.market_store import MarketStore
from backend.data.news_alignment import align_news_for_symbol
from backend.data.provider_router import DataProviderRouter
from backend.data.polygon_client import (
    fetch_news,
    fetch_ohlc,
    fetch_ticker_details,
)
from backend.data.provider_utils import normalize_symbol


def _today_utc() -> str:
    return datetime.now(timezone.utc).date().isoformat()


def _default_start(years: int = 2) -> str:
    return (datetime.now(timezone.utc).date() - timedelta(days=years * 366)).isoformat()


def _max_news_date(news_rows: Iterable[dict]) -> str | None:
    dates = [
        str(item.get("published_utc") or "").strip()[:10]
        for item in news_rows
        if str(item.get("published_utc") or "").strip()
    ]
    dates = [value for value in dates if value]
    return max(dates) if dates else None


def _effective_last_news_fetch(
    market_store: MarketStore,
    *,
    ticker: str,
    end_date: str,
    watermark_value: str | None,
) -> str | None:
    """Clamp stale/future watermarks to the latest actually stored news date."""
    raw = str(watermark_value or "").strip()[:10]
    if not raw:
        return None
    if raw <= end_date:
        return raw

    latest_stored = market_store.get_latest_news_date(ticker)
    if latest_stored and latest_stored <= end_date:
        return latest_stored
    return end_date


def _normalize_provider_news_rows(ticker: str, news_items: Iterable[Any]) -> list[dict]:
    rows: list[dict] = []
    for item in news_items:
        payload = item.model_dump() if hasattr(item, "model_dump") else dict(item or {})
        related = payload.get("related")
        if isinstance(related, str):
            related_list = [value.strip().upper() for value in related.split(",") if value.strip()]
        elif isinstance(related, list):
            related_list = [str(value).strip().upper() for value in related if str(value).strip()]
        else:
            related_list = []
        if ticker not in related_list:
            related_list.append(ticker)
        rows.append(
            {
                "title": payload.get("title"),
                "description": payload.get("summary"),
                "summary": payload.get("summary"),
                "article_url": payload.get("url"),
                "published_utc": payload.get("date"),
                "publisher": payload.get("source"),
                "tickers": related_list,
                "category": payload.get("category"),
                "raw_json": payload,
            }
        )
    return rows


def ingest_ticker_history(
    symbol: str,
    *,
    start_date: str | None = None,
    end_date: str | None = None,
    store: MarketStore | None = None,
) -> dict:
    """Fetch and persist Polygon OHLC + news for a ticker."""
    ticker = normalize_symbol(symbol)
    start = start_date or _default_start()
    end = end_date or _today_utc()
    market_store = store or MarketStore()

    details = fetch_ticker_details(ticker)
    market_store.upsert_ticker(
        symbol=ticker,
        name=details.get("name"),
        sector=details.get("sic_description"),
        is_active=bool(details.get("active", True)),
    )

    ohlc_rows = fetch_ohlc(ticker, start, end)
    news_rows = fetch_news(ticker, start, end)
    price_count = market_store.upsert_ohlc(ticker, ohlc_rows, source="polygon")
    news_count = market_store.upsert_news(ticker, news_rows, source="polygon")
    aligned_count = align_news_for_symbol(market_store, ticker)
    market_store.update_fetch_watermark(
        symbol=ticker,
        price_date=end,
        news_date=_max_news_date(news_rows),
    )

    return {
        "symbol": ticker,
        "start_date": start,
        "end_date": end,
        "prices": price_count,
        "news": news_count,
        "aligned": aligned_count,
    }


def update_ticker_incremental(
    symbol: str,
    *,
    end_date: str | None = None,
    store: MarketStore | None = None,
) -> dict:
    """Incrementally fetch OHLC + news since the last watermark."""
    ticker = normalize_symbol(symbol)
    market_store = store or MarketStore()
    watermarks = market_store.get_ticker_watermarks(ticker)
    end = end_date or _today_utc()
    start_prices = (
        (datetime.fromisoformat(watermarks["last_price_fetch"]) + timedelta(days=1)).date().isoformat()
        if watermarks.get("last_price_fetch")
        else _default_start()
    )
    effective_last_news_fetch = _effective_last_news_fetch(
        market_store,
        ticker=ticker,
        end_date=end,
        watermark_value=watermarks.get("last_news_fetch"),
    )
    start_news = (
        (datetime.fromisoformat(effective_last_news_fetch) + timedelta(days=1)).date().isoformat()
        if effective_last_news_fetch
        else _default_start()
    )

    details = fetch_ticker_details(ticker)
    market_store.upsert_ticker(
        symbol=ticker,
        name=details.get("name"),
        sector=details.get("sic_description"),
        is_active=bool(details.get("active", True)),
    )

    ohlc_rows = [] if start_prices > end else fetch_ohlc(ticker, start_prices, end)
    news_rows = [] if start_news > end else fetch_news(ticker, start_news, end)
    price_count = market_store.upsert_ohlc(ticker, ohlc_rows, source="polygon") if ohlc_rows else 0
    news_count = market_store.upsert_news(ticker, news_rows, source="polygon") if news_rows else 0
    aligned_count = align_news_for_symbol(market_store, ticker)
    market_store.update_fetch_watermark(
        symbol=ticker,
        price_date=end if ohlc_rows or watermarks.get("last_price_fetch") else None,
        news_date=_max_news_date(news_rows),
    )

    return {
        "symbol": ticker,
        "start_price_date": start_prices,
        "start_news_date": start_news,
        "end_date": end,
        "prices": price_count,
        "news": news_count,
        "aligned": aligned_count,
    }


def refresh_news_incremental(
    symbol: str,
    *,
    end_date: str | None = None,
    store: MarketStore | None = None,
) -> dict:
    """Incrementally fetch company news using the configured provider router."""
    ticker = normalize_symbol(symbol)
    market_store = store or MarketStore()
    watermarks = market_store.get_ticker_watermarks(ticker)
    end = end_date or _today_utc()
    effective_last_news_fetch = _effective_last_news_fetch(
        market_store,
        ticker=ticker,
        end_date=end,
        watermark_value=watermarks.get("last_news_fetch"),
    )
    start_news = (
        (datetime.fromisoformat(effective_last_news_fetch) + timedelta(days=1)).date().isoformat()
        if effective_last_news_fetch
        else _default_start()
    )

    if start_news > end:
        return {
            "symbol": ticker,
            "start_news_date": start_news,
            "end_date": end,
            "news": 0,
            "aligned": 0,
        }

    router = DataProviderRouter()
    news_items, source = router.get_company_news(
        ticker=ticker,
        start_date=start_news,
        end_date=end,
        limit=1000,
    )
    news_rows = _normalize_provider_news_rows(ticker, news_items)
    news_count = market_store.upsert_news(ticker, news_rows, source=source) if news_rows else 0
    aligned_count = align_news_for_symbol(market_store, ticker)
    market_store.update_fetch_watermark(
        symbol=ticker,
        news_date=_max_news_date(news_rows),
    )

    return {
        "symbol": ticker,
        "start_news_date": start_news,
        "end_date": end,
        "news": news_count,
        "aligned": aligned_count,
        "source": source,
    }


def refresh_news_for_symbols(
    symbols: Iterable[str],
    *,
    end_date: str | None = None,
    store: MarketStore | None = None,
) -> list[dict]:
    """Incrementally refresh company news for a list of tickers."""
    market_store = store or MarketStore()
    results = []
    for symbol in symbols:
        ticker = normalize_symbol(symbol)
        if not ticker:
            continue
        results.append(
            refresh_news_incremental(
                ticker,
                end_date=end_date,
                store=market_store,
            )
        )
    return results


def ingest_symbols(
    symbols: Iterable[str],
    *,
    mode: str = "incremental",
    start_date: str | None = None,
    end_date: str | None = None,
    store: MarketStore | None = None,
) -> list[dict]:
    """Fetch Polygon data for a list of tickers."""
    market_store = store or MarketStore()
    results = []
    for symbol in symbols:
        ticker = normalize_symbol(symbol)
        if not ticker:
            continue
        if mode == "full":
            results.append(
                ingest_ticker_history(
                    ticker,
                    start_date=start_date,
                    end_date=end_date,
                    store=market_store,
                )
            )
        else:
            results.append(
                update_ticker_incremental(
                    ticker,
                    end_date=end_date,
                    store=market_store,
                )
            )
    return results
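A minimal sketch of how these entry points compose (tickers illustrative; the Polygon-backed paths assume POLYGON_API_KEY is set):

    symbols = ["AAPL", "MSFT"]
    ingest_symbols(symbols, mode="full")         # one-time ~2-year backfill
    ingest_symbols(symbols, mode="incremental")  # later runs advance from watermarks
    refresh_news_for_symbols(symbols)            # news-only refresh via the provider router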
1106
backend/data/market_store.py
Normal file
File diff suppressed because it is too large
64
backend/data/news_alignment.py
Normal file
@@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-
"""Align persisted news to the nearest NYSE trading date."""

from __future__ import annotations

from datetime import time

import pandas as pd
import pandas_market_calendars as mcal

from backend.data.market_store import MarketStore


NYSE_CALENDAR = mcal.get_calendar("NYSE")


def _next_trading_day(date_str: str) -> str:
    start = pd.Timestamp(date_str).tz_localize(None)
    sessions = NYSE_CALENDAR.valid_days(
        start_date=(start - pd.Timedelta(days=1)).strftime("%Y-%m-%d"),
        end_date=(start + pd.Timedelta(days=10)).strftime("%Y-%m-%d"),
    )
    future = [
        pd.Timestamp(day).tz_localize(None).strftime("%Y-%m-%d")
        for day in sessions
        if pd.Timestamp(day).tz_localize(None) >= start
    ]
    return future[0] if future else date_str


def resolve_trade_date(published_utc: str | None) -> str | None:
    """Map a published timestamp to an NYSE trade date."""
    if not published_utc:
        return None
    timestamp = pd.to_datetime(published_utc, utc=True, errors="coerce")
    if pd.isna(timestamp):
        return None
    nyse_time = timestamp.tz_convert("America/New_York")
    candidate = nyse_time.date().isoformat()
    valid_days = NYSE_CALENDAR.valid_days(start_date=candidate, end_date=candidate)
    if len(valid_days) == 0:
        return _next_trading_day(candidate)
    if nyse_time.time() >= time(16, 0):
        return _next_trading_day((nyse_time + pd.Timedelta(days=1)).date().isoformat())
    return candidate


def align_news_for_symbol(store: MarketStore, symbol: str, *, limit: int = 5000) -> int:
    """Fill missing trade_date values for one ticker."""
    pending = store.get_news_without_trade_date(symbol, limit=limit)
    updates = []
    for row in pending:
        trade_date = resolve_trade_date(row.get("published_utc"))
        if trade_date:
            updates.append(
                {
                    "news_id": row["news_id"],
                    "symbol": row["symbol"],
                    "trade_date": trade_date,
                }
            )
    if not updates:
        return 0
    return store.set_trade_dates(updates)
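The mapping rule: convert the UTC timestamp to Eastern time; if that calendar day is a session and the article landed before the 16:00 close, use it, otherwise roll forward to the next session. Two illustrative cases (dates chosen around a 2024 weekend):

    # Friday 2024-01-05 21:30 UTC is 16:30 ET, after the close -> Monday
    assert resolve_trade_date("2024-01-05T21:30:00Z") == "2024-01-08"
    # Saturday publication is not a session -> also Monday
    assert resolve_trade_date("2024-01-06T12:00:00Z") == "2024-01-08"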
292
backend/data/polling_price_manager.py
Normal file
@@ -0,0 +1,292 @@
# -*- coding: utf-8 -*-
"""
Polling-based Price Manager with provider-aware quote polling.
Supports Finnhub and yfinance for near real-time price fetching.
"""
import logging
import threading
import time
from typing import Callable, Dict, List, Optional

import finnhub
import yfinance as yf
from backend.data.provider_utils import normalize_symbol

logger = logging.getLogger(__name__)


_SUPPRESSED_LOG_EVERY = 20


class PollingPriceManager:
    """Polling-based price manager using Finnhub or yfinance."""

    def __init__(
        self,
        api_key: Optional[str] = None,
        poll_interval: int = 30,
        provider: str = "finnhub",
    ):
        """
        Args:
            api_key: Finnhub API key
            poll_interval: Polling interval in seconds (default 30s)
            provider: Quote provider (`finnhub` or `yfinance`)
        """
        self.api_key = api_key
        self.poll_interval = poll_interval
        self.provider = provider
        self.finnhub_client = (
            finnhub.Client(api_key=api_key)
            if provider == "finnhub" and api_key
            else None
        )

        self.subscribed_symbols: List[str] = []
        self.latest_prices: Dict[str, float] = {}
        self.open_prices: Dict[str, float] = {}
        self.price_callbacks: List[Callable] = []
        self._failure_counts: Dict[str, int] = {}

        self.running = False
        self._thread: Optional[threading.Thread] = None

        logger.info(
            "PollingPriceManager initialized "
            f"(provider: {provider}, interval: {poll_interval}s)",
        )

    def subscribe(self, symbols: List[str]):
        """Subscribe to stock symbols"""
        for symbol in symbols:
            symbol = normalize_symbol(symbol)
            if symbol not in self.subscribed_symbols:
                self.subscribed_symbols.append(symbol)
                logger.info(f"Subscribed to: {symbol}")

    def unsubscribe(self, symbols: List[str]):
        """Unsubscribe from symbols"""
        for symbol in symbols:
            symbol = normalize_symbol(symbol)
            if symbol in self.subscribed_symbols:
                self.subscribed_symbols.remove(symbol)
                logger.info(f"Unsubscribed: {symbol}")

    def add_price_callback(self, callback: Callable):
        """Add price update callback"""
        self.price_callbacks.append(callback)

    def _fetch_prices(self):
        """Fetch latest prices for all subscribed stocks"""
        for symbol in self.subscribed_symbols:
            try:
                quote_data = self._fetch_quote(symbol)
                if not isinstance(quote_data, dict):
                    raise ValueError(f"{symbol}: Empty quote payload")

                current_price = quote_data.get("c")
                open_price = quote_data.get("o")
                timestamp = quote_data.get("t", int(time.time()))

                if not current_price or current_price <= 0:
                    logger.warning(f"{symbol}: Invalid price data")
                    continue

                # Store open price on first fetch
                if (
                    symbol not in self.open_prices
                    and open_price
                    and open_price > 0
                ):
                    self.open_prices[symbol] = open_price
                    logger.info(f"{symbol} open price: ${open_price:.2f}")

                stored_open = self.open_prices.get(symbol, open_price)
                ret = (
                    ((current_price - stored_open) / stored_open) * 100
                    if stored_open and stored_open > 0
                    else 0
                )

                self.latest_prices[symbol] = current_price
                previous_failures = self._failure_counts.pop(symbol, 0)
                if previous_failures > 0:
                    logger.info(
                        "%s quote polling recovered after %d consecutive failures",
                        symbol,
                        previous_failures,
                    )

                price_data = {
                    "symbol": symbol,
                    "price": current_price,
                    "timestamp": timestamp * 1000,
                    "open": stored_open,
                    "high": quote_data.get("h"),
                    "low": quote_data.get("l"),
                    "previous_close": quote_data.get("pc"),
                    "ret": ret,
                    "change": quote_data.get("d"),
                    "change_percent": quote_data.get("dp"),
                }

                for callback in self.price_callbacks:
                    try:
                        callback(price_data)
                    except Exception as e:
                        logger.error(f"Price callback error ({symbol}): {e}")

                logger.debug(
                    f"{symbol}: ${current_price:.2f} [ret: {ret:+.2f}%]",
                )

            except Exception as e:
                failure_count = self._failure_counts.get(symbol, 0) + 1
                self._failure_counts[symbol] = failure_count
                message = f"Failed to fetch {symbol} price: {e}"

                if failure_count == 1:
                    logger.warning(message)
                elif failure_count % _SUPPRESSED_LOG_EVERY == 0:
                    logger.warning(
                        "%s (repeated %d times; suppressing intermediate failures)",
                        message,
                        failure_count,
                    )
                else:
                    logger.debug(message)

    def _fetch_quote(self, symbol: str) -> Dict[str, float]:
        """Fetch a normalized quote payload from the configured provider."""
        if self.provider == "yfinance":
            return self._fetch_yfinance_quote(symbol)
        if not self.finnhub_client:
            raise ValueError("Finnhub API key required for finnhub polling")
        quote = self.finnhub_client.quote(symbol)
        if not isinstance(quote, dict):
            raise ValueError(f"{symbol}: Invalid Finnhub quote payload")
        return quote

    def _fetch_yfinance_quote(self, symbol: str) -> Dict[str, float]:
        """Fetch quote data from yfinance and normalize to Finnhub-like keys."""
        ticker = yf.Ticker(symbol)
        fast_info = dict(getattr(ticker, "fast_info", {}) or {})

        current_price = _coerce_float(
            fast_info.get("lastPrice") or fast_info.get("regularMarketPrice"),
        )
        open_price = _coerce_float(
            fast_info.get("open") or fast_info.get("regularMarketOpen"),
        )
        previous_close = _coerce_float(
            fast_info.get("previousClose")
            or fast_info.get("regularMarketPreviousClose"),
        )
        high_price = _coerce_float(
            fast_info.get("dayHigh") or fast_info.get("regularMarketDayHigh"),
        )
        low_price = _coerce_float(
            fast_info.get("dayLow") or fast_info.get("regularMarketDayLow"),
        )

        if current_price is None:
            history = ticker.history(period="1d", interval="1m", auto_adjust=False)
            if history is None:
                raise ValueError(f"{symbol}: yfinance returned no history frame")
            if history.empty:
                raise ValueError(f"{symbol}: No yfinance quote data")
            latest = history.iloc[-1]
            current_price = _coerce_float(latest.get("Close"))
            open_price = open_price or _coerce_float(history.iloc[0].get("Open"))
            high_price = high_price or _coerce_float(history["High"].max())
            low_price = low_price or _coerce_float(history["Low"].min())

        if current_price is None:
            raise ValueError(f"{symbol}: Invalid yfinance quote data")

        effective_open = open_price or previous_close or current_price
        effective_prev_close = previous_close or effective_open or current_price
        change = current_price - effective_prev_close
        change_percent = (
            (change / effective_prev_close) * 100 if effective_prev_close else 0.0
        )

        return {
            "c": current_price,
            "o": effective_open,
            "h": high_price or max(current_price, effective_open),
            "l": low_price or min(current_price, effective_open),
            "pc": effective_prev_close,
            "d": change,
            "dp": change_percent,
            "t": int(time.time()),
        }

    def _polling_loop(self):
        """Main polling loop"""
        logger.info(f"Price polling started (interval: {self.poll_interval}s)")

        while self.running:
            try:
                start_time = time.time()
                self._fetch_prices()

                elapsed = time.time() - start_time
                sleep_time = max(0, self.poll_interval - elapsed)
                if sleep_time > 0:
                    time.sleep(sleep_time)

            except Exception as e:
                logger.error(f"Polling loop error: {e}")
                time.sleep(5)

    def start(self):
        """Start price polling"""
        if self.running:
            logger.warning("Price polling already running")
            return

        if not self.subscribed_symbols:
            logger.warning("No stocks subscribed")
            return

        self.running = True
        self._thread = threading.Thread(target=self._polling_loop, daemon=True)
        self._thread.start()

        logger.info(
            f"Price polling started: {', '.join(self.subscribed_symbols)}",
        )

    def stop(self):
        """Stop price polling"""
        self.running = False
        if self._thread:
            self._thread.join(timeout=5)
        logger.info("Price polling stopped")

    def get_latest_price(self, symbol: str) -> Optional[float]:
        """Get latest price for symbol"""
        return self.latest_prices.get(symbol)

    def get_all_latest_prices(self) -> Dict[str, float]:
        """Get all latest prices"""
        return self.latest_prices.copy()

    def get_open_price(self, symbol: str) -> Optional[float]:
        """Get open price for symbol"""
        return self.open_prices.get(symbol)

    def reset_open_prices(self):
        """Reset open prices for new trading day"""
        self.open_prices.clear()
        logger.info("Open prices reset")


def _coerce_float(value) -> Optional[float]:
    try:
        if value is None:
            return None
        return float(value)
    except (TypeError, ValueError):
        return None
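A minimal wiring sketch (yfinance mode needs no API key; the sleep just lets a few polls run):

    import time

    manager = PollingPriceManager(poll_interval=30, provider="yfinance")
    manager.subscribe(["AAPL", "NVDA"])
    manager.add_price_callback(
        lambda tick: print(tick["symbol"], tick["price"], f"{tick['ret']:+.2f}%"),
    )
    manager.start()  # spawns the daemon polling thread
    time.sleep(90)
    manager.stop()   # joins the thread (5s timeout)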
161
backend/data/polygon_client.py
Normal file
@@ -0,0 +1,161 @@
# -*- coding: utf-8 -*-
"""Polygon client used for long-lived market research ingestion."""

from __future__ import annotations

import os
import time
from datetime import datetime, timezone
from typing import Any, Optional

import requests


BASE = "https://api.polygon.io"


def _headers() -> dict[str, str]:
    api_key = os.getenv("POLYGON_API_KEY", "").strip()
    if not api_key:
        raise ValueError("Missing required API key: POLYGON_API_KEY")
    return {"Authorization": f"Bearer {api_key}"}


def http_get(
    url: str,
    params: Optional[dict[str, Any]] = None,
    *,
    max_retries: int = 8,
    backoff: float = 2.0,
) -> requests.Response:
    """HTTP GET with exponential backoff and 429 handling."""
    for attempt in range(max_retries):
        try:
            response = requests.get(
                url,
                params=params or {},
                headers=_headers(),
                timeout=30,
            )
        except requests.RequestException:
            time.sleep((backoff**attempt) + 0.5)
            if attempt == max_retries - 1:
                raise
            continue

        if response.status_code == 429:
            retry_after = response.headers.get("Retry-After")
            wait = (
                float(retry_after)
                if retry_after and retry_after.isdigit()
                else min((backoff**attempt) + 1.0, 60.0)
            )
            time.sleep(wait)
            if attempt == max_retries - 1:
                response.raise_for_status()
            continue

        if 500 <= response.status_code < 600:
            time.sleep(min((backoff**attempt) + 1.0, 60.0))
            if attempt == max_retries - 1:
                response.raise_for_status()
            continue

        response.raise_for_status()
        return response
    raise RuntimeError("Unreachable")


def fetch_ticker_details(symbol: str) -> dict[str, Any]:
    """Fetch company metadata from Polygon."""
    response = http_get(f"{BASE}/v3/reference/tickers/{symbol}")
    return response.json().get("results", {}) or {}


def fetch_ohlc(symbol: str, start_date: str, end_date: str) -> list[dict[str, Any]]:
    """Fetch daily OHLC data from Polygon."""
    response = http_get(
        f"{BASE}/v2/aggs/ticker/{symbol}/range/1/day/{start_date}/{end_date}",
        params={"adjusted": "true", "sort": "asc", "limit": 50000},
    )
    results = response.json().get("results") or []
    rows: list[dict[str, Any]] = []
    for item in results:
        rows.append(
            {
                "date": datetime.fromtimestamp(
                    int(item["t"]) / 1000,
                    tz=timezone.utc,
                ).date().isoformat(),
                "open": item.get("o"),
                "high": item.get("h"),
                "low": item.get("l"),
                "close": item.get("c"),
                "volume": item.get("v"),
                "vwap": item.get("vw"),
                "transactions": item.get("n"),
            }
        )
    return rows


def fetch_news(
    symbol: str,
    start_date: str,
    end_date: str,
    *,
    per_page: int = 50,
    page_sleep: float = 1.2,
    max_pages: Optional[int] = None,
) -> list[dict[str, Any]]:
    """Fetch all Polygon news for a ticker, with pagination."""
    url = f"{BASE}/v2/reference/news"
    params = {
        "ticker": symbol,
        "published_utc.gte": start_date,
        "published_utc.lte": end_date,
        "limit": per_page,
        "order": "asc",
    }
    next_url: Optional[str] = None
    pages = 0
    all_articles: list[dict[str, Any]] = []
    seen_ids: set[str] = set()

    while True:
        response = http_get(next_url or url, params=None if next_url else params)
        data = response.json()
        results = data.get("results") or []
        if not results:
            break

        for item in results:
            article_id = item.get("id")
            if article_id and article_id in seen_ids:
                continue
            all_articles.append(
                {
                    "id": article_id,
                    "publisher": (item.get("publisher") or {}).get("name"),
                    "title": item.get("title"),
                    "author": item.get("author"),
                    "published_utc": item.get("published_utc"),
                    "amp_url": item.get("amp_url"),
                    "article_url": item.get("article_url"),
                    "tickers": item.get("tickers"),
                    "description": item.get("description"),
                    "insights": item.get("insights"),
                }
            )
            if article_id:
                seen_ids.add(article_id)

        next_url = data.get("next_url")
        pages += 1
        if max_pages is not None and pages >= max_pages:
            break
        if not next_url:
            break
        time.sleep(page_sleep)

    return all_articles
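A minimal sketch of the client in isolation (dates illustrative; POLYGON_API_KEY must be set in the environment):

    bars = fetch_ohlc("AAPL", "2024-01-01", "2024-01-31")
    print(len(bars), bars[0]["date"], bars[0]["close"])

    articles = fetch_news("AAPL", "2024-01-01", "2024-01-31", max_pages=2)
    print(len(articles), articles[0]["published_utc"])

Worth noting on the retry policy: http_get honors Retry-After on 429s and caps each backoff sleep at 60 seconds, so a fully rate-limited call can block for several minutes before it finally raises.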
910
backend/data/provider_router.py
Normal file
@@ -0,0 +1,910 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Unified data provider router with fallback support."""
|
||||
|
||||
import datetime
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Callable, Optional
|
||||
|
||||
import finnhub
|
||||
import pandas as pd
|
||||
import yfinance as yf
|
||||
|
||||
from backend.config.data_config import DataSource, get_data_sources
|
||||
from shared.schema import (
|
||||
CompanyFactsResponse,
|
||||
CompanyNews,
|
||||
CompanyNewsResponse,
|
||||
FinancialMetrics,
|
||||
FinancialMetricsResponse,
|
||||
InsiderTrade,
|
||||
InsiderTradeResponse,
|
||||
LineItem,
|
||||
LineItemResponse,
|
||||
Price,
|
||||
PriceResponse,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_DATA_DIR = Path(__file__).parent / "ret_data"
|
||||
|
||||
|
||||
def _format_provider_error(exc: Exception) -> str:
|
||||
"""Condense common provider failures into short, readable messages."""
|
||||
message = str(exc).strip().replace("\n", " ")
|
||||
if "429" in message:
|
||||
return "rate limit reached"
|
||||
if "402" in message:
|
||||
return "insufficient credits"
|
||||
if "422" in message or "Missing parameters" in message:
|
||||
return "invalid request parameters"
|
||||
if "Quote not found" in message:
|
||||
return "quote not found"
|
||||
return message
|
||||
|
||||
|
||||
def _has_valid_ticker(ticker: str) -> bool:
|
||||
"""Return whether the normalized ticker is non-empty."""
|
||||
return bool((ticker or "").strip())
|
||||
|
||||
|
||||
class DataProviderRouter:
|
||||
"""Route data requests across configured providers with fallbacks."""
|
||||
|
||||
def __init__(self):
|
||||
self.sources = get_data_sources()
|
||||
self._usage = {
|
||||
"preferred": list(self.sources),
|
||||
"last_success": {},
|
||||
}
|
||||
self._listeners: list[Callable[[dict], None]] = []
|
||||
|
||||
def price_sources(self) -> list[DataSource]:
|
||||
"""Price lookup order, always allowing local CSV fallback."""
|
||||
return self.sources
|
||||
|
||||
def api_sources(self) -> list[DataSource]:
|
||||
"""Providers that can serve network-backed data."""
|
||||
return [source for source in self.sources if source != "local_csv"]
|
||||
|
||||
def get_prices(
|
||||
self,
|
||||
ticker: str,
|
||||
start_date: str,
|
||||
end_date: str,
|
||||
) -> tuple[list[Price], DataSource]:
|
||||
"""Fetch prices using preferred providers with fallback."""
|
||||
if not _has_valid_ticker(ticker):
|
||||
return [], "local_csv"
|
||||
last_error: Optional[Exception] = None
|
||||
|
||||
for source in self.price_sources():
|
||||
try:
|
||||
if source == "finnhub":
|
||||
prices = _fetch_finnhub_prices(ticker, start_date, end_date)
|
||||
self._record_success("prices", source)
|
||||
return prices, source
|
||||
if source == "financial_datasets":
|
||||
prices = _fetch_fd_prices(ticker, start_date, end_date)
|
||||
self._record_success("prices", source)
|
||||
return prices, source
|
||||
if source == "yfinance":
|
||||
prices = _fetch_yfinance_prices(ticker, start_date, end_date)
|
||||
self._record_success("prices", source)
|
||||
return prices, source
|
||||
prices = _fetch_local_prices(ticker, start_date, end_date)
|
||||
if prices:
|
||||
self._record_success("prices", source)
|
||||
return prices, source
|
||||
except Exception as exc:
|
||||
last_error = exc
|
||||
logger.warning(
|
||||
"Price source %s failed for %s: %s",
|
||||
source,
|
||||
ticker,
|
||||
_format_provider_error(exc),
|
||||
)
|
||||
|
||||
if last_error:
|
||||
raise last_error
|
||||
return [], "local_csv"
|
||||
|
||||
def get_financial_metrics(
|
||||
self,
|
||||
ticker: str,
|
||||
end_date: str,
|
||||
period: str = "ttm",
|
||||
limit: int = 10,
|
||||
) -> tuple[list[FinancialMetrics], DataSource]:
|
||||
"""Fetch financial metrics with API provider fallback."""
|
||||
if not _has_valid_ticker(ticker):
|
||||
return [], "local_csv"
|
||||
last_error: Optional[Exception] = None
|
||||
|
||||
for source in self.api_sources():
|
||||
try:
|
||||
if source == "finnhub":
|
||||
metrics = _fetch_finnhub_financial_metrics(
|
||||
ticker,
|
||||
end_date,
|
||||
period,
|
||||
)
|
||||
self._record_success("financial_metrics", source)
|
||||
return metrics, source
|
||||
if source == "yfinance":
|
||||
metrics = _fetch_yfinance_financial_metrics(
|
||||
ticker,
|
||||
end_date,
|
||||
period,
|
||||
)
|
||||
self._record_success("financial_metrics", source)
|
||||
return metrics, source
|
||||
metrics = _fetch_fd_financial_metrics(
|
||||
ticker,
|
||||
end_date,
|
||||
period,
|
||||
limit,
|
||||
)
|
||||
self._record_success("financial_metrics", source)
|
||||
return metrics, source
|
||||
except Exception as exc:
|
||||
last_error = exc
|
||||
logger.warning(
|
||||
"Financial metrics source %s failed for %s: %s",
|
||||
source,
|
||||
ticker,
|
||||
_format_provider_error(exc),
|
||||
)
|
||||
|
||||
if last_error:
|
||||
raise last_error
|
||||
return [], "local_csv"
|
||||
|
||||
def search_line_items(
|
||||
self,
|
||||
ticker: str,
|
||||
line_items: list[str],
|
||||
end_date: str,
|
||||
period: str = "ttm",
|
||||
limit: int = 10,
|
||||
) -> list[LineItem]:
|
||||
"""Line items are only supported via Financial Datasets."""
|
||||
if not _has_valid_ticker(ticker):
|
||||
return []
|
||||
if "financial_datasets" not in self.api_sources():
|
||||
return []
|
||||
try:
|
||||
results = _fetch_fd_line_items(
|
||||
ticker=ticker,
|
||||
line_items=line_items,
|
||||
end_date=end_date,
|
||||
period=period,
|
||||
limit=limit,
|
||||
)
|
||||
self._record_success("line_items", "financial_datasets")
|
||||
return results
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Line items source failed for %s: %s",
|
||||
ticker,
|
||||
_format_provider_error(exc),
|
||||
)
|
||||
return []
|
||||
|
||||
def get_insider_trades(
|
||||
self,
|
||||
ticker: str,
|
||||
end_date: str,
|
||||
start_date: Optional[str] = None,
|
||||
limit: int = 1000,
|
||||
) -> tuple[list[InsiderTrade], DataSource]:
|
||||
"""Fetch insider trades with provider fallback."""
|
||||
if not _has_valid_ticker(ticker):
|
||||
return [], "local_csv"
|
||||
last_error: Optional[Exception] = None
|
||||
|
||||
for source in self.api_sources():
|
||||
try:
|
||||
if source == "finnhub":
|
||||
trades = _fetch_finnhub_insider_trades(
|
||||
ticker,
|
||||
start_date,
|
||||
end_date,
|
||||
limit,
|
||||
)
|
||||
self._record_success("insider_trades", source)
|
||||
return trades, source
|
||||
trades = _fetch_fd_insider_trades(
|
||||
ticker,
|
||||
start_date,
|
||||
end_date,
|
||||
limit,
|
||||
)
|
||||
self._record_success("insider_trades", source)
|
||||
return trades, source
|
||||
except Exception as exc:
|
||||
last_error = exc
|
||||
logger.warning(
|
||||
"Insider trades source %s failed for %s: %s",
|
||||
source,
|
||||
ticker,
|
||||
_format_provider_error(exc),
|
||||
)
|
||||
|
||||
if last_error:
|
||||
raise last_error
|
||||
return [], "local_csv"
|
||||
|
||||
def get_company_news(
|
||||
self,
|
||||
ticker: str,
|
||||
end_date: str,
|
||||
start_date: Optional[str] = None,
|
||||
limit: int = 1000,
|
||||
) -> tuple[list[CompanyNews], DataSource]:
|
||||
"""Fetch company news with provider fallback."""
|
||||
if not _has_valid_ticker(ticker):
|
||||
return [], "local_csv"
|
||||
last_error: Optional[Exception] = None
|
||||
|
||||
for source in self.api_sources():
|
||||
try:
|
||||
if source == "finnhub":
|
||||
news = _fetch_finnhub_company_news(
|
||||
ticker,
|
||||
start_date,
|
||||
end_date,
|
||||
limit,
|
||||
)
|
||||
self._record_success("company_news", source)
|
||||
return news, source
|
||||
if source == "yfinance":
|
||||
news = _fetch_yfinance_company_news(
|
||||
ticker,
|
||||
start_date,
|
||||
end_date,
|
||||
limit,
|
||||
)
|
||||
self._record_success("company_news", source)
|
||||
return news, source
|
||||
news = _fetch_fd_company_news(
|
||||
ticker,
|
||||
start_date,
|
||||
end_date,
|
||||
limit,
|
||||
)
|
||||
self._record_success("company_news", source)
|
||||
return news, source
|
||||
except Exception as exc:
|
||||
last_error = exc
|
||||
logger.warning(
|
||||
"Company news source %s failed for %s: %s",
|
||||
source,
|
||||
ticker,
|
||||
_format_provider_error(exc),
|
||||
)
|
||||
|
||||
if last_error:
|
||||
raise last_error
|
||||
return [], "local_csv"
|
||||
|
||||
def get_market_cap(
|
||||
self,
|
||||
ticker: str,
|
||||
end_date: str,
|
||||
metrics_lookup,
|
||||
) -> tuple[Optional[float], DataSource]:
|
||||
"""Fetch market cap using facts API or financial metrics fallback."""
|
||||
if not _has_valid_ticker(ticker):
|
||||
return None, "local_csv"
|
||||
today = datetime.datetime.now().strftime("%Y-%m-%d")
|
||||
if end_date == today and "financial_datasets" in self.api_sources():
|
||||
try:
|
||||
self._record_success("market_cap", "financial_datasets")
|
||||
return _fetch_fd_market_cap_today(ticker), "financial_datasets"
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Market cap facts source failed for %s: %s",
|
||||
ticker,
|
||||
_format_provider_error(exc),
|
||||
)
|
||||
|
||||
metrics, source = metrics_lookup(ticker, end_date)
|
||||
if not metrics:
|
||||
return None, source
|
||||
market_cap = metrics[0].market_cap
|
||||
if market_cap is None:
|
||||
return None, source
|
||||
if source == "finnhub":
|
||||
self._record_success("market_cap", source)
|
||||
return market_cap * 1_000_000, source
|
||||
self._record_success("market_cap", source)
|
||||
return market_cap, source
|
||||
|
||||
def get_usage_snapshot(self) -> dict:
|
||||
"""Return provider usage metadata for UI/debugging."""
|
||||
return {
|
||||
"preferred": list(self._usage["preferred"]),
|
||||
"last_success": dict(self._usage["last_success"]),
|
||||
}
|
||||
|
||||
def add_listener(self, listener: Callable[[dict], None]) -> None:
|
||||
"""Register a callback for provider usage changes."""
|
||||
if listener not in self._listeners:
|
||||
self._listeners.append(listener)
|
||||
|
||||
def remove_listener(self, listener: Callable[[dict], None]) -> None:
|
||||
"""Remove a previously registered listener."""
|
||||
if listener in self._listeners:
|
||||
self._listeners.remove(listener)
|
||||
|
||||
def load_local_price_frame(
|
||||
self,
|
||||
ticker: str,
|
||||
start_date: Optional[str] = None,
|
||||
end_date: Optional[str] = None,
|
||||
) -> pd.DataFrame:
|
||||
"""Load local CSV prices as a DataFrame for backtest managers."""
|
||||
csv_path = _DATA_DIR / f"{ticker}.csv"
|
||||
if not csv_path.exists():
|
||||
return pd.DataFrame()
|
||||
df = pd.read_csv(csv_path)
|
||||
if df.empty or "time" not in df.columns:
|
||||
return pd.DataFrame()
|
||||
df["time"] = pd.to_datetime(df["time"])
|
||||
if start_date:
|
||||
df = df[df["time"] >= pd.to_datetime(start_date)]
|
||||
if end_date:
|
||||
df = df[df["time"] <= pd.to_datetime(end_date)]
|
||||
if df.empty:
|
||||
return pd.DataFrame()
|
||||
df["Date"] = pd.to_datetime(df["time"])
|
||||
df.set_index("Date", inplace=True)
|
||||
df.sort_index(inplace=True)
|
||||
self._record_success("historical_prices", "local_csv")
|
||||
return df
|
||||
|
||||
def _record_success(self, data_type: str, source: DataSource) -> None:
|
||||
previous = self._usage["last_success"].get(data_type)
|
||||
self._usage["last_success"][data_type] = source
|
||||
if previous != source:
|
||||
snapshot = self.get_usage_snapshot()
|
||||
for listener in list(self._listeners):
|
||||
try:
|
||||
listener(snapshot)
|
||||
except Exception as exc:
|
||||
logger.warning("Provider listener failed: %s", exc)
|
||||
|
||||
|
||||
_router_instance: Optional[DataProviderRouter] = None
|
||||
|
||||
|
||||
def get_provider_router() -> DataProviderRouter:
|
||||
"""Return a shared provider router instance."""
|
||||
global _router_instance
|
||||
if _router_instance is None:
|
||||
_router_instance = DataProviderRouter()
|
||||
return _router_instance
|
||||
|
||||
|
||||
def _get_finnhub_client() -> finnhub.Client:
|
||||
api_key = _env_required("FINNHUB_API_KEY")
|
||||
return finnhub.Client(api_key=api_key)
|
||||
|
||||
|
||||
def _env_required(key: str) -> str:
|
||||
import os
|
||||
|
||||
value = os.getenv(key, "").strip()
|
||||
if not value:
|
||||
raise ValueError(f"Missing required API key: {key}")
|
||||
return value
|
||||
|
||||
|
||||
def _make_api_request(url: str, headers: dict, method: str = "GET", json_data: dict = None):
|
||||
import requests
|
||||
|
||||
response = (
|
||||
requests.post(url, headers=headers, json=json_data)
|
||||
if method.upper() == "POST"
|
||||
else requests.get(url, headers=headers)
|
||||
)
|
||||
if response.status_code != 200:
|
||||
raise ValueError(f"{response.status_code} - {response.text}")
|
||||
return response
|
||||
|
||||
|
||||
def _fetch_local_prices(
|
||||
ticker: str,
|
||||
start_date: str,
|
||||
end_date: str,
|
||||
) -> list[Price]:
|
||||
csv_path = _DATA_DIR / f"{ticker}.csv"
|
||||
if not csv_path.exists():
|
||||
return []
|
||||
df = pd.read_csv(csv_path)
|
||||
if df.empty or "time" not in df.columns:
|
||||
return []
|
||||
df["time"] = pd.to_datetime(df["time"])
|
||||
start = pd.to_datetime(start_date)
|
||||
end = pd.to_datetime(end_date)
|
||||
df = df[(df["time"] >= start) & (df["time"] <= end)].copy()
|
||||
if df.empty:
|
||||
return []
|
||||
return [
|
||||
Price(
|
||||
open=float(row["open"]),
|
||||
close=float(row["close"]),
|
||||
high=float(row["high"]),
|
||||
low=float(row["low"]),
|
||||
volume=int(float(row["volume"])),
|
||||
time=row["time"].strftime("%Y-%m-%d"),
|
||||
)
|
||||
for _, row in df.iterrows()
|
||||
]
|
||||
|
||||
|
||||
def _fetch_finnhub_prices(
|
||||
ticker: str,
|
||||
start_date: str,
|
||||
end_date: str,
|
||||
) -> list[Price]:
|
||||
client = _get_finnhub_client()
|
||||
start_timestamp = int(
|
||||
datetime.datetime.strptime(start_date, "%Y-%m-%d").timestamp(),
|
||||
)
|
||||
end_timestamp = int(
|
||||
(
|
||||
datetime.datetime.strptime(end_date, "%Y-%m-%d")
|
||||
+ datetime.timedelta(days=1)
|
||||
).timestamp(),
|
||||
)
|
||||
candles = client.stock_candles(ticker, "D", start_timestamp, end_timestamp)
|
||||
return [
|
||||
Price(
|
||||
open=candles["o"][i],
|
||||
close=candles["c"][i],
|
||||
high=candles["h"][i],
|
||||
low=candles["l"][i],
|
||||
volume=int(candles["v"][i]),
|
||||
time=datetime.datetime.fromtimestamp(candles["t"][i]).strftime(
|
||||
"%Y-%m-%d",
|
||||
),
|
||||
)
|
||||
for i in range(len(candles.get("t", [])))
|
||||
]
|
||||
|
||||
|
||||
def _fetch_yfinance_prices(
|
||||
ticker: str,
|
||||
start_date: str,
|
||||
end_date: str,
|
||||
) -> list[Price]:
|
||||
history = yf.Ticker(ticker).history(
|
||||
start=start_date,
|
||||
end=(
|
||||
datetime.datetime.strptime(end_date, "%Y-%m-%d")
|
||||
+ datetime.timedelta(days=1)
|
||||
).strftime("%Y-%m-%d"),
|
||||
auto_adjust=False,
|
||||
actions=False,
|
||||
)
|
||||
if history.empty:
|
||||
return []
|
||||
history = history.reset_index()
|
||||
date_column = "Date" if "Date" in history.columns else history.columns[0]
|
||||
return [
|
||||
Price(
|
||||
open=float(row["Open"]),
|
||||
close=float(row["Close"]),
|
||||
high=float(row["High"]),
|
||||
low=float(row["Low"]),
|
||||
volume=int(float(row["Volume"])),
|
||||
time=pd.to_datetime(row[date_column]).strftime("%Y-%m-%d"),
|
||||
)
|
||||
for _, row in history.iterrows()
|
||||
]
|
||||
|
||||
|
||||
def _fetch_fd_prices(
|
||||
ticker: str,
|
||||
start_date: str,
|
||||
end_date: str,
|
||||
) -> list[Price]:
|
||||
headers = {"X-API-KEY": _env_required("FINANCIAL_DATASETS_API_KEY")}
|
||||
url = (
|
||||
"https://api.financialdatasets.ai/prices/"
|
||||
f"?ticker={ticker}&interval=day&interval_multiplier=1"
|
||||
f"&start_date={start_date}&end_date={end_date}"
|
||||
)
|
||||
response = _make_api_request(url, headers)
|
||||
return PriceResponse(**response.json()).prices
|
||||
|
||||
|
||||
def _fetch_finnhub_financial_metrics(
|
||||
ticker: str,
|
||||
end_date: str,
|
||||
period: str,
|
||||
) -> list[FinancialMetrics]:
|
||||
client = _get_finnhub_client()
|
||||
financials = client.company_basic_financials(ticker, "all")
|
||||
metric_data = financials.get("metric", {})
|
||||
if not metric_data:
|
||||
return []
|
||||
return [_map_finnhub_metrics(ticker, end_date, period, metric_data)]
|
||||
|
||||
|
||||
def _fetch_fd_financial_metrics(
    ticker: str,
    end_date: str,
    period: str,
    limit: int,
) -> list[FinancialMetrics]:
    headers = {"X-API-KEY": _env_required("FINANCIAL_DATASETS_API_KEY")}
    url = (
        "https://api.financialdatasets.ai/financial-metrics/"
        f"?ticker={ticker}&report_period_lte={end_date}&limit={limit}&period={period}"
    )
    response = _make_api_request(url, headers)
    return FinancialMetricsResponse(**response.json()).financial_metrics


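# Note: yfinance's ``info`` dict is a point-in-time snapshot, so the helper
# below stamps the caller's end_date as report_period and leaves fields that
# Yahoo does not expose (turnover ratios, most growth series, etc.) as None.
# _coerce_float guards against missing or non-numeric values.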
def _fetch_yfinance_financial_metrics(
    ticker: str,
    end_date: str,
    period: str,
) -> list[FinancialMetrics]:
    info = yf.Ticker(ticker).info or {}
    shares_outstanding = _coerce_float(info.get("sharesOutstanding"))
    free_cashflow = _coerce_float(info.get("freeCashflow"))
    return [
        FinancialMetrics(
            ticker=ticker,
            report_period=end_date,
            period=period,
            currency=str(info.get("currency") or "USD"),
            market_cap=_coerce_float(info.get("marketCap")),
            enterprise_value=_coerce_float(info.get("enterpriseValue")),
            price_to_earnings_ratio=_coerce_float(info.get("trailingPE")),
            price_to_book_ratio=_coerce_float(info.get("priceToBook")),
            price_to_sales_ratio=_coerce_float(
                info.get("priceToSalesTrailing12Months"),
            ),
            enterprise_value_to_ebitda_ratio=_coerce_float(
                info.get("enterpriseToEbitda"),
            ),
            enterprise_value_to_revenue_ratio=_coerce_float(
                info.get("enterpriseToRevenue"),
            ),
            free_cash_flow_yield=_ratio_or_none(free_cashflow, info.get("marketCap")),
            peg_ratio=_coerce_float(info.get("pegRatio")),
            gross_margin=_coerce_float(info.get("grossMargins")),
            operating_margin=_coerce_float(info.get("operatingMargins")),
            net_margin=_coerce_float(info.get("profitMargins")),
            return_on_equity=_coerce_float(info.get("returnOnEquity")),
            return_on_assets=_coerce_float(info.get("returnOnAssets")),
            return_on_invested_capital=None,
            asset_turnover=None,
            inventory_turnover=None,
            receivables_turnover=None,
            days_sales_outstanding=None,
            operating_cycle=None,
            working_capital_turnover=None,
            current_ratio=_coerce_float(info.get("currentRatio")),
            quick_ratio=_coerce_float(info.get("quickRatio")),
            cash_ratio=None,
            operating_cash_flow_ratio=None,
            debt_to_equity=_coerce_float(info.get("debtToEquity")),
            debt_to_assets=None,
            interest_coverage=None,
            revenue_growth=_coerce_float(info.get("revenueGrowth")),
            earnings_growth=_coerce_float(
                info.get("earningsGrowth") or info.get("earningsQuarterlyGrowth"),
            ),
            book_value_growth=None,
            earnings_per_share_growth=_coerce_float(
                info.get("earningsQuarterlyGrowth"),
            ),
            free_cash_flow_growth=None,
            operating_income_growth=None,
            ebitda_growth=None,
            payout_ratio=_coerce_float(info.get("payoutRatio")),
            earnings_per_share=_coerce_float(info.get("trailingEps")),
            book_value_per_share=_coerce_float(info.get("bookValue")),
            free_cash_flow_per_share=_ratio_or_none(free_cashflow, shares_outstanding),
        ),
    ]


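# Note: unlike the GET endpoints above, the line-items search is a POST with
# a JSON body, and the response is additionally truncated client-side to
# ``limit`` entries.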
def _fetch_fd_line_items(
    ticker: str,
    line_items: list[str],
    end_date: str,
    period: str,
    limit: int,
) -> list[LineItem]:
    headers = {"X-API-KEY": _env_required("FINANCIAL_DATASETS_API_KEY")}
    body = {
        "tickers": [ticker],
        "line_items": line_items,
        "end_date": end_date,
        "period": period,
        "limit": limit,
    }
    response = _make_api_request(
        "https://api.financialdatasets.ai/financials/search/line-items",
        headers,
        method="POST",
        json_data=body,
    )
    return LineItemResponse(**response.json()).search_results[:limit]


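# Note: when no start_date is given, the insider-trade lookback defaults to
# 365 days before end_date.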
def _fetch_finnhub_insider_trades(
    ticker: str,
    start_date: Optional[str],
    end_date: str,
    limit: int,
) -> list[InsiderTrade]:
    client = _get_finnhub_client()
    from_date = start_date or (
        datetime.datetime.strptime(end_date, "%Y-%m-%d")
        - datetime.timedelta(days=365)
    ).strftime("%Y-%m-%d")
    insider_data = client.stock_insider_transactions(ticker, from_date, end_date)
    return [
        _convert_finnhub_insider_trade(ticker, trade)
        for trade in insider_data.get("data", [])[:limit]
    ]


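# Note: newer yfinance releases nest article fields under item["content"]
# while older ones keep them flat, hence the dual lookups below. Published
# timestamps are normalized to tz-naive pandas Timestamps so the start/end
# bound comparisons never mix aware and naive datetimes.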
def _fetch_yfinance_company_news(
    ticker: str,
    start_date: Optional[str],
    end_date: str,
    limit: int,
) -> list[CompanyNews]:
    news_items = getattr(yf.Ticker(ticker), "news", None) or []
    start_bound = _normalize_timestamp(pd.to_datetime(start_date)) if start_date else None
    end_bound = _normalize_timestamp(pd.to_datetime(end_date))
    results: list[CompanyNews] = []

    for item in news_items:
        content = item.get("content", item)
        published = (
            content.get("pubDate")
            or content.get("displayTime")
            or item.get("providerPublishTime")
        )
        published_dt = _normalize_timestamp(_parse_news_datetime(published))
        if published_dt is not None and published_dt > end_bound:
            continue
        if start_bound is not None and published_dt is not None and published_dt < start_bound:
            continue

        # canonicalUrl / clickThroughUrl are nested {"url": ...} dicts in
        # newer yfinance payloads; older flat payloads fall through to
        # content["url"] / item["link"].
        url = (
            _nested_get(content, "canonicalUrl", "url")
            or _nested_get(content, "clickThroughUrl", "url")
            or content.get("url")
            or item.get("link")
        )
        title = content.get("title") or item.get("title")
        if not title or not url:
            continue

        results.append(
            CompanyNews(
                category=content.get("contentType") or item.get("type"),
                ticker=ticker,
                title=title,
                related=item.get("relatedTickers", [ticker])[0]
                if item.get("relatedTickers")
                else ticker,
                source=_nested_get(content, "provider", "displayName")
                or item.get("publisher")
                or "Yahoo Finance",
                date=published_dt.strftime("%Y-%m-%d") if published_dt else None,
                url=url,
                summary=content.get("summary") or item.get("summary"),
            ),
        )
        if len(results) >= limit:
            break

    return results


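# Note: Finnhub's company_basic_financials payload mixes TTM and annual
# figures; the mapping below fills only the fields that payload actually
# carries and leaves the rest as None rather than guessing.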
def _map_finnhub_metrics(
    ticker: str,
    end_date: str,
    period: str,
    metric_data: dict,
) -> FinancialMetrics:
    """Map Finnhub metric data to FinancialMetrics model."""
    return FinancialMetrics(
        ticker=ticker,
        report_period=end_date,
        period=period,
        currency="USD",
        market_cap=metric_data.get("marketCapitalization"),
        enterprise_value=None,
        price_to_earnings_ratio=metric_data.get("peBasicExclExtraTTM"),
        price_to_book_ratio=metric_data.get("pbAnnual"),
        price_to_sales_ratio=metric_data.get("psAnnual"),
        enterprise_value_to_ebitda_ratio=None,
        enterprise_value_to_revenue_ratio=None,
        free_cash_flow_yield=None,
        peg_ratio=None,
        gross_margin=metric_data.get("grossMarginTTM"),
        operating_margin=metric_data.get("operatingMarginTTM"),
        net_margin=metric_data.get("netProfitMarginTTM"),
        return_on_equity=metric_data.get("roeTTM"),
        return_on_assets=metric_data.get("roaTTM"),
        return_on_invested_capital=metric_data.get("roicTTM"),
        asset_turnover=metric_data.get("assetTurnoverTTM"),
        inventory_turnover=metric_data.get("inventoryTurnoverTTM"),
        receivables_turnover=metric_data.get("receivablesTurnoverTTM"),
        days_sales_outstanding=None,
        operating_cycle=None,
        working_capital_turnover=None,
        current_ratio=metric_data.get("currentRatioAnnual"),
        quick_ratio=metric_data.get("quickRatioAnnual"),
        cash_ratio=None,
        operating_cash_flow_ratio=None,
        debt_to_equity=metric_data.get("totalDebt/totalEquityAnnual"),
        debt_to_assets=None,
        interest_coverage=None,
        revenue_growth=metric_data.get("revenueGrowthTTMYoy"),
        earnings_growth=None,
        book_value_growth=None,
        earnings_per_share_growth=metric_data.get("epsGrowthTTMYoy"),
        free_cash_flow_growth=None,
        operating_income_growth=None,
        ebitda_growth=None,
        payout_ratio=metric_data.get("payoutRatioAnnual"),
        earnings_per_share=metric_data.get("epsBasicExclExtraItemsTTM"),
        book_value_per_share=metric_data.get("bookValuePerShareAnnual"),
        free_cash_flow_per_share=None,
    )


def _coerce_float(value) -> Optional[float]:
    try:
        if value is None:
            return None
        return float(value)
    except (TypeError, ValueError):
        return None


def _ratio_or_none(numerator, denominator) -> Optional[float]:
    top = _coerce_float(numerator)
    bottom = _coerce_float(denominator)
    if top is None or bottom in (None, 0.0):
        return None
    return top / bottom


def _nested_get(payload: dict, *keys: str):
    current = payload
    for key in keys:
        if not isinstance(current, dict):
            return None
        current = current.get(key)
    return current


def _parse_news_datetime(value) -> Optional[pd.Timestamp]:
    if value is None:
        return None
    try:
        if isinstance(value, (int, float)):
            return pd.to_datetime(int(value), unit="s")
        return pd.to_datetime(value)
    except (TypeError, ValueError):
        return None


def _normalize_timestamp(value: Optional[pd.Timestamp]) -> Optional[pd.Timestamp]:
    if value is None:
        return None
    if value.tzinfo is not None:
        return value.tz_convert(None)
    return value


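# Note: Finnhub reports "change" as a signed share delta (negative for
# disposals), so absolute values are used for the share and value fields and
# the pre-transaction position is reconstructed as share - change.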
def _convert_finnhub_insider_trade(ticker: str, trade: dict) -> InsiderTrade:
    """Convert Finnhub insider trade format to InsiderTrade model."""
    shares_after = trade.get("share", 0)
    change = trade.get("change", 0)

    return InsiderTrade(
        ticker=ticker,
        issuer=None,
        name=trade.get("name", ""),
        title=None,
        is_board_director=None,
        transaction_date=trade.get("transactionDate", ""),
        transaction_shares=abs(change),
        transaction_price_per_share=trade.get("transactionPrice", 0.0),
        transaction_value=abs(change) * trade.get("transactionPrice", 0.0),
        shares_owned_before_transaction=(
            shares_after - change if shares_after and change else None
        ),
        shares_owned_after_transaction=float(shares_after)
        if shares_after
        else None,
        security_title=None,
        filing_date=trade.get("filingDate", ""),
    )


def _fetch_fd_insider_trades(
    ticker: str,
    start_date: Optional[str],
    end_date: str,
    limit: int,
) -> list[InsiderTrade]:
    headers = {"X-API-KEY": _env_required("FINANCIAL_DATASETS_API_KEY")}
    url = f"https://api.financialdatasets.ai/insider-trades/?ticker={ticker}&filing_date_lte={end_date}"
    if start_date:
        url += f"&filing_date_gte={start_date}"
    url += f"&limit={limit}"
    response = _make_api_request(url, headers)
    return InsiderTradeResponse(**response.json()).insider_trades


def _fetch_finnhub_company_news(
    ticker: str,
    start_date: Optional[str],
    end_date: str,
    limit: int,
) -> list[CompanyNews]:
    client = _get_finnhub_client()
    from_date = start_date or (
        datetime.datetime.strptime(end_date, "%Y-%m-%d")
        - datetime.timedelta(days=30)
    ).strftime("%Y-%m-%d")
    news_data = client.company_news(ticker, _from=from_date, to=end_date)
    return [
        CompanyNews(
            ticker=ticker,
            title=news_item.get("headline", ""),
            related=news_item.get("related", ""),
            source=news_item.get("source", ""),
            date=(
                datetime.datetime.fromtimestamp(
                    news_item.get("datetime", 0),
                    datetime.timezone.utc,
                ).strftime("%Y-%m-%d")
                if news_item.get("datetime")
                else None
            ),
            url=news_item.get("url", ""),
            summary=news_item.get("summary", ""),
            category=news_item.get("category", ""),
        )
        for news_item in news_data[:limit]
    ]


def _fetch_fd_company_news(
    ticker: str,
    start_date: Optional[str],
    end_date: str,
    limit: int,
) -> list[CompanyNews]:
    headers = {"X-API-KEY": _env_required("FINANCIAL_DATASETS_API_KEY")}
    url = f"https://api.financialdatasets.ai/news/?ticker={ticker}&end_date={end_date}&limit={limit}"
    if start_date:
        url += f"&start_date={start_date}"
    response = _make_api_request(url, headers)
    return CompanyNewsResponse(**response.json()).news


def _fetch_fd_market_cap_today(ticker: str) -> Optional[float]:
    headers = {"X-API-KEY": _env_required("FINANCIAL_DATASETS_API_KEY")}
    url = f"https://api.financialdatasets.ai/company/facts/?ticker={ticker}"
    response = _make_api_request(url, headers)
    return CompanyFactsResponse(**response.json()).company_facts.market_cap
67
backend/data/provider_utils.py
Normal file
@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
"""Shared market symbol normalization helpers."""

from dataclasses import dataclass


@dataclass(frozen=True)
class MarketSymbol:
    """Normalized symbol metadata."""

    raw: str
    canonical: str
    market: str


def canonical_symbol(symbol: str) -> str:
    """Return canonical uppercase symbol for storage and routing."""
    return (symbol or "").strip().upper()


def normalize_symbol(symbol: str) -> str:
    """
    Normalize symbols across US and exchange-prefixed formats.

    Examples:
        - sh600519 -> 600519
        - 600519.SH -> 600519
        - aapl -> AAPL
        - hk00700 -> HK00700
    """
    canonical = canonical_symbol(symbol)

    if canonical.startswith(("SH", "SZ", "BJ")) and len(canonical) > 2:
        candidate = canonical[2:]
        if candidate.isdigit() and len(candidate) in (5, 6):
            return candidate

    if "." in canonical:
        base, suffix = canonical.rsplit(".", 1)
        if suffix in {"SH", "SZ", "SS", "BJ"} and base.isdigit():
            return base

    return canonical


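# Heuristic: HK-prefixed or bare five-digit numeric codes are treated as
# Hong Kong listings, alphabetic (or at least non-numeric) symbols as US
# listings, and everything else (six-digit numeric codes) as mainland China.
# E.g. "00700" -> "hk", "AAPL" -> "us", "600519" -> "cn".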
def detect_market(symbol: str) -> str:
    """Infer market tag from normalized symbol."""
    normalized = normalize_symbol(symbol)
    if normalized.startswith("HK") or (
        normalized.isdigit() and len(normalized) == 5
    ):
        return "hk"
    if normalized.isalpha() or (
        "/" not in normalized and not normalized.isdigit()
    ):
        return "us"
    return "cn"


def describe_symbol(symbol: str) -> MarketSymbol:
    """Return normalized symbol metadata."""
    normalized = normalize_symbol(symbol)
    return MarketSymbol(
        raw=symbol,
        canonical=normalized,
        market=detect_market(normalized),
    )
387
backend/data/ret_data_updater.py
Normal file
@@ -0,0 +1,387 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Automatic Incremental Historical Data Update Module

Features:
1. Fetch stock historical data from the configured API (Finnhub or Financial Datasets)
2. Incrementally update CSV files in the ret_data directory
3. Automatically detect the last update date and only download new data
4. Calculate returns (ret)
5. Support batch updates for multiple stocks
"""

# flake8: noqa: E501

import logging
import os
import sys
import time
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional

import exchange_calendars as xcals
import pandas as pd
import pandas_market_calendars as mcal
from dotenv import load_dotenv

# Add the project root to sys.path before importing backend packages;
# otherwise the imports below fail when this module is run as a script.
BASE_DIR = Path(__file__).resolve().parents[2]
if str(BASE_DIR) not in sys.path:
    sys.path.append(str(BASE_DIR))

from backend.config.data_config import get_config
from backend.tools.data_tools import get_prices, prices_to_df

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
logger = logging.getLogger(__name__)


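# Typical invocation (illustrative; tickers and dates are examples):
#   python backend/data/ret_data_updater.py --tickers AAPL,MSFT --start-date 2022-01-01
#   python backend/data/ret_data_updater.py --force   # full re-download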
class DataUpdater:
    """Data updater"""

    data_dir: Path

    def __init__(
        self,
        data_dir: Optional[str] = None,
        start_date: str = "2022-01-01",
    ):
        """
        Initialize the data updater.

        Args:
            data_dir: Data storage directory, defaults to backend/data/ret_data
            start_date: Historical data start date (YYYY-MM-DD)
        """
        # Get config from the centralized source
        config = get_config()
        self.data_source = config.source
        self.api_key = config.api_key

        # Set the data directory
        if data_dir is None:
            self.data_dir = BASE_DIR / "backend" / "data" / "ret_data"
        else:
            self.data_dir = Path(data_dir)

        # Ensure the directory exists
        self.data_dir.mkdir(parents=True, exist_ok=True)

        self.start_date = start_date

        # Initialize the Finnhub client if needed
        if self.data_source == "finnhub":
            import finnhub

            self.client = finnhub.Client(api_key=self.api_key)
            logger.info("Finnhub client initialized")
        else:
            self.client = None
            logger.info("Financial Datasets API configured")

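    # Calendar preference: pandas_market_calendars (NYSE) first, then
    # exchange_calendars (XNYS); both are imported unconditionally above, so
    # the None checks below are defensive. Plain business days are the last
    # resort if both calendar lookups fail.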
    def get_trading_dates(self, start_date: str, end_date: str) -> List[str]:
        """Get US stock market trading date sequence."""
        try:
            if mcal is not None:
                nyse = mcal.get_calendar("NYSE")
                trading_dates = nyse.valid_days(
                    start_date=start_date,
                    end_date=end_date,
                )
                return [date.strftime("%Y-%m-%d") for date in trading_dates]

            elif xcals is not None:
                nyse = xcals.get_calendar("XNYS")
                trading_dates = nyse.sessions_in_range(start_date, end_date)
                return [date.strftime("%Y-%m-%d") for date in trading_dates]

        except Exception as e:
            logger.warning(
                f"Failed to get US trading calendar, using business days: {e}",
            )

        # Fallback to simple business day method
        date_range = pd.date_range(start_date, end_date, freq="B")
        return [date.strftime("%Y-%m-%d") for date in date_range]

    def get_last_date_from_csv(self, ticker: str) -> Optional[datetime]:
        """Get last data date from CSV file."""
        csv_path = self.data_dir / f"{ticker}.csv"

        if not csv_path.exists():
            logger.info(f"{ticker}.csv does not exist, will create new file")
            return None

        try:
            df = pd.read_csv(csv_path)
            if df.empty or "time" not in df.columns:
                return None

            last_date_str = df["time"].iloc[-1]
            last_date = datetime.strptime(last_date_str, "%Y-%m-%d")
            logger.info(f"{ticker} last data date: {last_date_str}")
            return last_date
        except Exception as e:
            logger.warning(f"Failed to read {ticker}.csv: {e}")
            return None

    def fetch_data_from_api(
        self,
        ticker: str,
        start_date: datetime,
        end_date: datetime,
    ) -> Optional[pd.DataFrame]:
        """Fetch data from configured API."""
        start_date_str = start_date.strftime("%Y-%m-%d")
        end_date_str = end_date.strftime("%Y-%m-%d")

        logger.info(
            f"Fetching {ticker} data from {self.data_source}: {start_date_str} to {end_date_str}",
        )

        prices = get_prices(
            ticker=ticker,
            start_date=start_date_str,
            end_date=end_date_str,
        )

        if not prices:
            logger.warning(f"{ticker} no data returned from API")
            return None

        # Convert to DataFrame
        df = prices_to_df(prices)
        df = df.reset_index()
        df["time"] = df["Date"].dt.strftime("%Y-%m-%d")

        # Calculate returns (next day return)
        df["ret"] = df["close"].pct_change().shift(-1)

        # Select needed columns
        df = df[["open", "close", "high", "low", "volume", "time", "ret"]]

        logger.info(f"Successfully fetched {ticker} data: {len(df)} records")
        return df

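    # Note: after concatenation, rows are deduplicated on "time" keeping the
    # freshest copy, and "ret" is recomputed over the full series because the
    # forward return of the last old row depends on the first new row.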
    def merge_and_save(self, ticker: str, new_data: pd.DataFrame) -> bool:
        """Merge old and new data and save."""
        csv_path = self.data_dir / f"{ticker}.csv"

        try:
            if csv_path.exists():
                old_data = pd.read_csv(csv_path)
                logger.info(f"{ticker} existing data: {len(old_data)} records")

                # Merge and deduplicate
                combined = pd.concat([old_data, new_data], ignore_index=True)
                combined = combined.drop_duplicates(
                    subset=["time"],
                    keep="last",
                )
                combined = combined.sort_values("time").reset_index(drop=True)

                # Recalculate returns
                combined["ret"] = combined["close"].pct_change().shift(-1)

                logger.info(f"{ticker} merged data: {len(combined)} records")
            else:
                combined = new_data
                logger.info(f"{ticker} new file: {len(combined)} records")

            combined.to_csv(csv_path, index=False)
            logger.info(f"{ticker} data saved to: {csv_path}")
            return True

        except Exception as e:
            logger.error(f"Failed to save {ticker} data: {e}")
            return False

    def update_ticker(
        self,
        ticker: str,
        force_full_update: bool = False,
    ) -> bool:
        """Update data for a single stock."""
        logger.info(f"{'='*60}")
        logger.info(f"Starting update for {ticker}")
        logger.info(f"{'='*60}")

        # Determine start date
        if force_full_update:
            start_date = datetime.strptime(self.start_date, "%Y-%m-%d")
            logger.info(f"Force full update, start date: {start_date.date()}")
        else:
            last_date = self.get_last_date_from_csv(ticker)
            if last_date:
                start_date = last_date + timedelta(days=1)
                logger.info(
                    f"Incremental update, start date: {start_date.date()}",
                )
            else:
                start_date = datetime.strptime(self.start_date, "%Y-%m-%d")
                logger.info(f"First update, start date: {start_date.date()}")

        end_date = datetime.now()

        if start_date.date() >= end_date.date():
            logger.info(f"{ticker} data is up to date, no update needed")
            return True

        new_data = self.fetch_data_from_api(ticker, start_date, end_date)

        if new_data is None or new_data.empty:
            days_diff = (end_date - start_date).days
            if days_diff <= 3:
                logger.info(
                    f"{ticker} has no new data (may be weekend/holiday)",
                )
                return True
            else:
                logger.warning(f"{ticker} has no new data")
                return False

        success = self.merge_and_save(ticker, new_data)

        if success:
            logger.info(f"{ticker} update completed")
        else:
            logger.error(f"{ticker} update failed")

        return success

    def update_all_tickers(
        self,
        tickers: List[str],
        force_full_update: bool = False,
    ) -> Dict[str, bool]:
        """Batch update multiple stocks."""
        results = {}

        logger.info(f"{'='*60}")
        logger.info(f"Starting batch update for {len(tickers)} stocks")
        logger.info(f"Stock list: {', '.join(tickers)}")
        logger.info(f"{'='*60}")

        for i, ticker in enumerate(tickers, 1):
            logger.info(f"[{i}/{len(tickers)}] Processing {ticker}")
            results[ticker] = self.update_ticker(ticker, force_full_update)

            # API rate limiting
            if i < len(tickers):
                time.sleep(1)

        # Print summary
        logger.info(f"{'='*60}")
        logger.info("Update Summary")
        logger.info(f"{'='*60}")

        success_count = sum(results.values())
        fail_count = len(results) - success_count

        logger.info(f"Success: {success_count}")
        logger.info(f"Failed: {fail_count}")

        if fail_count > 0:
            failed_tickers = [t for t, s in results.items() if not s]
            logger.warning(f"Failed stocks: {', '.join(failed_tickers)}")

        logger.info(f"{'='*60}\n")

        return results


def main():
    """Command line entry point"""
    import argparse

    parser = argparse.ArgumentParser(
        description="Automatically update stock historical data",
    )
    parser.add_argument(
        "--tickers",
        type=str,
        help="Stock ticker list (comma-separated), e.g.: AAPL,MSFT,GOOGL",
    )
    parser.add_argument(
        "--data-dir",
        type=str,
        help="Data storage directory (default: backend/data/ret_data)",
    )
    parser.add_argument(
        "--start-date",
        type=str,
        default="2022-01-01",
        help="Historical data start date (YYYY-MM-DD, default: 2022-01-01)",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Force full update (re-download all data)",
    )

    args = parser.parse_args()

    # Load environment variables
    load_dotenv()

    # Validate that an API key is available
    try:
        config = get_config()
        logger.info(f"Using data source: {config.source}")
    except ValueError as e:
        logger.error(str(e))
        sys.exit(1)

    # Get the stock list
    if args.tickers:
        tickers = [t.strip().upper() for t in args.tickers.split(",")]
    else:
        tickers_env = os.getenv("TICKERS", "")
        if tickers_env:
            tickers = [t.strip().upper() for t in tickers_env.split(",")]
        else:
            logger.error("Stock list not provided")
            logger.error(
                "Please set via --tickers parameter or TICKERS environment variable",
            )
            sys.exit(1)

    # Create the updater
    updater = DataUpdater(
        data_dir=args.data_dir,
        start_date=args.start_date,
    )

    # Execute the update
    try:
        results = updater.update_all_tickers(
            tickers,
            force_full_update=args.force,
        )
    except Exception as e:
        # API error (e.g., weekend/holiday with no data)
        logger.error(f"Batch update aborted: {e}")
        sys.exit(1)

    # Exit status
    success_count = sum(results.values())
    if success_count == len(results):
        logger.info("All stocks updated successfully!")
        sys.exit(0)
    elif success_count == 0:
        logger.warning("All stocks have no new data (may be weekend/holiday)")
        sys.exit(0)
    else:
        logger.warning("Some stocks failed to update, but will continue")
        sys.exit(0)


if __name__ == "__main__":
    main()
50
backend/data/schema.py
Normal file
@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
"""Compatibility schema bridge.

This module preserves the legacy ``backend.data.schema`` import path while
delegating the actual schema definitions to ``shared.schema``. Keeping one
canonical DTO set avoids drift as the monolith is split into service-specific
packages.
"""

from shared.schema import (
    AgentStateData,
    AgentStateMetadata,
    AnalystSignal,
    CompanyFacts,
    CompanyFactsResponse,
    CompanyNews,
    CompanyNewsResponse,
    FinancialMetrics,
    FinancialMetricsResponse,
    InsiderTrade,
    InsiderTradeResponse,
    LineItem,
    LineItemResponse,
    Portfolio,
    Position,
    Price,
    PriceResponse,
    TickerAnalysis,
)

__all__ = [
    "Price",
    "PriceResponse",
    "FinancialMetrics",
    "FinancialMetricsResponse",
    "LineItem",
    "LineItemResponse",
    "InsiderTrade",
    "InsiderTradeResponse",
    "CompanyNews",
    "CompanyNewsResponse",
    "CompanyFacts",
    "CompanyFactsResponse",
    "Position",
    "Portfolio",
    "AnalystSignal",
    "TickerAnalysis",
    "AgentStateData",
    "AgentStateMetadata",
]
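# Because these are plain re-exports, both import paths resolve to the same
# class objects, e.g.:
#   from backend.data.schema import Price
#   from shared.schema import Price   # identical class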