Files
evotraders/backend/tools/data_tools.py
cillin 12de93aa30 feat: initial commit - EvoTraders project
量化交易多智能体系统,包含:
- 分析师、投资组合经理、风险经理等智能体
- 股票分析、投资组合管理、风险控制工具
- React 前端界面
- FastAPI 后端服务

Co-Authored-By: Claude <noreply@anthropic.com>
2026-03-13 04:34:06 +08:00

743 lines
22 KiB
Python

# -*- coding: utf-8 -*-
# flake8: noqa: E501
# pylint: disable=C0301
"""
Data fetching tools for financial data.
All functions use centralized data source configuration from data_config.py.
The data source is automatically determined based on available API keys:
- Priority: FINNHUB_API_KEY > FINANCIAL_DATASETS_API_KEY
"""
import datetime
import time
import finnhub
import pandas as pd
import pandas_market_calendars as mcal
import requests
from backend.config.data_config import (
get_config,
get_api_key,
)
from backend.data.cache import get_cache
from backend.data.schema import (
CompanyFactsResponse,
CompanyNews,
CompanyNewsResponse,
FinancialMetrics,
FinancialMetricsResponse,
InsiderTrade,
InsiderTradeResponse,
LineItem,
LineItemResponse,
Price,
PriceResponse,
)
from backend.utils.settlement import logger
# Module-level cache handle, obtained once at import time via get_cache().
# The fetchers in this module read and write their results through it.
_cache = get_cache()
def get_last_tradeday(date: str) -> str:
    """
    Get the previous trading day for the specified date.

    The NYSE calendar is queried for the 90 days ending on ``date``. If
    ``date`` is itself a session, the session before it is returned;
    otherwise the most recent session inside the window is returned.

    Args:
        date: Date string (YYYY-MM-DD)

    Returns:
        Previous trading day date string (YYYY-MM-DD). If no calendar is
        available, or the lookback window contains no session, the previous
        calendar day is returned as a best-effort fallback.

    Raises:
        ValueError: If ``date`` cannot be parsed as YYYY-MM-DD.
    """
    current_date = datetime.datetime.strptime(date, "%Y-%m-%d")
    # Re-format the parsed date so un-padded input (e.g. "2024-1-5") still
    # matches the zero-padded strings built from the calendar below.
    normalized_date = current_date.strftime("%Y-%m-%d")
    # Computed up front: it is both the recursion argument and the final
    # fallback, so it must be bound on every path through the function.
    previous_calendar_day = (
        current_date - datetime.timedelta(days=1)
    ).strftime("%Y-%m-%d")

    calendar = mcal.get_calendar("NYSE")
    if calendar is None:
        return previous_calendar_day

    # Look back 90 days so that at least one earlier session is in range
    start_search = (
        current_date - datetime.timedelta(days=90)
    ).strftime("%Y-%m-%d")
    if hasattr(calendar, "valid_days"):
        # pandas_market_calendars
        trading_dates = calendar.valid_days(
            start_date=start_search,
            end_date=normalized_date,
        )
    else:
        # exchange_calendars
        trading_dates = calendar.sessions_in_range(
            start_search,
            normalized_date,
        )
    trading_dates_list = [
        pd.Timestamp(d).strftime("%Y-%m-%d") for d in trading_dates
    ]

    if normalized_date in trading_dates_list:
        idx = trading_dates_list.index(normalized_date)
        if idx > 0:
            return trading_dates_list[idx - 1]
        # The date is the only session in the window: search further back
        return get_last_tradeday(previous_calendar_day)

    # The date is not a session: the newest session in the window precedes it
    if trading_dates_list:
        return trading_dates_list[-1]
    return previous_calendar_day
def _make_api_request(
    url: str,
    headers: dict,
    method: str = "GET",
    json_data: dict | None = None,
    max_retries: int = 3,
    timeout: float | None = 30.0,
) -> requests.Response:
    """
    Make an API request with rate limiting handling and moderate backoff.

    Only HTTP 429 responses are retried; any other response (success or
    error) is returned to the caller as-is, as is the final 429 once the
    retry budget is exhausted.

    Args:
        url: The URL to request
        headers: Headers to include in the request
        method: HTTP method; "POST" (case-insensitive) sends ``json_data``
            as the JSON body, anything else is issued as a GET
        json_data: JSON data for POST requests
        max_retries: Maximum number of retries after the initial attempt
            (default: 3). Negative values are treated as 0.
        timeout: Seconds to wait for the server before giving up on one
            attempt (default: 30). Pass None to wait indefinitely.

    Returns:
        requests.Response: The response object

    Raises:
        requests.exceptions.RequestException: Propagated from ``requests``
            on connection failures or when ``timeout`` elapses.
    """
    # Always make at least one attempt so a response is always returned
    attempts = max(max_retries, 0) + 1
    is_post = method.upper() == "POST"

    for attempt in range(attempts):
        if is_post:
            response = requests.post(
                url,
                headers=headers,
                json=json_data,
                timeout=timeout,
            )
        else:
            response = requests.get(url, headers=headers, timeout=timeout)

        out_of_retries = attempt == attempts - 1
        if response.status_code != 429 or out_of_retries:
            break

        # Linear backoff: 60s, 90s, 120s, 150s...
        delay = 60 + (30 * attempt)
        print(
            f"Rate limited (429). Attempt {attempt + 1}/{attempts}. Waiting {delay}s before retrying...",
        )
        time.sleep(delay)

    # Return the response (whether success, other errors, or final 429)
    return response
def get_prices(
    ticker: str,
    start_date: str,
    end_date: str,
) -> list[Price]:
    """
    Fetch price data from cache or API.

    Uses centralized data source configuration (FINNHUB_API_KEY prioritized).
    Non-empty results are stored in the module cache under a key built from
    the ticker, the date range and the data source.

    Args:
        ticker: Stock ticker symbol
        start_date: Start date (YYYY-MM-DD)
        end_date: End date (YYYY-MM-DD)

    Returns:
        list[Price]: List of Price objects; empty when the provider has no
        candles for the requested range.

    Raises:
        ValueError: If the Financial Datasets API answers with a non-200
            status.
    """
    config = get_config()
    data_source = config.source
    api_key = config.api_key

    # Create a cache key that includes all parameters to ensure exact matches
    cache_key = f"{ticker}_{start_date}_{end_date}_{data_source}"

    # Check cache first - simple exact match
    if cached_data := _cache.get_prices(cache_key):
        return [Price(**price) for price in cached_data]

    prices = []
    if data_source == "finnhub":
        client = finnhub.Client(api_key=api_key)

        # NOTE(review): these naive datetimes are interpreted in the host's
        # local timezone, both for the request window here and for the
        # candle labels below - confirm that matches Finnhub's timestamps.
        start_timestamp = int(
            datetime.datetime.strptime(start_date, "%Y-%m-%d").timestamp(),
        )
        # +1 day so the end date itself falls inside the window
        end_timestamp = int(
            (
                datetime.datetime.strptime(end_date, "%Y-%m-%d")
                + datetime.timedelta(days=1)
            ).timestamp(),
        )

        candles = client.stock_candles(
            ticker,
            "D",
            start_timestamp,
            end_timestamp,
        )

        # A "no_data" answer from Finnhub carries only a status field and no
        # o/h/l/c/v/t arrays, so only index into them when "t" is populated.
        if candles and candles.get("t"):
            rows = zip(
                candles["o"],
                candles["c"],
                candles["h"],
                candles["l"],
                candles["v"],
                candles["t"],
            )
            prices = [
                Price(
                    open=open_,
                    close=close,
                    high=high,
                    low=low,
                    volume=int(volume),
                    time=datetime.datetime.fromtimestamp(stamp).strftime(
                        "%Y-%m-%d",
                    ),
                )
                for open_, close, high, low, volume, stamp in rows
            ]
    else:  # financial_datasets
        headers = {"X-API-KEY": api_key}
        url = f"https://api.financialdatasets.ai/prices/?ticker={ticker}&interval=day&interval_multiplier=1&start_date={start_date}&end_date={end_date}"
        response = _make_api_request(url, headers)
        if response.status_code != 200:
            raise ValueError(
                f"Error fetching data: {ticker} - {response.status_code} - {response.text}",
            )
        # Parse response with Pydantic model
        price_response = PriceResponse(**response.json())
        prices = price_response.prices

    # Empty results are deliberately not cached
    if not prices:
        return []

    # Cache the results using the comprehensive cache key
    _cache.set_prices(cache_key, [p.model_dump() for p in prices])
    return prices
def get_financial_metrics(
    ticker: str,
    end_date: str,
    period: str = "ttm",
    limit: int = 10,
) -> list[FinancialMetrics]:
    """
    Return financial metrics for ``ticker``, served from cache when possible.

    The provider is picked from the centralized configuration. With Finnhub
    a single record is built from the "basic financials" payload; with
    Financial Datasets the records come back from the financial-metrics
    endpoint. Non-empty results are cached before being returned.

    Args:
        ticker: Stock ticker symbol
        end_date: End date (YYYY-MM-DD)
        period: Period type (default: "ttm")
        limit: Number of records to fetch

    Returns:
        list[FinancialMetrics]: List of financial metrics (possibly empty)

    Raises:
        ValueError: If the Financial Datasets API answers with a non-200
            status.
    """
    settings = get_config()
    source = settings.source
    key = settings.api_key

    # Every argument is part of the key, so only exact repeats hit the cache
    cache_key = f"{ticker}_{period}_{end_date}_{limit}_{source}"
    cached_rows = _cache.get_financial_metrics(cache_key)
    if cached_rows:
        return [FinancialMetrics(**row) for row in cached_rows]

    if source == "finnhub":
        # metric="all" asks Finnhub for every available metric
        payload = finnhub.Client(api_key=key).company_basic_financials(
            ticker,
            "all",
        )
        if not payload or "metric" not in payload:
            return []
        # Only the "metric" section of the payload is mapped onto the model
        results = [
            _map_finnhub_metrics(
                ticker,
                end_date,
                period,
                payload.get("metric", {}),
            ),
        ]
    else:  # financial_datasets
        response = _make_api_request(
            f"https://api.financialdatasets.ai/financial-metrics/?ticker={ticker}&report_period_lte={end_date}&limit={limit}&period={period}",
            {"X-API-KEY": key},
        )
        if response.status_code != 200:
            raise ValueError(
                f"Error fetching data: {ticker} - {response.status_code} - {response.text}",
            )
        results = FinancialMetricsResponse(**response.json()).financial_metrics

    if not results:
        return []

    # Store plain dicts so the cache stays independent of the model class
    _cache.set_financial_metrics(
        cache_key,
        [item.model_dump() for item in results],
    )
    return results
def _map_finnhub_metrics(
    ticker: str,
    end_date: str,
    period: str,
    metric_data: dict,
) -> FinancialMetrics:
    """
    Build a FinancialMetrics record from a Finnhub ``metric`` dictionary.

    Args:
        ticker: Stock ticker symbol, copied onto the record
        end_date: Stored as the record's ``report_period``
        period: Stored as the record's ``period``
        metric_data: Finnhub metric name -> value mapping

    Returns:
        FinancialMetrics: Currency is fixed to "USD". Fields listed in the
        table below are read with ``metric_data.get`` (None when absent);
        all remaining fields are set to None.
    """
    # Model field -> Finnhub metric key
    finnhub_key_by_field = {
        "market_cap": "marketCapitalization",
        "price_to_earnings_ratio": "peBasicExclExtraTTM",
        "price_to_book_ratio": "pbAnnual",
        "price_to_sales_ratio": "psAnnual",
        "gross_margin": "grossMarginTTM",
        "operating_margin": "operatingMarginTTM",
        "net_margin": "netProfitMarginTTM",
        "return_on_equity": "roeTTM",
        "return_on_assets": "roaTTM",
        "return_on_invested_capital": "roicTTM",
        "asset_turnover": "assetTurnoverTTM",
        "inventory_turnover": "inventoryTurnoverTTM",
        "receivables_turnover": "receivablesTurnoverTTM",
        "current_ratio": "currentRatioAnnual",
        "quick_ratio": "quickRatioAnnual",
        "debt_to_equity": "totalDebt/totalEquityAnnual",
        "revenue_growth": "revenueGrowthTTMYoy",
        "earnings_per_share_growth": "epsGrowthTTMYoy",
        "payout_ratio": "payoutRatioAnnual",
        "earnings_per_share": "epsBasicExclExtraItemsTTM",
        "book_value_per_share": "bookValuePerShareAnnual",
    }
    # Model fields this mapping does not populate
    unmapped_fields = (
        "enterprise_value",
        "enterprise_value_to_ebitda_ratio",
        "enterprise_value_to_revenue_ratio",
        "free_cash_flow_yield",
        "peg_ratio",
        "days_sales_outstanding",
        "operating_cycle",
        "working_capital_turnover",
        "cash_ratio",
        "operating_cash_flow_ratio",
        "debt_to_assets",
        "interest_coverage",
        "earnings_growth",
        "book_value_growth",
        "free_cash_flow_growth",
        "operating_income_growth",
        "ebitda_growth",
        "free_cash_flow_per_share",
    )

    values = dict.fromkeys(unmapped_fields)
    lookup = metric_data.get
    for field_name, finnhub_key in finnhub_key_by_field.items():
        values[field_name] = lookup(finnhub_key)

    return FinancialMetrics(
        ticker=ticker,
        report_period=end_date,
        period=period,
        currency="USD",
        **values,
    )
def search_line_items(
    ticker: str,
    line_items: list[str],
    end_date: str,
    period: str = "ttm",
    limit: int = 10,
) -> list[LineItem]:
    """
    Search financial line items through the Financial Datasets API.

    This is a best-effort lookup: a non-200 response or any exception is
    logged and turned into an empty list so callers can degrade gracefully.

    Args:
        ticker: Stock ticker symbol
        line_items: Names of the line items to request
        end_date: End date (YYYY-MM-DD)
        period: Period type (default: "ttm")
        limit: Maximum number of results requested and returned

    Returns:
        list[LineItem]: At most ``limit`` results; empty on any failure.
    """
    try:
        request_headers = {"X-API-KEY": get_api_key()}
        payload = {
            "tickers": [ticker],
            "line_items": line_items,
            "end_date": end_date,
            "period": period,
            "limit": limit,
        }
        response = _make_api_request(
            "https://api.financialdatasets.ai/financials/search/line-items",
            request_headers,
            method="POST",
            json_data=payload,
        )

        if response.status_code == 200:
            found = LineItemResponse(**response.json()).search_results
            # Trim client-side as well as passing limit in the request body
            return found[:limit] if found else []

        logger.info(
            f"Warning: Failed to fetch line items for {ticker}: "
            f"{response.status_code} - {response.text}",
        )
        return []
    except Exception as e:
        # Deliberately broad: this lookup must never abort the caller
        logger.info(
            f"Warning: Exception while fetching line items for {ticker}: {str(e)}",
        )
        return []
def _fetch_finnhub_insider_trades(
    ticker: str,
    start_date: str | None,
    end_date: str,
    limit: int,
    api_key: str,
) -> list[InsiderTrade]:
    """
    Fetch insider transactions for ``ticker`` from Finnhub.

    Args:
        ticker: Stock ticker symbol
        start_date: Start of the window (YYYY-MM-DD); when falsy the window
            starts 365 days before ``end_date``
        end_date: End of the window (YYYY-MM-DD)
        limit: Maximum number of transactions to convert
        api_key: Finnhub API key

    Returns:
        list[InsiderTrade]: The first ``limit`` transactions of the payload,
        converted to the internal model; empty when the payload has no
        "data" entry.
    """
    client = finnhub.Client(api_key=api_key)

    if start_date:
        window_start = start_date
    else:
        # No explicit start: default to the year leading up to end_date
        one_year_earlier = datetime.datetime.strptime(
            end_date,
            "%Y-%m-%d",
        ) - datetime.timedelta(days=365)
        window_start = one_year_earlier.strftime("%Y-%m-%d")

    payload = client.stock_insider_transactions(
        ticker,
        window_start,
        end_date,
    )
    if not payload or "data" not in payload:
        return []

    converted = []
    for raw_trade in payload["data"][:limit]:
        converted.append(_convert_finnhub_insider_trade(ticker, raw_trade))
    return converted
def _fetch_fd_insider_trades(
    ticker: str,
    start_date: str | None,
    end_date: str,
    limit: int,
    api_key: str,
) -> list[InsiderTrade]:
    """
    Fetch insider trades from Financial Datasets API.

    Without ``start_date`` a single page of at most ``limit`` trades is
    fetched. With ``start_date`` the function pages backwards in time by
    moving the upper filing-date bound to the oldest filing date of each
    full page, until a short page, an empty page, or ``start_date`` is
    reached.

    Args:
        ticker: Stock ticker symbol
        start_date: Optional lower bound on the filing date (YYYY-MM-DD)
        end_date: Upper bound on the filing date (YYYY-MM-DD)
        limit: Page size passed to the API
        api_key: Financial Datasets API key

    Returns:
        list[InsiderTrade]: All trades collected across pages.

    Raises:
        ValueError: If the API answers with a non-200 status.
    """
    headers = {"X-API-KEY": api_key}
    all_trades = []
    current_end_date = end_date

    while True:
        url = f"https://api.financialdatasets.ai/insider-trades/?ticker={ticker}&filing_date_lte={current_end_date}"
        if start_date:
            url += f"&filing_date_gte={start_date}"
        url += f"&limit={limit}"

        response = _make_api_request(url, headers)
        if response.status_code != 200:
            raise ValueError(
                f"Error fetching data: {ticker} - {response.status_code} - {response.text}",
            )

        insider_trades = InsiderTradeResponse(**response.json()).insider_trades
        if not insider_trades:
            break
        all_trades.extend(insider_trades)

        # Keep paging only for a bounded range whose last page was full
        if not start_date or len(insider_trades) < limit:
            break

        filing_dates = [
            trade.filing_date
            for trade in insider_trades
            if trade.filing_date is not None
        ]
        if not filing_dates:
            # Nothing to derive the next upper bound from
            break
        oldest_filing_date = min(filing_dates).split("T")[0]

        if oldest_filing_date <= start_date:
            break
        # If the bound does not move back, the next request would be
        # identical to this one and the loop would never terminate.
        if oldest_filing_date >= current_end_date:
            break
        # NOTE(review): the bound is sent as filing_date_lte, so trades
        # filed on this date may be returned again by the next page -
        # confirm whether callers need de-duplication.
        current_end_date = oldest_filing_date

    return all_trades
def get_insider_trades(
    ticker: str,
    end_date: str,
    start_date: str | None = None,
    limit: int = 1000,
) -> list[InsiderTrade]:
    """
    Return insider trades for ``ticker``, served from cache when possible.

    Args:
        ticker: Stock ticker symbol
        end_date: End date (YYYY-MM-DD)
        start_date: Optional start date (YYYY-MM-DD)
        limit: Maximum number of trades (Finnhub) or page size
            (Financial Datasets)

    Returns:
        list[InsiderTrade]: Trades from the configured provider; empty when
        none were found. Only non-empty results are cached.
    """
    settings = get_config()
    source = settings.source
    key = settings.api_key

    cache_key = f"{ticker}_{start_date or 'none'}_{end_date}_{limit}_{source}"
    cached_rows = _cache.get_insider_trades(cache_key)
    if cached_rows:
        return [InsiderTrade(**row) for row in cached_rows]

    # Both fetchers share one signature, so pick one and call it once
    if source == "finnhub":
        fetch = _fetch_finnhub_insider_trades
    else:
        fetch = _fetch_fd_insider_trades
    trades = fetch(ticker, start_date, end_date, limit, key)

    if trades:
        _cache.set_insider_trades(
            cache_key,
            [item.model_dump() for item in trades],
        )
        return trades
    return []
def _fetch_finnhub_company_news(
    ticker: str,
    start_date: str | None,
    end_date: str,
    limit: int,
    api_key: str,
) -> list[CompanyNews]:
    """
    Fetch company news for ``ticker`` from Finnhub.

    Args:
        ticker: Stock ticker symbol
        start_date: Start of the window (YYYY-MM-DD); when falsy the window
            starts 30 days before ``end_date``
        end_date: End of the window (YYYY-MM-DD)
        limit: Maximum number of articles to convert
        api_key: Finnhub API key

    Returns:
        list[CompanyNews]: The first ``limit`` articles of the response,
        converted to the internal model; empty when there is no news.
    """
    client = finnhub.Client(api_key=api_key)

    if start_date:
        window_start = start_date
    else:
        # No explicit start: default to the 30 days leading up to end_date
        thirty_days_earlier = datetime.datetime.strptime(
            end_date,
            "%Y-%m-%d",
        ) - datetime.timedelta(days=30)
        window_start = thirty_days_earlier.strftime("%Y-%m-%d")

    articles = client.company_news(ticker, _from=window_start, to=end_date)
    if not articles:
        return []

    def _published_on(article: dict) -> str | None:
        """Format the article's epoch "datetime" as a UTC YYYY-MM-DD date."""
        if not article.get("datetime"):
            return None
        published = datetime.datetime.fromtimestamp(
            article.get("datetime", 0),
            datetime.timezone.utc,
        )
        return published.strftime("%Y-%m-%d")

    return [
        CompanyNews(
            ticker=ticker,
            title=article.get("headline", ""),
            related=article.get("related", ""),
            source=article.get("source", ""),
            date=_published_on(article),
            url=article.get("url", ""),
            summary=article.get("summary", ""),
            category=article.get("category", ""),
        )
        for article in articles[:limit]
    ]
def _fetch_fd_company_news(
    ticker: str,
    start_date: str | None,
    end_date: str,
    limit: int,
    api_key: str,
) -> list[CompanyNews]:
    """
    Fetch company news from Financial Datasets API.

    Without ``start_date`` a single page of at most ``limit`` articles is
    fetched. With ``start_date`` the function pages backwards in time by
    moving ``end_date`` to the oldest article date of each full page, until
    a short page, an empty page, or ``start_date`` is reached.

    Args:
        ticker: Stock ticker symbol
        start_date: Optional start date (YYYY-MM-DD)
        end_date: End date (YYYY-MM-DD)
        limit: Page size passed to the API
        api_key: Financial Datasets API key

    Returns:
        list[CompanyNews]: All articles collected across pages.

    Raises:
        ValueError: If the API answers with a non-200 status.
    """
    headers = {"X-API-KEY": api_key}
    all_news = []
    current_end_date = end_date

    while True:
        url = f"https://api.financialdatasets.ai/news/?ticker={ticker}&end_date={current_end_date}"
        if start_date:
            url += f"&start_date={start_date}"
        url += f"&limit={limit}"

        response = _make_api_request(url, headers)
        if response.status_code != 200:
            raise ValueError(
                f"Error fetching data: {ticker} - {response.status_code} - {response.text}",
            )

        company_news = CompanyNewsResponse(**response.json()).news
        if not company_news:
            break
        all_news.extend(company_news)

        # Keep paging only for a bounded range whose last page was full
        if not start_date or len(company_news) < limit:
            break

        article_dates = [
            news.date for news in company_news if news.date is not None
        ]
        if not article_dates:
            # Every article on the page is undated, so there is nothing to
            # derive the next bound from (min() of nothing would raise).
            break
        oldest_date = min(article_dates).split("T")[0]

        if oldest_date <= start_date:
            break
        # If the bound does not move back, the next request would be
        # identical to this one and the loop would never terminate.
        if oldest_date >= current_end_date:
            break
        # NOTE(review): articles dated exactly on this bound may be returned
        # again by the next page - confirm whether callers need
        # de-duplication.
        current_end_date = oldest_date

    return all_news
def get_company_news(
    ticker: str,
    end_date: str,
    start_date: str | None = None,
    limit: int = 1000,
) -> list[CompanyNews]:
    """
    Return company news for ``ticker``, served from cache when possible.

    Args:
        ticker: Stock ticker symbol
        end_date: End date (YYYY-MM-DD)
        start_date: Optional start date (YYYY-MM-DD)
        limit: Maximum number of articles (Finnhub) or page size
            (Financial Datasets)

    Returns:
        list[CompanyNews]: Articles from the configured provider; empty when
        none were found. Only non-empty results are cached.
    """
    settings = get_config()
    source = settings.source
    key = settings.api_key

    cache_key = f"{ticker}_{start_date or 'none'}_{end_date}_{limit}_{source}"
    cached_rows = _cache.get_company_news(cache_key)
    if cached_rows:
        return [CompanyNews(**row) for row in cached_rows]

    # Both fetchers share one signature, so pick one and call it once
    if source == "finnhub":
        fetch = _fetch_finnhub_company_news
    else:
        fetch = _fetch_fd_company_news
    articles = fetch(ticker, start_date, end_date, limit, key)

    if articles:
        _cache.set_company_news(
            cache_key,
            [item.model_dump() for item in articles],
        )
        return articles
    return []
def _convert_finnhub_insider_trade(ticker: str, trade: dict) -> InsiderTrade:
    """
    Convert Finnhub insider trade format to InsiderTrade model.

    Args:
        ticker: Stock ticker symbol, copied onto the record
        trade: One transaction dict from the Finnhub payload. The keys read
            are "share", "change", "transactionPrice", "name",
            "transactionDate" and "filingDate".

    Returns:
        InsiderTrade: ``transaction_shares`` and ``transaction_value`` are
        based on the absolute value of "change". A missing or null "change"
        or "transactionPrice" counts as 0. Ownership before/after is None
        only when the payload does not provide the numbers needed, so a
        holding of 0 shares is reported as 0 rather than None.
    """
    shares_after = trade.get("share")
    change = trade.get("change")
    # "or 0.0" also covers an explicit null, which .get(key, default) does not
    price = trade.get("transactionPrice") or 0.0

    # NOTE(review): the sign of "change" is dropped here, so buys and sells
    # are indistinguishable downstream - confirm that is intended.
    shares_traded = abs(change) if change is not None else 0

    # Compare against None rather than truthiness: 0 is a valid share count
    if shares_after is not None and change is not None:
        shares_before = shares_after - change
    else:
        shares_before = None

    return InsiderTrade(
        ticker=ticker,
        issuer=None,
        name=trade.get("name", ""),
        title=None,
        is_board_director=None,
        transaction_date=trade.get("transactionDate", ""),
        transaction_shares=shares_traded,
        transaction_price_per_share=price,
        transaction_value=shares_traded * price,
        shares_owned_before_transaction=shares_before,
        shares_owned_after_transaction=(
            float(shares_after) if shares_after is not None else None
        ),
        security_title=None,
        filing_date=trade.get("filingDate", ""),
    )
def get_market_cap(ticker: str, end_date: str) -> float | None:
    """
    Fetch market cap from the API. Finnhub values are converted from millions.

    With the Financial Datasets source and ``end_date`` equal to today's
    date, the company-facts endpoint is used. In every other case the value
    is taken from the first record returned by ``get_financial_metrics``.

    Args:
        ticker: Stock ticker symbol
        end_date: End date (YYYY-MM-DD)

    Returns:
        float | None: Market capitalisation, or None when the request fails
        or no (non-zero) value is available.
    """
    config = get_config()
    data_source = config.source
    api_key = config.api_key

    today = datetime.datetime.now().strftime("%Y-%m-%d")
    # The company-facts endpoint belongs to Financial Datasets. Skip it when
    # Finnhub is configured: config.api_key is then a Finnhub key, which
    # must not be sent to another vendor and could not authenticate there.
    if data_source != "finnhub" and end_date == today:
        headers = {"X-API-KEY": api_key}
        url = (
            f"https://api.financialdatasets.ai/company/facts/?ticker={ticker}"
        )
        response = _make_api_request(url, headers)
        if response.status_code != 200:
            return None
        response_model = CompanyFactsResponse(**response.json())
        return response_model.company_facts.market_cap

    financial_metrics = get_financial_metrics(ticker, end_date)
    if not financial_metrics:
        return None

    market_cap = financial_metrics[0].market_cap
    if not market_cap:
        return None

    # Finnhub returns market cap in millions
    if data_source == "finnhub":
        market_cap = market_cap * 1_000_000
    return market_cap
def prices_to_df(prices: list[Price]) -> pd.DataFrame:
"""Convert prices to a DataFrame."""
df = pd.DataFrame([p.model_dump() for p in prices])
df["Date"] = pd.to_datetime(df["time"])
df.set_index("Date", inplace=True)
numeric_cols = ["open", "close", "high", "low", "volume"]
for col in numeric_cols:
df[col] = pd.to_numeric(df[col], errors="coerce")
df.sort_index(inplace=True)
return df