feat: initial commit - EvoTraders project

量化交易多智能体系统,包含:
- 分析师、投资组合经理、风险经理等智能体
- 股票分析、投资组合管理、风险控制工具
- React 前端界面
- FastAPI 后端服务

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2026-03-13 04:34:06 +08:00
commit 12de93aa30
115 changed files with 29304 additions and 0 deletions

Binary file not shown.
1 ����Mac OS X ���� ���2���q������£��������������������������������������ATTR�������£���˜��� ������������������˜��� ��com.apple.provenance��Äî §¢(â[

Binary file not shown.
1 ����Mac OS X ���� ���2���q������£��������������������������������������ATTR�������£���˜��� ������������������˜��� ��com.apple.provenance��Äî §¢(â[

Binary file not shown.
1 ����Mac OS X ���� ���2���q������£��������������������������������������ATTR�������£���˜��� ������������������˜��� ��com.apple.provenance��Äî §¢(â[

Binary file not shown.
1 ����Mac OS X ���� ���2���q������£��������������������������������������ATTR�������£���˜��� ������������������˜��� ��com.apple.provenance��Äî §¢(â[

Binary file not shown.
1 ����Mac OS X ���� ���2���q������£��������������������������������������ATTR�������£���˜��� ������������������˜��� ��com.apple.provenance��Äî §¢(â[

Binary file not shown.
1 ����Mac OS X ���� ���2���q������£��������������������������������������ATTR�������£���˜��� ������������������˜��� ��com.apple.provenance��Äî §¢(â[

Binary file not shown.
1 ����Mac OS X ���� ���2���q������£��������������������������������������ATTR�������£���˜��� ������������������˜��� ��com.apple.provenance��Äî §¢(â[

6
backend/data/__init__.py Normal file
View File

@@ -0,0 +1,6 @@
# -*- coding: utf-8 -*-
from backend.data.historical_price_manager import HistoricalPriceManager
from backend.data.mock_price_manager import MockPriceManager
from backend.data.polling_price_manager import PollingPriceManager

# Public API of the backend.data package: the three interchangeable
# price-manager implementations (mock, live-polling, and backtest).
__all__ = ["MockPriceManager", "PollingPriceManager", "HistoricalPriceManager"]

107
backend/data/cache.py Normal file
View File

@@ -0,0 +1,107 @@
# -*- coding: utf-8 -*-
from typing_extensions import Any
class Cache:
"""In-memory cache for API responses."""
def __init__(self):
self._prices_cache = {}
self._financial_metrics_cache = {}
self._line_items_cache = {}
self._insider_trades_cache = {}
self._company_news_cache = {}
def _merge_data(
self,
existing: list[dict] | None,
new_data: list[dict],
key_field: str,
) -> list[dict]:
"""Merge existing and new data"""
if not existing:
return new_data
# Create a set of existing keys for O(1) lookup
existing_keys = {item[key_field] for item in existing}
# Only add items that don't exist yet
merged = existing.copy()
merged.extend(
[
item
for item in new_data
if item[key_field] not in existing_keys
],
)
return merged
def get_prices(self, ticker: str) -> list[dict[str, Any]] | None:
"""Get cached price data if available."""
return self._prices_cache.get(ticker)
def set_prices(self, ticker: str, data: list[dict[str, Any]]):
"""Append new price data to cache."""
self._prices_cache[ticker] = self._merge_data(
self._prices_cache.get(ticker),
data,
key_field="time",
)
def get_financial_metrics(self, ticker: str) -> list[dict[str, Any]]:
"""Get cached financial metrics if available."""
return self._financial_metrics_cache.get(ticker)
def set_financial_metrics(self, ticker: str, data: list[dict[str, Any]]):
"""Append new financial metrics to cache."""
self._financial_metrics_cache[ticker] = self._merge_data(
self._financial_metrics_cache.get(ticker),
data,
key_field="report_period",
)
def get_line_items(self, ticker: str) -> list[dict[str, Any]] | None:
"""Get cached line items if available."""
return self._line_items_cache.get(ticker)
def set_line_items(self, ticker: str, data: list[dict[str, Any]]):
"""Append new line items to cache."""
self._line_items_cache[ticker] = self._merge_data(
self._line_items_cache.get(ticker),
data,
key_field="report_period",
)
def get_insider_trades(self, ticker: str) -> list[dict[str, Any]] | None:
"""Get cached insider trades if available."""
return self._insider_trades_cache.get(ticker)
def set_insider_trades(self, ticker: str, data: list[dict[str, Any]]):
"""Append new insider trades to cache."""
self._insider_trades_cache[ticker] = self._merge_data(
self._insider_trades_cache.get(ticker),
data,
key_field="filing_date",
) # Could also use transaction_date if preferred
def get_company_news(self, ticker: str) -> list[dict[str, Any]] | None:
"""Get cached company news if available."""
return self._company_news_cache.get(ticker)
def set_company_news(self, ticker: str, data: list[dict[str, Any]]):
"""Append new company news to cache."""
self._company_news_cache[ticker] = self._merge_data(
self._company_news_cache.get(ticker),
data,
key_field="date",
)
# Global cache instance — a module-level singleton shared by every importer
# of this module (created once at import time).
_cache = Cache()


def get_cache() -> Cache:
    """Get the global cache instance."""
    return _cache

View File

@@ -0,0 +1,233 @@
# -*- coding: utf-8 -*-
"""
Historical Price Manager for backtest mode
"""
import logging
from datetime import datetime
from pathlib import Path
from typing import Callable, Dict, List, Optional
import pandas as pd
# Module-level logger for this file.
logger = logging.getLogger(__name__)
# Path to local CSV data directory (one "<SYMBOL>.csv" file per ticker).
_DATA_DIR = Path(__file__).parent / "ret_data"
class HistoricalPriceManager:
    """Provides historical prices for backtest mode.

    Prices are read from per-symbol CSV files under ``ret_data``.  The
    backtest driver advances the clock with :meth:`set_date` and then
    calls :meth:`emit_open_prices` / :meth:`emit_close_prices` to fan the
    day's prices out to registered callbacks.
    """

    def __init__(self):
        # Symbols currently tracked by this manager.
        self.subscribed_symbols = []
        # Callables invoked with a price-data dict on every emitted tick.
        self.price_callbacks = []
        # symbol -> DataFrame of historical bars, indexed by parsed date.
        self._price_cache = {}
        # Current backtest date as "YYYY-MM-DD"; None until set_date().
        self._current_date = None
        # symbol -> most recently emitted price.
        self.latest_prices = {}
        # symbol -> open / close price for the current backtest date.
        self.open_prices = {}
        self.close_prices = {}
        self.running = False

    def subscribe(
        self,
        symbols: List[str],
    ):
        """Subscribe to symbols (duplicates are silently ignored)."""
        for symbol in symbols:
            if symbol not in self.subscribed_symbols:
                self.subscribed_symbols.append(symbol)

    def unsubscribe(self, symbols: List[str]):
        """Unsubscribe from symbols and drop their cached data."""
        for symbol in symbols:
            if symbol in self.subscribed_symbols:
                self.subscribed_symbols.remove(symbol)
                self._price_cache.pop(symbol, None)

    def add_price_callback(self, callback: Callable):
        """Add price update callback"""
        self.price_callbacks.append(callback)

    def _load_from_csv(self, symbol: str) -> Optional[pd.DataFrame]:
        """Load price data from local CSV file.

        Returns a DataFrame indexed by the parsed ``time`` column, sorted
        ascending, or None if the file is missing, empty, or unreadable.
        """
        csv_path = _DATA_DIR / f"{symbol}.csv"
        if not csv_path.exists():
            return None
        try:
            df = pd.read_csv(csv_path)
            if df.empty or "time" not in df.columns:
                return None
            df["Date"] = pd.to_datetime(df["time"])
            df.set_index("Date", inplace=True)
            df.sort_index(inplace=True)
            return df
        except Exception as e:
            # Best-effort load: a bad CSV should not abort the backtest.
            logger.warning(f"Failed to load CSV for {symbol}: {e}")
            return None

    def preload_data(self, start_date: str, end_date: str):
        """Preload historical data from local CSV files.

        NOTE(review): the date arguments are only logged — the whole CSV
        is cached regardless of the requested range; confirm intended.
        """
        logger.info(f"Preloading data: {start_date} to {end_date}")
        for symbol in self.subscribed_symbols:
            if symbol in self._price_cache:
                continue
            # Load from local CSV file directly
            df = self._load_from_csv(symbol)
            if df is not None and not df.empty:
                self._price_cache[symbol] = df
                logger.info(f"Loaded {symbol} from CSV: {len(df)} records")
            else:
                logger.warning(f"No CSV data for {symbol}")

    def set_date(self, date: str):
        """Set current trading date and update prices.

        Uses the exact date's bar when present, otherwise falls back to
        the closest earlier date (handles weekends/holidays).  Updates
        open/close/latest for every subscribed symbol.
        """
        self._current_date = date
        date_dt = pd.Timestamp(date)
        for symbol in self.subscribed_symbols:
            df = self._price_cache.get(symbol)
            if df is None or df.empty:
                # Keep previous prices if no data available
                logger.warning(f"No cached data for {symbol} on {date}")
                continue
            # Find exact date or closest earlier date
            if date_dt in df.index:
                row = df.loc[date_dt]
            else:
                valid_dates = df.index[df.index <= date_dt]
                if len(valid_dates) == 0:
                    logger.warning(f"No data for {symbol} on or before {date}")
                    continue
                row = df.loc[valid_dates[-1]]
            open_price = float(row["open"])
            close_price = float(row["close"])
            self.open_prices[symbol] = open_price
            self.close_prices[symbol] = close_price
            # Latest starts at the open; emit_close_prices moves it later.
            self.latest_prices[symbol] = open_price
            logger.debug(
                f"{symbol} @ {date}: open={open_price:.2f}, close={close_price:.2f}",  # noqa: E501
            )

    def emit_open_prices(self):
        """Emit open prices to callbacks (timestamp = midnight of the date, ms)."""
        if not self._current_date:
            return
        timestamp = int(
            datetime.strptime(self._current_date, "%Y-%m-%d").timestamp()
            * 1000,
        )
        for symbol in self.subscribed_symbols:
            price = self.open_prices.get(symbol)
            if price is None or price <= 0:
                logger.warning(f"Invalid open price for {symbol}: {price}")
                continue
            self.latest_prices[symbol] = price
            self._emit_price(symbol, price, timestamp)

    def emit_close_prices(self):
        """Emit close prices to callbacks (timestamp offset to the close)."""
        if not self._current_date:
            return
        timestamp = int(
            datetime.strptime(self._current_date, "%Y-%m-%d").timestamp()
            * 1000,
        )
        # Add 6.5 hours (a full trading session) in milliseconds so the
        # close tick sorts after the open tick for the same date.
        timestamp += 23400000
        for symbol in self.subscribed_symbols:
            price = self.close_prices.get(symbol)
            if price is None or price <= 0:
                logger.warning(f"Invalid close price for {symbol}: {price}")
                continue
            self.latest_prices[symbol] = price
            self._emit_price(symbol, price, timestamp)

    def _emit_price(self, symbol: str, price: float, timestamp: int):
        """Build a price-data dict and fan it out to all callbacks.

        Callback exceptions are logged and swallowed so one bad consumer
        cannot break the emit loop for other subscribers.
        """
        open_price = self.open_prices.get(symbol, price)
        close_price = self.close_prices.get(symbol, price)
        # Intraday return (%) relative to the session open.
        ret = (
            ((price - open_price) / open_price) * 100 if open_price > 0 else 0
        )
        price_data = {
            "symbol": symbol,
            "price": price,
            "timestamp": timestamp,
            "open": open_price,
            "close": close_price,
            "high": max(open_price, close_price),
            "low": min(open_price, close_price),
            "ret": ret,
        }
        for callback in self.price_callbacks:
            try:
                callback(price_data)
            except Exception as e:
                logger.error(f"Callback error for {symbol}: {e}")

    def get_price_for_date(
        self,
        symbol: str,
        date: str,
        price_type: str = "close",
    ) -> Optional[float]:
        """Get price for a specific date.

        Falls back to the closest earlier bar, then to the latest known
        price when no cached data covers the requested date.
        """
        df = self._price_cache.get(symbol)
        if df is None or df.empty:
            return self.latest_prices.get(symbol)
        date_dt = pd.Timestamp(date)
        if date_dt in df.index:
            return float(df.loc[date_dt, price_type])
        valid_dates = df.index[df.index <= date_dt]
        if len(valid_dates) == 0:
            return self.latest_prices.get(symbol)
        return float(df.loc[valid_dates[-1], price_type])

    def start(self):
        """Start manager"""
        self.running = True

    def stop(self):
        """Stop manager"""
        self.running = False

    def get_latest_price(self, symbol: str) -> Optional[float]:
        # Most recently emitted price for the symbol, or None.
        return self.latest_prices.get(symbol)

    def get_all_latest_prices(self) -> Dict[str, float]:
        # Shallow copy so callers cannot mutate internal state.
        return self.latest_prices.copy()

    def get_open_price(self, symbol: str) -> Optional[float]:
        # Return open price, fallback to latest if not set
        price = self.open_prices.get(symbol)
        if price is None or price <= 0:
            return self.latest_prices.get(symbol)
        return price

    def get_close_price(self, symbol: str) -> Optional[float]:
        # Return close price, fallback to latest if not set
        price = self.close_prices.get(symbol)
        if price is None or price <= 0:
            return self.latest_prices.get(symbol)
        return price

    def reset_open_prices(self):
        # Don't clear prices - keep them for continuity across backtest
        # days; set_date() overwrites them with the next day's values.
        pass

View File

@@ -0,0 +1,241 @@
# -*- coding: utf-8 -*-
"""
Mock Price Manager - For testing during non-trading hours
Generates virtual real-time price data
"""
import logging
import os
import random
import threading
import time
from typing import Callable, Dict, List, Optional
logger = logging.getLogger(__name__)
class MockPriceManager:
    """Mock Price Manager - Generates virtual prices for testing.

    A daemon thread produces random-walk price updates for subscribed
    symbols at a fixed interval, mirroring the callback interface of the
    real polling manager so it can be swapped in outside trading hours.
    """

    def __init__(
        self,
        poll_interval: Optional[int] = 10,
        volatility: Optional[float] = 0.5,
    ):
        """
        Args:
            poll_interval: Price update interval in seconds.  Pass None
                to read MOCK_POLL_INTERVAL from the environment
                (env default "5").
            volatility: Price volatility percentage.  Pass None to read
                MOCK_VOLATILITY from the environment (env default "0.5").
        """
        # NOTE(review): the env-var fallbacks only trigger when None is
        # passed explicitly — the declared defaults (10 / 0.5) win
        # otherwise.  Confirm whether env vars were meant to apply by
        # default.
        if poll_interval is None:
            poll_interval = int(os.getenv("MOCK_POLL_INTERVAL", "5"))
        if volatility is None:
            volatility = float(os.getenv("MOCK_VOLATILITY", "0.5"))
        self.poll_interval = poll_interval
        self.volatility = volatility
        self.subscribed_symbols: List[str] = []
        # symbol -> reference price used to seed the random walk.
        self.base_prices: Dict[str, float] = {}
        # symbol -> simulated session open price.
        self.open_prices: Dict[str, float] = {}
        # symbol -> most recently generated price.
        self.latest_prices: Dict[str, float] = {}
        self.price_callbacks: List[Callable] = []
        self.running = False
        self._thread: Optional[threading.Thread] = None
        # Realistic-looking starting prices for well-known tickers;
        # unknown tickers get a random base in subscribe().
        self.default_base_prices = {
            "AAPL": 237.50,
            "MSFT": 425.30,
            "GOOGL": 161.50,
            "AMZN": 218.45,
            "NVDA": 950.00,
            "META": 573.22,
            "TSLA": 342.15,
            "AMD": 168.90,
            "NFLX": 688.25,
            "INTC": 42.18,
            "COIN": 285.50,
            "PLTR": 45.80,
            "BABA": 88.30,
            "DIS": 112.50,
            "BKNG": 4850.00,
        }
        logger.info(
            f"MockPriceManager initialized (interval: {self.poll_interval}s, "
            f"volatility: {self.volatility}%)",
        )

    def subscribe(
        self,
        symbols: List[str],
        base_prices: Optional[Dict[str, float]] = None,
    ):
        """Subscribe to stock symbols.

        Base price resolution order: caller-supplied ``base_prices``,
        then the built-in defaults, then a random value in [50, 500).
        """
        for symbol in symbols:
            if symbol not in self.subscribed_symbols:
                self.subscribed_symbols.append(symbol)
                if base_prices and symbol in base_prices:
                    base_price = base_prices[symbol]
                elif symbol in self.default_base_prices:
                    base_price = self.default_base_prices[symbol]
                else:
                    base_price = random.uniform(50, 500)
                self.base_prices[symbol] = base_price
                self.open_prices[symbol] = base_price
                self.latest_prices[symbol] = base_price
                logger.info(
                    f"Subscribed to mock price: {symbol} (base: ${base_price:.2f})",  # noqa: E501
                )

    def unsubscribe(self, symbols: List[str]):
        """Unsubscribe from symbols and drop all of their state."""
        for symbol in symbols:
            if symbol in self.subscribed_symbols:
                self.subscribed_symbols.remove(symbol)
                self.base_prices.pop(symbol, None)
                self.open_prices.pop(symbol, None)
                self.latest_prices.pop(symbol, None)
                logger.info(f"Unsubscribed: {symbol}")

    def add_price_callback(self, callback: Callable):
        """Add price update callback"""
        self.price_callbacks.append(callback)

    def _generate_price_update(self, symbol: str) -> float:
        """Generate price update based on random walk.

        Per-tick move is uniform within +/-volatility%, with a 10% chance
        of an extra +/-2% jump; the result is clamped to +/-10% of the
        session open.
        """
        current_price = self.latest_prices.get(
            symbol,
            self.base_prices[symbol],
        )
        change_percent = random.uniform(-self.volatility, self.volatility)
        new_price = current_price * (1 + change_percent / 100)
        # 10% chance of larger movement
        if random.random() < 0.1:
            trend_factor = random.uniform(-2, 2)
            new_price = new_price * (1 + trend_factor / 100)
        # Limit intraday movement to +/-10%
        open_price = self.open_prices[symbol]
        max_price = open_price * 1.10
        min_price = open_price * 0.90
        new_price = max(min_price, min(max_price, new_price))
        return new_price

    def _update_prices(self):
        """Update prices for all subscribed stocks and notify callbacks.

        Callback exceptions are logged and swallowed so one bad consumer
        cannot break updates for the remaining symbols.
        """
        timestamp = int(time.time() * 1000)
        for symbol in self.subscribed_symbols:
            try:
                new_price = self._generate_price_update(symbol)
                self.latest_prices[symbol] = new_price
                open_price = self.open_prices[symbol]
                # Intraday return (%) relative to the simulated open.
                ret = ((new_price - open_price) / open_price) * 100
                price_data = {
                    "symbol": symbol,
                    "price": new_price,
                    "timestamp": timestamp,
                    "volume": random.randint(1000000, 10000000),
                    "open": open_price,
                    "high": max(new_price, open_price),
                    "low": min(new_price, open_price),
                    "previous_close": open_price,
                    "ret": ret,
                }
                for callback in self.price_callbacks:
                    try:
                        callback(price_data)
                    except Exception as e:
                        logger.error(
                            f"Mock price callback error ({symbol}): {e}",
                        )
                logger.debug(
                    f"Mock {symbol}: ${new_price:.2f} [ret: {ret:+.2f}%]",
                )
            except Exception as e:
                logger.error(f"Failed to generate mock price ({symbol}): {e}")

    def _polling_loop(self):
        """Main polling loop (runs on the daemon thread until stop()).

        Sleeps only for the remainder of the interval after each update,
        and backs off 5s after an unexpected error.
        """
        logger.info(
            f"Mock price generation started (interval: {self.poll_interval}s)",
        )
        while self.running:
            try:
                start_time = time.time()
                self._update_prices()
                elapsed = time.time() - start_time
                sleep_time = max(0, self.poll_interval - elapsed)
                if sleep_time > 0:
                    time.sleep(sleep_time)
            except Exception as e:
                logger.error(f"Mock polling loop error: {e}")
                time.sleep(5)

    def start(self):
        """Start mock price generation (no-op if already running or
        nothing is subscribed)."""
        if self.running:
            logger.warning("Mock price manager already running")
            return
        if not self.subscribed_symbols:
            logger.warning("No stocks subscribed")
            return
        self.running = True
        self._thread = threading.Thread(target=self._polling_loop, daemon=True)
        self._thread.start()
        logger.info(
            f"Mock price manager started: {', '.join(self.subscribed_symbols)}",  # noqa: E501
        )

    def stop(self):
        """Stop mock price generation (joins the worker, 5s timeout)."""
        self.running = False
        if self._thread:
            self._thread.join(timeout=5)
        logger.info("Mock price manager stopped")

    def get_latest_price(self, symbol: str) -> Optional[float]:
        """Get latest price for symbol"""
        return self.latest_prices.get(symbol)

    def get_all_latest_prices(self) -> Dict[str, float]:
        """Get all latest prices"""
        return self.latest_prices.copy()

    def get_open_price(self, symbol: str) -> Optional[float]:
        """Get open price for symbol"""
        return self.open_prices.get(symbol)

    def reset_open_prices(self):
        """Reset open prices for new trading day.

        Simulates an overnight gap of +/-1% from the last close and
        restarts the walk from the new open.
        """
        for symbol in self.subscribed_symbols:
            last_close = self.latest_prices[symbol]
            gap_percent = random.uniform(-1, 1)
            new_open = last_close * (1 + gap_percent / 100)
            self.open_prices[symbol] = new_open
            self.latest_prices[symbol] = new_open
        logger.info("Open prices reset")

    def set_base_price(self, symbol: str, price: float):
        """Manually set base price for testing (symbol must be subscribed)."""
        if symbol in self.subscribed_symbols:
            self.base_prices[symbol] = price
            self.open_prices[symbol] = price
            self.latest_prices[symbol] = price
            logger.info(f"{symbol} base price set to: ${price:.2f}")
        else:
            logger.warning(f"{symbol} not subscribed")

View File

@@ -0,0 +1,175 @@
# -*- coding: utf-8 -*-
"""
Polling-based Price Manager - Uses Finnhub REST API
Supports real-time price fetching via polling
"""
import logging
import threading
import time
from typing import Callable, Dict, List, Optional
import finnhub
logger = logging.getLogger(__name__)
class PollingPriceManager:
    """Polling-based price manager using the Finnhub Quote API.

    A daemon thread periodically fetches quotes for all subscribed
    symbols and fans the resulting price dicts out to registered
    callbacks.
    """

    def __init__(self, api_key: str, poll_interval: int = 30):
        """
        Args:
            api_key: Finnhub API Key
            poll_interval: Polling interval in seconds (default 30s)
        """
        self.api_key = api_key
        self.poll_interval = poll_interval
        self.finnhub_client = finnhub.Client(api_key=api_key)
        self.subscribed_symbols: List[str] = []
        # symbol -> most recently fetched price.
        self.latest_prices: Dict[str, float] = {}
        # symbol -> session open, captured from the first valid quote.
        self.open_prices: Dict[str, float] = {}
        self.price_callbacks: List[Callable] = []
        self.running = False
        self._thread: Optional[threading.Thread] = None
        logger.info(
            f"PollingPriceManager initialized (interval: {poll_interval}s)",
        )

    def subscribe(self, symbols: List[str]):
        """Subscribe to stock symbols (duplicates are ignored)."""
        for symbol in symbols:
            if symbol not in self.subscribed_symbols:
                self.subscribed_symbols.append(symbol)
                logger.info(f"Subscribed to: {symbol}")

    def unsubscribe(self, symbols: List[str]):
        """Unsubscribe from symbols"""
        for symbol in symbols:
            if symbol in self.subscribed_symbols:
                self.subscribed_symbols.remove(symbol)
                logger.info(f"Unsubscribed: {symbol}")

    def add_price_callback(self, callback: Callable):
        """Add price update callback"""
        self.price_callbacks.append(callback)

    def _fetch_prices(self):
        """Fetch latest prices for all subscribed stocks.

        Finnhub quote fields: c=current, o=open, h=high, l=low,
        pc=previous close, d=change, dp=change percent, t=unix seconds.
        Per-symbol failures are logged and skipped so one bad symbol
        cannot stall the rest of the batch.
        """
        for symbol in self.subscribed_symbols:
            try:
                quote_data = self.finnhub_client.quote(symbol)
                current_price = quote_data.get("c")
                open_price = quote_data.get("o")
                timestamp = quote_data.get("t", int(time.time()))
                if not current_price or current_price <= 0:
                    logger.warning(f"{symbol}: Invalid price data")
                    continue
                # Store open price on first fetch with a valid "o" field.
                if (
                    symbol not in self.open_prices
                    and open_price
                    and open_price > 0
                ):
                    self.open_prices[symbol] = open_price
                    logger.info(f"{symbol} open price: ${open_price:.2f}")
                stored_open = self.open_prices.get(symbol, open_price)
                # BUG FIX: stored_open can be None when the quote lacks a
                # valid "o" and no open was stored yet; the old bare
                # `stored_open > 0` raised TypeError on None, which the
                # outer except logged as a fetch failure and the price
                # update was silently dropped.  Guard for None first.
                ret = (
                    ((current_price - stored_open) / stored_open) * 100
                    if stored_open and stored_open > 0
                    else 0
                )
                self.latest_prices[symbol] = current_price
                price_data = {
                    "symbol": symbol,
                    "price": current_price,
                    # Finnhub "t" is unix seconds; downstream expects ms.
                    "timestamp": timestamp * 1000,
                    "open": stored_open,
                    "high": quote_data.get("h"),
                    "low": quote_data.get("l"),
                    "previous_close": quote_data.get("pc"),
                    "ret": ret,
                    "change": quote_data.get("d"),
                    "change_percent": quote_data.get("dp"),
                }
                for callback in self.price_callbacks:
                    try:
                        callback(price_data)
                    except Exception as e:
                        logger.error(f"Price callback error ({symbol}): {e}")
                logger.debug(
                    f"{symbol}: ${current_price:.2f} [ret: {ret:+.2f}%]",
                )
            except Exception as e:
                logger.error(f"Failed to fetch {symbol} price: {e}")

    def _polling_loop(self):
        """Main polling loop (runs on the daemon thread until stop()).

        Sleeps only for the remainder of the interval after each fetch,
        and backs off 5s after an unexpected error.
        """
        logger.info(f"Price polling started (interval: {self.poll_interval}s)")
        while self.running:
            try:
                start_time = time.time()
                self._fetch_prices()
                elapsed = time.time() - start_time
                sleep_time = max(0, self.poll_interval - elapsed)
                if sleep_time > 0:
                    time.sleep(sleep_time)
            except Exception as e:
                logger.error(f"Polling loop error: {e}")
                time.sleep(5)

    def start(self):
        """Start price polling (no-op if already running or nothing is
        subscribed)."""
        if self.running:
            logger.warning("Price polling already running")
            return
        if not self.subscribed_symbols:
            logger.warning("No stocks subscribed")
            return
        self.running = True
        self._thread = threading.Thread(target=self._polling_loop, daemon=True)
        self._thread.start()
        logger.info(
            f"Price polling started: {', '.join(self.subscribed_symbols)}",
        )

    def stop(self):
        """Stop price polling (joins the worker thread, 5s timeout)."""
        self.running = False
        if self._thread:
            self._thread.join(timeout=5)
        logger.info("Price polling stopped")

    def get_latest_price(self, symbol: str) -> Optional[float]:
        """Get latest price for symbol"""
        return self.latest_prices.get(symbol)

    def get_all_latest_prices(self) -> Dict[str, float]:
        """Get all latest prices"""
        return self.latest_prices.copy()

    def get_open_price(self, symbol: str) -> Optional[float]:
        """Get open price for symbol"""
        return self.open_prices.get(symbol)

    def reset_open_prices(self):
        """Reset open prices for new trading day"""
        self.open_prices.clear()
        logger.info("Open prices reset")

View File

@@ -0,0 +1,387 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Automatic Incremental Historical Data Update Module
Features:
1. Fetch stock historical data from configured API (Finnhub or Financial Datasets)
2. Incrementally update CSV files in ret_data directory
3. Automatically detect last update date, only download new data
4. Calculate returns (ret)
5. Support batch updates for multiple stocks
"""
# flake8: noqa: E501
import logging
import os
import sys
import time
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional
import exchange_calendars as xcals
import pandas as pd
import pandas_market_calendars as mcal
from dotenv import load_dotenv
from backend.config.data_config import (
get_config,
)
from backend.tools.data_tools import get_prices, prices_to_df
# Add project root directory to path
# NOTE(review): this append runs AFTER the `backend.*` imports above, so
# those imports already required the project root to be importable —
# confirm whether this is still needed for anything executed later.
BASE_DIR = Path(__file__).resolve().parents[2]
if str(BASE_DIR) not in sys.path:
    sys.path.append(str(BASE_DIR))
# Configure logging for script usage (timestamped, INFO level).
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
logger = logging.getLogger(__name__)
class DataUpdater:
    """Incrementally updates per-ticker OHLCV CSV files from a market-data API.

    Detects each ticker's last stored date, downloads only newer bars,
    recomputes returns, and merges the result back into the CSV.
    """

    # Resolved storage directory for the per-ticker CSV files.
    data_dir: Path

    def __init__(
        self,
        data_dir: Optional[str] = None,
        start_date: str = "2022-01-01",
    ):
        """
        Initialize data updater
        Args:
            data_dir: Data storage directory, defaults to backend/data/ret_data
            start_date: Historical data start date (YYYY-MM-DD)
        """
        # Get config from centralized source
        config = get_config()
        self.data_source = config.source
        self.api_key = config.api_key
        # Set data directory
        if data_dir is None:
            self.data_dir = BASE_DIR / "backend" / "data" / "ret_data"
        else:
            self.data_dir = Path(data_dir)
        # Ensure directory exists
        self.data_dir.mkdir(parents=True, exist_ok=True)
        self.start_date = start_date
        # Initialize Finnhub client if needed
        if self.data_source == "finnhub":
            import finnhub
            self.client = finnhub.Client(api_key=self.api_key)
            logger.info("Finnhub client initialized")
        else:
            self.client = None
            logger.info("Financial Datasets API configured")

    def get_trading_dates(self, start_date: str, end_date: str) -> List[str]:
        """Get US stock market trading date sequence.

        Prefers pandas_market_calendars (NYSE), then exchange_calendars
        (XNYS), and finally plain business days as a last resort.  The
        `is not None` checks are defensive: both modules are imported
        unconditionally at the top of this file.
        """
        try:
            if mcal is not None:
                nyse = mcal.get_calendar("NYSE")
                trading_dates = nyse.valid_days(
                    start_date=start_date,
                    end_date=end_date,
                )
                return [date.strftime("%Y-%m-%d") for date in trading_dates]
            elif xcals is not None:
                nyse = xcals.get_calendar("XNYS")
                trading_dates = nyse.sessions_in_range(start_date, end_date)
                return [date.strftime("%Y-%m-%d") for date in trading_dates]
        except Exception as e:
            logger.warning(
                f"Failed to get US trading calendar, using business days: {e}",
            )
        # Fallback to simple business day method
        date_range = pd.date_range(start_date, end_date, freq="B")
        return [date.strftime("%Y-%m-%d") for date in date_range]

    def get_last_date_from_csv(self, ticker: str) -> Optional[datetime]:
        """Get last data date from CSV file.

        Returns None when the file is missing, empty, or unreadable —
        the caller then performs a full download from self.start_date.
        Assumes rows are stored in ascending date order (merge_and_save
        sorts by "time" before writing).
        """
        csv_path = self.data_dir / f"{ticker}.csv"
        if not csv_path.exists():
            logger.info(f"{ticker}.csv does not exist, will create new file")
            return None
        try:
            df = pd.read_csv(csv_path)
            if df.empty or "time" not in df.columns:
                return None
            last_date_str = df["time"].iloc[-1]
            last_date = datetime.strptime(last_date_str, "%Y-%m-%d")
            logger.info(f"{ticker} last data date: {last_date_str}")
            return last_date
        except Exception as e:
            logger.warning(f"Failed to read {ticker}.csv: {e}")
            return None

    def fetch_data_from_api(
        self,
        ticker: str,
        start_date: datetime,
        end_date: datetime,
    ) -> Optional[pd.DataFrame]:
        """Fetch data from configured API.

        Returns a DataFrame with columns
        [open, close, high, low, volume, time, ret] or None when the API
        returned no rows.  "ret" is the NEXT day's close-to-close return
        aligned to the current row (pct_change shifted by -1), so the
        final row's ret is NaN.
        """
        start_date_str = start_date.strftime("%Y-%m-%d")
        end_date_str = end_date.strftime("%Y-%m-%d")
        logger.info(
            f"Fetching {ticker} data from {self.data_source}: {start_date_str} to {end_date_str}",
        )
        prices = get_prices(
            ticker=ticker,
            start_date=start_date_str,
            end_date=end_date_str,
        )
        if not prices:
            logger.warning(f"{ticker} no data returned from API")
            return None
        # Convert to DataFrame
        df = prices_to_df(prices)
        df = df.reset_index()
        df["time"] = df["Date"].dt.strftime("%Y-%m-%d")
        # Calculate returns (next day return)
        df["ret"] = df["close"].pct_change().shift(-1)
        # Select needed columns
        df = df[["open", "close", "high", "low", "volume", "time", "ret"]]
        logger.info(f"Successfully fetched {ticker} data: {len(df)} records")
        return df

    def merge_and_save(self, ticker: str, new_data: pd.DataFrame) -> bool:
        """Merge old and new data and save.

        Deduplicates on "time" keeping the NEW row, re-sorts by date,
        and recomputes "ret" over the merged series so the boundary
        between old and new data gets a correct return value.
        Returns True on success.
        """
        csv_path = self.data_dir / f"{ticker}.csv"
        try:
            if csv_path.exists():
                old_data = pd.read_csv(csv_path)
                logger.info(f"{ticker} existing data: {len(old_data)} records")
                # Merge and deduplicate
                combined = pd.concat([old_data, new_data], ignore_index=True)
                combined = combined.drop_duplicates(
                    subset=["time"],
                    keep="last",
                )
                combined = combined.sort_values("time").reset_index(drop=True)
                # Recalculate returns
                combined["ret"] = combined["close"].pct_change().shift(-1)
                logger.info(f"{ticker} merged data: {len(combined)} records")
            else:
                combined = new_data
                logger.info(f"{ticker} new file: {len(combined)} records")
            combined.to_csv(csv_path, index=False)
            logger.info(f"{ticker} data saved to: {csv_path}")
            return True
        except Exception as e:
            logger.error(f"Failed to save {ticker} data: {e}")
            return False

    def update_ticker(
        self,
        ticker: str,
        force_full_update: bool = False,
    ) -> bool:
        """Update data for a single stock.

        Incremental by default (resumes the day after the last stored
        date); `force_full_update` re-downloads from self.start_date.
        Returns True on success or when no update was needed; an empty
        API response within a 3-day window is treated as a weekend or
        holiday rather than a failure.
        """
        logger.info(f"{'='*60}")
        logger.info(f"Starting update for {ticker}")
        logger.info(f"{'='*60}")
        # Determine start date
        if force_full_update:
            start_date = datetime.strptime(self.start_date, "%Y-%m-%d")
            logger.info(f"Force full update, start date: {start_date.date()}")
        else:
            last_date = self.get_last_date_from_csv(ticker)
            if last_date:
                start_date = last_date + timedelta(days=1)
                logger.info(
                    f"Incremental update, start date: {start_date.date()}",
                )
            else:
                start_date = datetime.strptime(self.start_date, "%Y-%m-%d")
                logger.info(f"First update, start date: {start_date.date()}")
        end_date = datetime.now()
        if start_date.date() >= end_date.date():
            logger.info(f"{ticker} data is up to date, no update needed")
            return True
        new_data = self.fetch_data_from_api(ticker, start_date, end_date)
        if new_data is None or new_data.empty:
            days_diff = (end_date - start_date).days
            if days_diff <= 3:
                logger.info(
                    f"{ticker} has no new data (may be weekend/holiday)",
                )
                return True
            else:
                logger.warning(f"{ticker} has no new data")
                return False
        success = self.merge_and_save(ticker, new_data)
        if success:
            logger.info(f"{ticker} update completed")
        else:
            logger.error(f"{ticker} update failed")
        return success

    def update_all_tickers(
        self,
        tickers: List[str],
        force_full_update: bool = False,
    ) -> Dict[str, bool]:
        """Batch update multiple stocks.

        Sleeps 1s between tickers for API rate limiting and returns a
        ticker -> success mapping, logging a summary at the end.
        """
        results = {}
        logger.info(f"{'='*60}")
        logger.info(f"Starting batch update for {len(tickers)} stocks")
        logger.info(f"Stock list: {', '.join(tickers)}")
        logger.info(f"{'='*60}")
        for i, ticker in enumerate(tickers, 1):
            logger.info(f"[{i}/{len(tickers)}] Processing {ticker}")
            results[ticker] = self.update_ticker(ticker, force_full_update)
            # API rate limiting
            if i < len(tickers):
                time.sleep(1)
        # Print summary
        logger.info(f"{'='*60}")
        logger.info("Update Summary")
        logger.info(f"{'='*60}")
        success_count = sum(results.values())
        fail_count = len(results) - success_count
        logger.info(f"Success: {success_count}")
        logger.info(f"Failed: {fail_count}")
        if fail_count > 0:
            failed_tickers = [t for t, s in results.items() if not s]
            logger.warning(f"Failed stocks: {', '.join(failed_tickers)}")
        logger.info(f"{'='*60}\n")
        return results
def main():
    """Command line entry point for the incremental data updater.

    Resolves configuration and the ticker list (from --tickers or the
    TICKERS env var), runs the batch update, and exits with status 0 on
    any completed run (even partial failures) or 1 on fatal errors.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="Automatically update stock historical data",
    )
    parser.add_argument(
        "--tickers",
        type=str,
        help="Stock ticker list (comma-separated), e.g.: AAPL,MSFT,GOOGL",
    )
    parser.add_argument(
        "--data-dir",
        type=str,
        help="Data storage directory (default: backend/data/ret_data)",
    )
    parser.add_argument(
        "--start-date",
        type=str,
        default="2022-01-01",
        help="Historical data start date (YYYY-MM-DD, default: 2022-01-01)",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Force full update (re-download all data)",
    )
    args = parser.parse_args()
    # Load environment variables
    load_dotenv()
    # Validate API key is available
    try:
        config = get_config()
        logger.info(f"Using data source: {config.source}")
    except ValueError as e:
        logger.error(str(e))
        sys.exit(1)
    # Get stock list: --tickers wins, then the TICKERS env var.
    if args.tickers:
        tickers = [t.strip().upper() for t in args.tickers.split(",")]
    else:
        tickers_env = os.getenv("TICKERS", "")
        if tickers_env:
            tickers = [t.strip().upper() for t in tickers_env.split(",")]
        else:
            logger.error("Stock list not provided")
            logger.error(
                "Please set via --tickers parameter or TICKERS environment variable",
            )
            sys.exit(1)
    # Create updater
    updater = DataUpdater(
        data_dir=args.data_dir,
        start_date=args.start_date,
    )
    # Execute update
    try:
        results = updater.update_all_tickers(
            tickers,
            force_full_update=args.force,
        )
    except Exception:
        # FIX: previously the exception was swallowed silently before
        # exiting, making failures indistinguishable from config errors.
        # Log the traceback so operators can diagnose the abort.
        logger.exception("Batch update aborted by unexpected error")
        sys.exit(1)
    # Return status code
    success_count = sum(results.values())
    if success_count == len(results):
        logger.info("All stocks updated successfully!")
        sys.exit(0)
    elif success_count == 0:
        logger.warning("All stocks have no new data (may be weekend/holiday)")
        sys.exit(0)
    else:
        logger.warning("Some stocks failed to update, but will continue")
        sys.exit(0)


if __name__ == "__main__":
    main()

184
backend/data/schema.py Normal file
View File

@@ -0,0 +1,184 @@
# -*- coding: utf-8 -*-
from pydantic import BaseModel
class Price(BaseModel):
    """Single OHLCV bar for one trading period."""

    open: float
    close: float
    high: float
    low: float
    volume: int
    # Bar date/time as a string (used as the dedup key by the price cache).
    time: str


class PriceResponse(BaseModel):
    """Price history payload for a single ticker."""

    ticker: str
    prices: list[Price]
class FinancialMetrics(BaseModel):
    """One reporting period's worth of fundamental metrics for a ticker.

    All metric fields are nullable — upstream providers frequently omit
    individual ratios.
    """

    ticker: str
    # Reporting period end date (used as the dedup key by the cache).
    report_period: str
    period: str
    currency: str
    # Valuation
    market_cap: float | None
    enterprise_value: float | None
    price_to_earnings_ratio: float | None
    price_to_book_ratio: float | None
    price_to_sales_ratio: float | None
    enterprise_value_to_ebitda_ratio: float | None
    enterprise_value_to_revenue_ratio: float | None
    free_cash_flow_yield: float | None
    peg_ratio: float | None
    # Profitability
    gross_margin: float | None
    operating_margin: float | None
    net_margin: float | None
    return_on_equity: float | None
    return_on_assets: float | None
    return_on_invested_capital: float | None
    # Efficiency
    asset_turnover: float | None
    inventory_turnover: float | None
    receivables_turnover: float | None
    days_sales_outstanding: float | None
    operating_cycle: float | None
    working_capital_turnover: float | None
    # Liquidity
    current_ratio: float | None
    quick_ratio: float | None
    cash_ratio: float | None
    operating_cash_flow_ratio: float | None
    # Leverage
    debt_to_equity: float | None
    debt_to_assets: float | None
    interest_coverage: float | None
    # Growth
    revenue_growth: float | None
    earnings_growth: float | None
    book_value_growth: float | None
    earnings_per_share_growth: float | None
    free_cash_flow_growth: float | None
    operating_income_growth: float | None
    ebitda_growth: float | None
    # Per-share
    payout_ratio: float | None
    earnings_per_share: float | None
    book_value_per_share: float | None
    free_cash_flow_per_share: float | None


class FinancialMetricsResponse(BaseModel):
    """List wrapper returned by the financial-metrics endpoint."""

    financial_metrics: list[FinancialMetrics]
class LineItem(BaseModel):
    """A financial-statement line item for one reporting period.

    Only identity fields are declared; the actual line-item values
    arrive as dynamic extra fields (``extra: allow``).
    """

    ticker: str
    report_period: str
    period: str
    currency: str
    # Allow additional fields dynamically
    model_config = {"extra": "allow"}


class LineItemResponse(BaseModel):
    """Search-result wrapper returned by the line-items endpoint."""

    search_results: list[LineItem]
class InsiderTrade(BaseModel):
    """A single insider transaction filing for a ticker."""

    ticker: str
    issuer: str | None
    name: str | None
    title: str | None
    is_board_director: bool | None
    transaction_date: str | None
    transaction_shares: float | None
    transaction_price_per_share: float | None
    transaction_value: float | None
    shares_owned_before_transaction: float | None
    shares_owned_after_transaction: float | None
    security_title: str | None
    # Required: used as the dedup key by the insider-trades cache.
    filing_date: str


class InsiderTradeResponse(BaseModel):
    """List wrapper returned by the insider-trades endpoint."""

    insider_trades: list[InsiderTrade]
class CompanyNews(BaseModel):
    """A single news article associated with a ticker."""

    category: str | None = None
    ticker: str
    title: str
    related: str | None = None
    source: str
    # Publication date; used as the dedup key by the company-news cache.
    date: str | None = None
    url: str
    summary: str | None = None


class CompanyNewsResponse(BaseModel):
    """List wrapper returned by the company-news endpoint."""

    news: list[CompanyNews]
class CompanyFacts(BaseModel):
    """Static descriptive facts about a company (profile data)."""

    ticker: str
    name: str
    cik: str | None = None
    industry: str | None = None
    sector: str | None = None
    category: str | None = None
    exchange: str | None = None
    is_active: bool | None = None
    listing_date: str | None = None
    location: str | None = None
    market_cap: float | None = None
    number_of_employees: int | None = None
    sec_filings_url: str | None = None
    sic_code: str | None = None
    sic_industry: str | None = None
    sic_sector: str | None = None
    website_url: str | None = None
    weighted_average_shares: int | None = None


class CompanyFactsResponse(BaseModel):
    """Single-object wrapper returned by the company-facts endpoint."""

    company_facts: CompanyFacts
class Position(BaseModel):
    """Position information - for Portfolio mode"""

    long: int = 0  # Long position quantity (shares)
    short: int = 0  # Short position quantity (shares)
    long_cost_basis: float = 0.0  # Long position average cost
    short_cost_basis: float = 0.0  # Short position average cost


class Portfolio(BaseModel):
    """Portfolio - for Portfolio mode"""

    cash: float = 100000.0  # Available cash
    # ticker -> Position mapping (pydantic copies this mutable default
    # per instance, so it is safe to declare inline)
    positions: dict[str, Position] = {}
    # Margin requirement (0.0 means shorting disabled, 0.5 means 50% margin)
    margin_requirement: float = 0.0
    margin_used: float = 0.0  # Margin used
class AnalystSignal(BaseModel):
    """A single analyst agent's output for one ticker."""

    signal: str | None = None
    confidence: float | None = None
    # Free-form explanation; either structured (dict) or plain text.
    reasoning: dict | str | None = None
    max_position_size: float | None = None  # For risk management signals


class TickerAnalysis(BaseModel):
    """All analyst signals collected for one ticker."""

    ticker: str
    analyst_signals: dict[str, AnalystSignal]  # agent_name -> signal mapping


class AgentStateData(BaseModel):
    """Shared state passed between agents during one analysis run."""

    tickers: list[str]
    portfolio: Portfolio
    start_date: str
    end_date: str
    ticker_analyses: dict[str, TickerAnalysis]  # ticker -> analysis mapping
class AgentStateMetadata(BaseModel):
show_reasoning: bool = False
model_config = {"extra": "allow"}