feat: initial commit - EvoTraders project

量化交易多智能体系统,包含:
- 分析师、投资组合经理、风险经理等智能体
- 股票分析、投资组合管理、风险控制工具
- React 前端界面
- FastAPI 后端服务

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2026-03-13 04:34:06 +08:00
commit 12de93aa30
115 changed files with 29304 additions and 0 deletions

View File

@@ -0,0 +1,387 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Automatic Incremental Historical Data Update Module
Features:
1. Fetch stock historical data from configured API (Finnhub or Financial Datasets)
2. Incrementally update CSV files in ret_data directory
3. Automatically detect last update date, only download new data
4. Calculate returns (ret)
5. Support batch updates for multiple stocks
"""
# flake8: noqa: E501
import logging
import os
import sys
import time
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional
import exchange_calendars as xcals
import pandas as pd
import pandas_market_calendars as mcal
from dotenv import load_dotenv
from backend.config.data_config import (
get_config,
)
from backend.tools.data_tools import get_prices, prices_to_df
# Add project root directory to path
# NOTE(review): this append runs *after* the `backend.*` imports above, so it
# cannot help resolve those imports here; it only benefits later/dynamic
# imports — confirm whether it should be moved before the backend imports.
BASE_DIR = Path(__file__).resolve().parents[2]
if str(BASE_DIR) not in sys.path:
    sys.path.append(str(BASE_DIR))
# Configure logging
# Module-wide logging: timestamped INFO-level messages via the root handler.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
logger = logging.getLogger(__name__)
class DataUpdater:
    """Incremental updater for per-ticker historical price CSV files.

    Fetches OHLCV data through the project price API (source selected by
    ``get_config``), appends only rows newer than the last stored date,
    recomputes the next-day return column ``ret``, and writes one CSV per
    ticker under ``data_dir``.
    """

    # Directory holding one "<TICKER>.csv" file per stock.
    data_dir: Path

    def __init__(
        self,
        data_dir: Optional[str] = None,
        start_date: str = "2022-01-01",
    ):
        """
        Initialize the data updater.

        Args:
            data_dir: Data storage directory. Defaults to
                ``backend/data/ret_data`` under the project root.
            start_date: Historical data start date (YYYY-MM-DD) used for
                first-time or forced full downloads.
        """
        # Data source and API key come from the centralized config.
        config = get_config()
        self.data_source = config.source
        self.api_key = config.api_key
        # Resolve the storage directory and make sure it exists.
        if data_dir is None:
            self.data_dir = BASE_DIR / "backend" / "data" / "ret_data"
        else:
            self.data_dir = Path(data_dir)
        self.data_dir.mkdir(parents=True, exist_ok=True)
        self.start_date = start_date
        # Finnhub requires a client object; the other source is key-only.
        if self.data_source == "finnhub":
            import finnhub
            self.client = finnhub.Client(api_key=self.api_key)
            logger.info("Finnhub client initialized")
        else:
            self.client = None
            logger.info("Financial Datasets API configured")

    def get_trading_dates(self, start_date: str, end_date: str) -> List[str]:
        """Return NYSE trading dates in [start_date, end_date] as YYYY-MM-DD.

        Tries pandas_market_calendars first, then exchange_calendars; on any
        failure it falls back to plain business days (Mon-Fri, ignoring
        market holidays).
        """
        try:
            if mcal is not None:
                nyse = mcal.get_calendar("NYSE")
                trading_dates = nyse.valid_days(
                    start_date=start_date,
                    end_date=end_date,
                )
                return [date.strftime("%Y-%m-%d") for date in trading_dates]
            if xcals is not None:
                nyse = xcals.get_calendar("XNYS")
                trading_dates = nyse.sessions_in_range(start_date, end_date)
                return [date.strftime("%Y-%m-%d") for date in trading_dates]
        except Exception as e:
            logger.warning(
                f"Failed to get US trading calendar, using business days: {e}",
            )
        # Fallback to simple business day method
        date_range = pd.date_range(start_date, end_date, freq="B")
        return [date.strftime("%Y-%m-%d") for date in date_range]

    def get_last_date_from_csv(self, ticker: str) -> Optional[datetime]:
        """Return the most recent data date stored in ``<ticker>.csv``.

        Returns None when the file is missing, empty, lacks a ``time``
        column, or cannot be parsed — the caller then performs a full
        download from ``self.start_date``.
        """
        csv_path = self.data_dir / f"{ticker}.csv"
        if not csv_path.exists():
            logger.info(f"{ticker}.csv does not exist, will create new file")
            return None
        try:
            df = pd.read_csv(csv_path)
            if df.empty or "time" not in df.columns:
                return None
            # Rows are kept sorted by date, so the last row is the newest.
            last_date_str = df["time"].iloc[-1]
            last_date = datetime.strptime(last_date_str, "%Y-%m-%d")
            logger.info(f"{ticker} last data date: {last_date_str}")
            return last_date
        except Exception as e:
            logger.warning(f"Failed to read {ticker}.csv: {e}")
            return None

    def fetch_data_from_api(
        self,
        ticker: str,
        start_date: datetime,
        end_date: datetime,
    ) -> Optional[pd.DataFrame]:
        """Download prices for ``ticker`` and shape them for CSV storage.

        Returns:
            DataFrame with columns ``open, close, high, low, volume, time,
            ret`` (``ret`` is the next-day close-to-close return), or None
            when the API returns no data.
        """
        start_date_str = start_date.strftime("%Y-%m-%d")
        end_date_str = end_date.strftime("%Y-%m-%d")
        logger.info(
            f"Fetching {ticker} data from {self.data_source}: {start_date_str} to {end_date_str}",
        )
        prices = get_prices(
            ticker=ticker,
            start_date=start_date_str,
            end_date=end_date_str,
        )
        if not prices:
            logger.warning(f"{ticker} no data returned from API")
            return None
        # Convert to DataFrame; prices_to_df indexes by "Date", so surface
        # the index and store it as a plain string column.
        df = prices_to_df(prices)
        df = df.reset_index()
        df["time"] = df["Date"].dt.strftime("%Y-%m-%d")
        # ret[t] = close[t+1] / close[t] - 1 (next-day return; last row NaN).
        df["ret"] = df["close"].pct_change().shift(-1)
        # Select needed columns
        df = df[["open", "close", "high", "low", "volume", "time", "ret"]]
        logger.info(f"Successfully fetched {ticker} data: {len(df)} records")
        return df

    def merge_and_save(self, ticker: str, new_data: pd.DataFrame) -> bool:
        """Merge ``new_data`` into ``<ticker>.csv`` and write it back.

        Existing rows are kept; on date collisions the freshly downloaded
        row wins, and ``ret`` is recomputed over the merged series.

        Returns:
            True on success, False when reading or writing the CSV fails.
        """
        csv_path = self.data_dir / f"{ticker}.csv"
        try:
            if csv_path.exists():
                old_data = pd.read_csv(csv_path)
                logger.info(f"{ticker} existing data: {len(old_data)} records")
                # Merge and deduplicate; newly downloaded rows override
                # any overlapping dates.
                combined = pd.concat([old_data, new_data], ignore_index=True)
                combined = combined.drop_duplicates(
                    subset=["time"],
                    keep="last",
                )
                combined = combined.sort_values("time").reset_index(drop=True)
                # ret depends on consecutive closes, so recompute it over
                # the merged series instead of trusting per-chunk values.
                combined["ret"] = combined["close"].pct_change().shift(-1)
                logger.info(f"{ticker} merged data: {len(combined)} records")
            else:
                # First download: sort defensively before writing so the
                # "last row is newest" invariant holds from day one.
                combined = new_data.sort_values("time").reset_index(drop=True)
                logger.info(f"{ticker} new file: {len(combined)} records")
            combined.to_csv(csv_path, index=False)
            logger.info(f"{ticker} data saved to: {csv_path}")
            return True
        except Exception as e:
            logger.error(f"Failed to save {ticker} data: {e}")
            return False

    def update_ticker(
        self,
        ticker: str,
        force_full_update: bool = False,
    ) -> bool:
        """Update data for a single stock.

        Args:
            ticker: Stock symbol, e.g. "AAPL".
            force_full_update: Re-download everything from ``self.start_date``
                instead of resuming after the last stored date.

        Returns:
            True on success or when no update is needed; False when the API
            returned nothing despite a gap of more than 3 days, or saving
            failed.
        """
        logger.info(f"{'='*60}")
        logger.info(f"Starting update for {ticker}")
        logger.info(f"{'='*60}")
        # Determine start date
        if force_full_update:
            start_date = datetime.strptime(self.start_date, "%Y-%m-%d")
            logger.info(f"Force full update, start date: {start_date.date()}")
        else:
            last_date = self.get_last_date_from_csv(ticker)
            if last_date:
                # Resume the day after the last stored row.
                start_date = last_date + timedelta(days=1)
                logger.info(
                    f"Incremental update, start date: {start_date.date()}",
                )
            else:
                start_date = datetime.strptime(self.start_date, "%Y-%m-%d")
                logger.info(f"First update, start date: {start_date.date()}")
        end_date = datetime.now()
        if start_date.date() >= end_date.date():
            logger.info(f"{ticker} data is up to date, no update needed")
            return True
        new_data = self.fetch_data_from_api(ticker, start_date, end_date)
        if new_data is None or new_data.empty:
            # A short empty gap is normal (weekend/holiday); a long one is
            # treated as a failure.
            days_diff = (end_date - start_date).days
            if days_diff <= 3:
                logger.info(
                    f"{ticker} has no new data (may be weekend/holiday)",
                )
                return True
            else:
                logger.warning(f"{ticker} has no new data")
                return False
        success = self.merge_and_save(ticker, new_data)
        if success:
            logger.info(f"{ticker} update completed")
        else:
            logger.error(f"{ticker} update failed")
        return success

    def update_all_tickers(
        self,
        tickers: List[str],
        force_full_update: bool = False,
    ) -> Dict[str, bool]:
        """Update every ticker in ``tickers`` and return a success map.

        A failure — including an unexpected exception — for one ticker is
        logged and recorded as False, but does not abort the rest of the
        batch.
        """
        results: Dict[str, bool] = {}
        logger.info(f"{'='*60}")
        logger.info(f"Starting batch update for {len(tickers)} stocks")
        logger.info(f"Stock list: {', '.join(tickers)}")
        logger.info(f"{'='*60}")
        for i, ticker in enumerate(tickers, 1):
            logger.info(f"[{i}/{len(tickers)}] Processing {ticker}")
            try:
                results[ticker] = self.update_ticker(ticker, force_full_update)
            except Exception:
                # One bad ticker must not kill the whole batch.
                logger.exception(f"{ticker} update raised an unexpected error")
                results[ticker] = False
            # API rate limiting
            if i < len(tickers):
                time.sleep(1)
        # Print summary
        logger.info(f"{'='*60}")
        logger.info("Update Summary")
        logger.info(f"{'='*60}")
        success_count = sum(results.values())
        fail_count = len(results) - success_count
        logger.info(f"Success: {success_count}")
        logger.info(f"Failed: {fail_count}")
        if fail_count > 0:
            failed_tickers = [t for t, s in results.items() if not s]
            logger.warning(f"Failed stocks: {', '.join(failed_tickers)}")
        logger.info(f"{'='*60}\n")
        return results
def main():
    """Command line entry point.

    Parses arguments, resolves the ticker list (the ``--tickers`` flag wins
    over the ``TICKERS`` environment variable), runs the batch update, and
    exits 0 unless configuration is invalid or the run itself crashes.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="Automatically update stock historical data",
    )
    parser.add_argument(
        "--tickers",
        type=str,
        help="Stock ticker list (comma-separated), e.g.: AAPL,MSFT,GOOGL",
    )
    parser.add_argument(
        "--data-dir",
        type=str,
        help="Data storage directory (default: backend/data/ret_data)",
    )
    parser.add_argument(
        "--start-date",
        type=str,
        default="2022-01-01",
        help="Historical data start date (YYYY-MM-DD, default: 2022-01-01)",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Force full update (re-download all data)",
    )
    args = parser.parse_args()
    # Load environment variables (API keys, TICKERS, ...)
    load_dotenv()
    # Fail fast when no data source / API key is configured
    try:
        config = get_config()
        logger.info(f"Using data source: {config.source}")
    except ValueError as e:
        logger.error(str(e))
        sys.exit(1)
    # Resolve the ticker list: CLI flag first, then the environment
    if args.tickers:
        tickers = [t.strip().upper() for t in args.tickers.split(",")]
    else:
        tickers_env = os.getenv("TICKERS", "")
        if tickers_env:
            tickers = [t.strip().upper() for t in tickers_env.split(",")]
        else:
            logger.error("Stock list not provided")
            logger.error(
                "Please set via --tickers parameter or TICKERS environment variable",
            )
            sys.exit(1)
    # Create updater
    updater = DataUpdater(
        data_dir=args.data_dir,
        start_date=args.start_date,
    )
    # Execute update
    try:
        results = updater.update_all_tickers(
            tickers,
            force_full_update=args.force,
        )
    except Exception:
        # Log the traceback instead of dying silently, then signal failure.
        logger.exception("Batch update aborted by an unexpected error")
        sys.exit(1)
    # Return status code: per-ticker failures are reported above but do not
    # fail the run; only configuration errors and crashes exit non-zero.
    success_count = sum(results.values())
    if success_count == len(results):
        logger.info("All stocks updated successfully!")
        sys.exit(0)
    elif success_count == 0:
        logger.warning("All stocks have no new data (may be weekend/holiday)")
        sys.exit(0)
    else:
        logger.warning("Some stocks failed to update, but will continue")
        sys.exit(0)


if __name__ == "__main__":
    main()