Files
evotraders/backend/data/news_alignment.py
2026-03-30 17:46:44 +08:00

65 lines
2.1 KiB
Python

# -*- coding: utf-8 -*-
"""Align persisted news to the nearest NYSE trading date."""
from __future__ import annotations
from datetime import time
import pandas as pd
import pandas_market_calendars as mcal
from backend.data.market_store import MarketStore
# Shared NYSE calendar, created once when the module is imported and reused
# by every trade-date lookup in this module.
NYSE_CALENDAR = mcal.get_calendar("NYSE")
def _next_trading_day(date_str: str) -> str:
    """Return the first NYSE session on or after ``date_str``.

    The calendar is queried over a window running from one day before the
    given date to ten days after it. The first session the calendar yields
    that is not earlier than the given date is returned as ``YYYY-MM-DD``.
    If the window contains no such session, ``date_str`` is returned
    unchanged.
    """
    anchor = pd.Timestamp(date_str).tz_localize(None)
    window_open = (anchor - pd.Timedelta(days=1)).strftime("%Y-%m-%d")
    window_close = (anchor + pd.Timedelta(days=10)).strftime("%Y-%m-%d")
    sessions = NYSE_CALENDAR.valid_days(
        start_date=window_open,
        end_date=window_close,
    )
    for session in sessions:
        # Drop the timezone so the session compares cleanly with the anchor.
        session_day = pd.Timestamp(session).tz_localize(None)
        if session_day >= anchor:
            return session_day.strftime("%Y-%m-%d")
    return date_str
def resolve_trade_date(published_utc: str | None) -> str | None:
    """Map a published timestamp to an NYSE trade date.

    The timestamp is parsed as UTC and converted to New York time, then:

    * published on a non-trading day -> the next NYSE session;
    * published at or after that session's market close -> the next NYSE
      session following the publication day;
    * otherwise -> the publication day itself.

    The close time is taken from the calendar's schedule for that session
    rather than assumed to be 16:00, so early-close sessions are handled.

    Args:
        published_utc: Publication timestamp in any form accepted by
            ``pd.to_datetime``; it is parsed with ``utc=True``.

    Returns:
        The trade date as ``YYYY-MM-DD``, or ``None`` when the input is
        empty or cannot be parsed as a timestamp.
    """
    if not published_utc:
        return None
    timestamp = pd.to_datetime(published_utc, utc=True, errors="coerce")
    if pd.isna(timestamp):
        return None
    nyse_time = timestamp.tz_convert("America/New_York")
    candidate = nyse_time.date().isoformat()
    valid_days = NYSE_CALENDAR.valid_days(start_date=candidate, end_date=candidate)
    if len(valid_days) == 0:
        return _next_trading_day(candidate)
    # Ask the calendar for the real closing time of this session; a fixed
    # 16:00 cutoff would mis-assign news published after an early close.
    schedule = NYSE_CALENDAR.schedule(start_date=candidate, end_date=candidate)
    if schedule.empty:
        # Defensive fallback: keep the regular-session cutoff if the calendar
        # yields no schedule row for a day it reported as valid.
        after_close = nyse_time.time() >= time(16, 0)
    else:
        after_close = timestamp >= schedule["market_close"].iloc[0]
    if after_close:
        return _next_trading_day((nyse_time + pd.Timedelta(days=1)).date().isoformat())
    return candidate
def align_news_for_symbol(store: MarketStore, symbol: str, *, limit: int = 5000) -> int:
    """Fill missing trade_date values for one ticker.

    Fetches up to ``limit`` news rows for ``symbol`` that have no trade date,
    resolves each row's ``published_utc`` to a trade date, and hands the rows
    that resolved to ``store.set_trade_dates``. Rows whose timestamp cannot
    be resolved are skipped.

    Returns:
        ``0`` when nothing could be resolved, otherwise whatever
        ``store.set_trade_dates`` returns.
    """
    rows = store.get_news_without_trade_date(symbol, limit=limit)
    # Lazily pair each row with its resolved date so resolution and filtering
    # happen row by row, in order.
    resolved = ((row, resolve_trade_date(row.get("published_utc"))) for row in rows)
    updates = [
        {
            "news_id": row["news_id"],
            "symbol": row["symbol"],
            "trade_date": trade_date,
        }
        for row, trade_date in resolved
        if trade_date
    ]
    return store.set_trade_dates(updates) if updates else 0