# -*- coding: utf-8 -*- """Align persisted news to the nearest NYSE trading date.""" from __future__ import annotations from datetime import time import pandas as pd import pandas_market_calendars as mcal from backend.data.market_store import MarketStore NYSE_CALENDAR = mcal.get_calendar("NYSE") def _next_trading_day(date_str: str) -> str: start = pd.Timestamp(date_str).tz_localize(None) sessions = NYSE_CALENDAR.valid_days( start_date=(start - pd.Timedelta(days=1)).strftime("%Y-%m-%d"), end_date=(start + pd.Timedelta(days=10)).strftime("%Y-%m-%d"), ) future = [ pd.Timestamp(day).tz_localize(None).strftime("%Y-%m-%d") for day in sessions if pd.Timestamp(day).tz_localize(None) >= start ] return future[0] if future else date_str def resolve_trade_date(published_utc: str | None) -> str | None: """Map a published timestamp to an NYSE trade date.""" if not published_utc: return None timestamp = pd.to_datetime(published_utc, utc=True, errors="coerce") if pd.isna(timestamp): return None nyse_time = timestamp.tz_convert("America/New_York") candidate = nyse_time.date().isoformat() valid_days = NYSE_CALENDAR.valid_days(start_date=candidate, end_date=candidate) if len(valid_days) == 0: return _next_trading_day(candidate) if nyse_time.time() >= time(16, 0): return _next_trading_day((nyse_time + pd.Timedelta(days=1)).date().isoformat()) return candidate def align_news_for_symbol(store: MarketStore, symbol: str, *, limit: int = 5000) -> int: """Fill missing trade_date values for one ticker.""" pending = store.get_news_without_trade_date(symbol, limit=limit) updates = [] for row in pending: trade_date = resolve_trade_date(row.get("published_utc")) if trade_date: updates.append( { "news_id": row["news_id"], "symbol": row["symbol"], "trade_date": trade_date, } ) if not updates: return 0 return store.set_trade_dates(updates)