Add explain analysis workflow and UI
This commit is contained in:
64
backend/data/news_alignment.py
Normal file
64
backend/data/news_alignment.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Align persisted news to the nearest NYSE trading date."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import time
|
||||
|
||||
import pandas as pd
|
||||
import pandas_market_calendars as mcal
|
||||
|
||||
from backend.data.market_store import MarketStore
|
||||
|
||||
|
||||
# Shared NYSE calendar, built once at import time and reused by every helper
# in this module for trading-day lookups.
NYSE_CALENDAR = mcal.get_calendar("NYSE")
|
||||
|
||||
|
||||
def _next_trading_day(date_str: str) -> str:
    """Return the first NYSE session on or after ``date_str``.

    Sessions are looked up in a window running from one day before the
    given date to ten days after it.

    Args:
        date_str: Date to start searching from; parsed with ``pd.Timestamp``
            and stripped of any timezone.

    Returns:
        The matching session formatted as ``YYYY-MM-DD``.  When the window
        contains no session on or after the date, ``date_str`` is returned
        unchanged.
    """
    anchor = pd.Timestamp(date_str).tz_localize(None)
    window_start = anchor - pd.Timedelta(days=1)
    window_end = anchor + pd.Timedelta(days=10)

    sessions = NYSE_CALENDAR.valid_days(
        start_date=window_start.strftime("%Y-%m-%d"),
        end_date=window_end.strftime("%Y-%m-%d"),
    )

    for session in sessions:
        # Drop the timezone so the session compares against the naive anchor.
        naive_session = pd.Timestamp(session).tz_localize(None)
        if naive_session >= anchor:
            return naive_session.strftime("%Y-%m-%d")

    # No session found in the window: hand the input back untouched.
    return date_str
|
||||
|
||||
|
||||
def resolve_trade_date(published_utc: str | None) -> str | None:
    """Map a published timestamp to an NYSE trade date.

    The timestamp is parsed as UTC and converted to ``America/New_York``.
    News published on a day with no NYSE session, or at/after that session's
    close, is assigned to the next session; anything else is assigned to the
    New York calendar day it was published on.

    The closing time is taken from the calendar's schedule for that session
    rather than assumed to be 16:00, so news published after the close of an
    early-close session is rolled forward too.

    Args:
        published_utc: Publication timestamp, parsed with
            ``pd.to_datetime(..., utc=True, errors="coerce")``.

    Returns:
        The trade date as ``YYYY-MM-DD``, or ``None`` when the input is empty
        or cannot be parsed.
    """
    if not published_utc:
        return None
    timestamp = pd.to_datetime(published_utc, utc=True, errors="coerce")
    if pd.isna(timestamp):
        return None

    nyse_time = timestamp.tz_convert("America/New_York")
    candidate = nyse_time.date().isoformat()

    valid_days = NYSE_CALENDAR.valid_days(start_date=candidate, end_date=candidate)
    if len(valid_days) == 0:
        # Weekend or holiday: roll forward to the next session.
        return _next_trading_day(candidate)

    # Look up the real close for this session; early-close days end before
    # 16:00 New York time.
    schedule = NYSE_CALENDAR.schedule(start_date=candidate, end_date=candidate)
    if schedule.empty:
        # No schedule row for a valid day: fall back to the regular close.
        after_close = nyse_time.time() >= time(16, 0)
    else:
        # NOTE(review): schedule() is expected to return timezone-aware
        # timestamps (UTC by default) -- confirm for the installed version.
        market_close = pd.Timestamp(schedule["market_close"].iloc[0])
        after_close = timestamp >= market_close

    if after_close:
        following_day = (nyse_time + pd.Timedelta(days=1)).date().isoformat()
        return _next_trading_day(following_day)
    return candidate
|
||||
|
||||
|
||||
def align_news_for_symbol(store: MarketStore, symbol: str, *, limit: int = 5000) -> int:
    """Fill missing trade_date values for one ticker.

    Fetches the rows that ``store.get_news_without_trade_date`` returns for
    ``symbol``, resolves a trade date from each row's ``published_utc`` and
    writes the resolved dates back through ``store.set_trade_dates``.

    Args:
        store: Store used to read the pending rows and persist the updates.
        symbol: Ticker whose news rows should be aligned.
        limit: Maximum number of pending rows to request from the store.

    Returns:
        ``0`` when no row yields a trade date; otherwise whatever
        ``store.set_trade_dates`` returns (presumably the number of rows
        updated -- verify against the store implementation).
    """
    resolved_rows = (
        (row, resolve_trade_date(row.get("published_utc")))
        for row in store.get_news_without_trade_date(symbol, limit=limit)
    )
    # Rows whose timestamp could not be resolved are skipped.
    updates = [
        {
            "news_id": row["news_id"],
            "symbol": row["symbol"],
            "trade_date": trade_date,
        }
        for row, trade_date in resolved_rows
        if trade_date
    ]
    return store.set_trade_dates(updates) if updates else 0
|
||||
Reference in New Issue
Block a user