Initial commit of integrated agent system

This commit is contained in:
cillin
2026-03-30 17:46:44 +08:00
commit 0fa413380c
337 changed files with 75268 additions and 0 deletions

View File

@@ -0,0 +1,2 @@
# -*- coding: utf-8 -*-
"""Explain-oriented services for stock narratives and news research."""

View File

@@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
"""Rule-based news categorization for explain UI."""
from __future__ import annotations
from typing import Any, Dict, Iterable
# Keyword table driving the rule-based categorizer. Keys are category ids
# (also used as labels); values are lowercase keywords/phrases.
CATEGORY_KEYWORDS = {
    "market": [
        "market", "stock", "rally", "sell-off", "selloff", "trading",
        "wall street", "s&p", "nasdaq", "dow", "index", "bull", "bear",
        "correction", "volatility",
    ],
    "policy": [
        "regulation", "fed", "federal reserve", "tariff", "sanction",
        "interest rate", "policy", "government", "congress", "sec",
        "trade war", "ban", "legislation", "tax",
    ],
    "earnings": [
        "earnings", "revenue", "profit", "quarter", "eps", "guidance",
        "forecast", "income", "sales", "beat", "miss", "outlook",
        "financial results",
    ],
    "product_tech": [
        "product", "ai", "chip", "cloud", "launch", "patent",
        "technology", "innovation", "release", "platform", "model",
        "software", "hardware", "gpu", "autonomous",
    ],
    "competition": [
        "competitor", "rival", "market share", "overtake", "compete",
        "competition", "vs", "versus", "battle", "challenge",
    ],
    "management": [
        "ceo", "executive", "resign", "layoff", "restructure",
        "management", "leadership", "appoint", "hire", "board",
        "chairman",
    ],
}

# Keywords of at most this many characters ("ai", "sec", "dow", "vs", ...)
# are far too ambiguous as substrings, so they must match as whole words.
_WHOLE_WORD_MAX_LEN = 3


def _build_category_pattern(keywords):
    """Compile one regex for a category's keywords, or None if there are none.

    Every keyword must start at a word boundary. Short keywords must also end
    at one (an optional plural "s"/"es" is tolerated); longer keywords act as
    stems so inflections such as "rivals" or "launched" still match.
    """
    import re  # local import: the module header does not import ``re``

    whole_words = []
    stems = []
    for keyword in keywords:
        escaped = re.escape(str(keyword).lower())
        if len(keyword) <= _WHOLE_WORD_MAX_LEN:
            whole_words.append(escaped)
        else:
            stems.append(escaped)

    branches = []
    if whole_words:
        branches.append("(?:" + "|".join(whole_words) + r")(?:e?s)?(?!\w)")
    if stems:
        branches.append("(?:" + "|".join(stems) + ")")
    if not branches:
        return None
    return re.compile(r"(?<!\w)(?:" + "|".join(branches) + ")")


def categorize_news_rows(rows: Iterable[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
    """Bucket news rows by keyword categories.

    The title, summary, related and category fields of each row are joined,
    lowercased and tested against every category in ``CATEGORY_KEYWORDS``.
    A row may land in several categories.

    Matching is word-aware rather than raw substring search, so "ai" no
    longer fires on "said", "sec" on "sector" or "rival" on "arrival".

    Args:
        rows: News rows as dicts; missing fields are treated as empty text.

    Returns:
        A dict keyed by category id. Each value holds ``label`` (the id),
        ``count`` (number of matching rows) and ``article_ids`` (ids of the
        matching rows that carry an id).
    """
    patterns = {
        category: _build_category_pattern(keywords)
        for category, keywords in CATEGORY_KEYWORDS.items()
    }
    categories: Dict[str, Dict[str, Any]] = {
        category: {"label": category, "count": 0, "article_ids": []}
        for category in CATEGORY_KEYWORDS
    }

    for row in rows:
        text = " ".join(
            str(row.get(field) or "")
            for field in ("title", "summary", "related", "category")
        ).lower()
        article_id = row.get("id")
        # A plain truthiness test would silently drop a legitimate id of 0.
        has_id = article_id is not None and article_id != ""
        for category, pattern in patterns.items():
            if pattern is None or not pattern.search(text):
                continue
            bucket = categories[category]
            bucket["count"] += 1
            if has_id:
                bucket["article_ids"].append(article_id)
    return categories

View File

@@ -0,0 +1,214 @@
# -*- coding: utf-8 -*-
"""Local range explanation built from price and persisted news."""
from __future__ import annotations
from typing import Any, Dict
from backend.enrich.llm_enricher import analyze_range_with_llm
from backend.explain.category_engine import categorize_news_rows
from backend.tools.data_tools import get_prices
def _rank_event_score(row: Dict[str, Any]) -> float:
    """Score a news row for ranking: relevance weight plus same-day move size.

    The relevance label maps to a weight (high/relevant 3.0, medium 2.0,
    low 1.0, anything else 0.5). The absolute ``ret_t0`` value, scaled by
    100, is added on top.

    A missing, unparseable or non-finite ``ret_t0`` contributes 0 instead of
    raising, because this function is used as a sort key and one bad row
    must not break the whole ranking.
    """
    import math  # local import: the module header does not import ``math``

    relevance = str(row.get("relevance") or "").strip().lower()
    relevance_score = {
        "high": 3.0,
        "relevant": 3.0,
        "medium": 2.0,
        "low": 1.0,
    }.get(relevance, 0.5)

    try:
        ret_t0 = float(row.get("ret_t0") or 0.0)
    except (TypeError, ValueError, OverflowError):
        ret_t0 = 0.0
    if not math.isfinite(ret_t0):
        # NaN compares false with everything and would corrupt sort order.
        ret_t0 = 0.0

    return relevance_score + abs(ret_t0) * 100
def _first_nonblank_text(row: Dict[str, Any], keys: tuple) -> str:
    """Return the first field in ``keys`` holding non-blank text, stripped."""
    for key in keys:
        value = row.get(key)
        if not value:
            continue
        text = str(value).strip()
        if text:
            return text
    return ""


def _summarize_factors(
    news_rows: list[Dict[str, Any]],
    *,
    sentiment: str,
    reason_key: str,
    limit: int,
) -> list[str]:
    """Collect up to ``limit`` unique factor strings for one sentiment."""
    if limit <= 0:
        return []
    fallback_keys = (reason_key, "key_discussion", "summary", "title")
    seen = set()
    output: list[str] = []
    for row in news_rows:
        if str(row.get("sentiment") or "").strip().lower() != sentiment:
            continue
        # De-duplicate on the truncated text so the returned list itself
        # never contains two identical entries.
        factor = _first_nonblank_text(row, fallback_keys)[:200]
        if not factor or factor in seen:
            continue
        seen.add(factor)
        output.append(factor)
        if len(output) >= limit:
            break
    return output


def summarize_bullish_factors(
    news_rows: list[Dict[str, Any]],
    *,
    limit: int = 5,
) -> list[str]:
    """Return up to ``limit`` unique bullish factors from positive news rows.

    For each row whose sentiment is "positive", the first non-blank of
    ``reason_growth``, ``key_discussion``, ``summary`` and ``title`` is used,
    stripped and cut to 200 characters. Input order is preserved. A
    non-positive ``limit`` yields an empty list.
    """
    return _summarize_factors(
        news_rows,
        sentiment="positive",
        reason_key="reason_growth",
        limit=limit,
    )


def summarize_bearish_factors(
    news_rows: list[Dict[str, Any]],
    *,
    limit: int = 5,
) -> list[str]:
    """Return up to ``limit`` unique bearish factors from negative news rows.

    For each row whose sentiment is "negative", the first non-blank of
    ``reason_decrease``, ``key_discussion``, ``summary`` and ``title`` is
    used, stripped and cut to 200 characters. Input order is preserved. A
    non-positive ``limit`` yields an empty list.
    """
    return _summarize_factors(
        news_rows,
        sentiment="negative",
        reason_key="reason_decrease",
        limit=limit,
    )
def build_trend_analysis(prices: list[Any]) -> str:
    """Describe how price moved in the first and second half of a range.

    Args:
        prices: Ordered price bars exposing ``open`` and ``close`` attributes.

    Returns:
        A short Chinese sentence. With fewer than two bars there is no trend
        to report. With exactly two bars only the overall change is given.
        Otherwise the range is split at the middle bar and each half is
        described, followed by a conclusion that reflects whether the two
        halves moved in the same direction or not.
    """
    if len(prices) < 2:
        return "区间样本较短,暂不具备足够趋势信息。"

    if len(prices) < 3:
        open_price = float(prices[0].open)
        close_price = float(prices[-1].close)
        change = ((close_price - open_price) / open_price) * 100 if open_price else 0.0
        return f"短区间内价格变动 {change:+.2f}%,趋势信息有限。"

    # The middle bar closes the first half and opens the second half.
    mid = len(prices) // 2
    first_open = float(prices[0].open)
    first_close = float(prices[mid].close)
    second_open = float(prices[mid].open)
    second_close = float(prices[-1].close)
    first_half = ((first_close - first_open) / first_open) * 100 if first_open else 0.0
    second_half = ((second_close - second_open) / second_open) * 100 if second_open else 0.0

    first_label = "上涨" if first_half >= 0 else "下跌"
    second_label = "上涨" if second_half >= 0 else "下跌"
    # Only claim a phase switch when the two halves actually disagree.
    if first_label == second_label:
        conclusion = "说明区间内价格走势方向保持一致。"
    else:
        conclusion = "说明价格驱动在区间内部出现了阶段性切换。"

    return (
        f"前半段{first_label} {abs(first_half):.2f}%,"
        f"后半段{second_label} {abs(second_half):.2f}%,"
        f"{conclusion}"
    )
def _event_date(row: Dict[str, Any]) -> Any:
    """Return the row's trade date, else the first 10 characters of its date."""
    return row.get("trade_date") or str(row.get("date") or "")[:10]


def build_range_explanation(
    *,
    ticker: str,
    start_date: str,
    end_date: str,
    news_rows: list[Dict[str, Any]],
) -> Dict[str, Any]:
    """Explain a price range with local price and news heuristics.

    Prices are loaded through ``get_prices``. News rows are categorized,
    split into bullish/bearish factors and ranked by ``_rank_event_score``.
    The resulting payload is handed to ``analyze_range_with_llm``; when that
    returns a dict, its non-empty fields replace the locally built text.

    Args:
        ticker: Symbol to explain.
        start_date: Range start, passed through to ``get_prices``.
        end_date: Range end, passed through to ``get_prices``.
        news_rows: News rows (dicts) associated with the range.

    Returns:
        A dict with price statistics, dominant categories and an ``analysis``
        sub-dict. If no prices are available, a dict with an ``error`` key is
        returned instead.
    """
    prices = get_prices(ticker, start_date, end_date)
    if not prices:
        return {
            "symbol": ticker,
            "start_date": start_date,
            "end_date": end_date,
            "error": "No OHLC data for this range",
        }

    open_price = float(prices[0].open)
    close_price = float(prices[-1].close)
    high_price = max(float(price.high) for price in prices)
    low_price = min(float(price.low) for price in prices)
    total_volume = sum(int(price.volume) for price in prices)
    price_change_pct = (
        ((close_price - open_price) / open_price) * 100 if open_price else 0.0
    )

    categories = categorize_news_rows(news_rows)
    news_count = len(news_rows)
    dominant_categories = sorted(
        (
            {"category": key, "count": value["count"]}
            for key, value in categories.items()
            if value["count"] > 0
        ),
        key=lambda item: item["count"],
        reverse=True,
    )

    if price_change_pct > 0:
        direction = "上涨"
    elif price_change_pct < 0:
        direction = "下跌"
    else:
        direction = "横盘"

    if dominant_categories:
        top_names = ", ".join(item["category"] for item in dominant_categories[:3])
        category_text = f"主要主题集中在 {top_names}。"
    else:
        category_text = "区间内未识别出明显的主题聚类。"

    # Ticker and dates are separated explicitly; previously they were run
    # together into one unreadable token and the sentences had no full stop.
    summary = (
        f"{ticker} 在 {start_date} 至 {end_date} 区间内{direction} {abs(price_change_pct):.2f}%。"
        f"区间覆盖 {len(prices)} 个交易日,关联新闻 {news_count} 条。{category_text}"
    )

    bullish_factors = summarize_bullish_factors(news_rows)
    bearish_factors = summarize_bearish_factors(news_rows)
    trend_analysis = build_trend_analysis(prices)
    llm_source = "local"

    # Rank once; both the LLM payload and the key events use this order.
    ranked_rows = sorted(news_rows, key=_rank_event_score, reverse=True)

    range_payload = {
        "ticker": ticker,
        "start_date": start_date,
        "end_date": end_date,
        "price_change_pct": round(price_change_pct, 2),
        "trading_days": len(prices),
        "news_count": news_count,
        "dominant_categories": dominant_categories[:5],
        "bullish_factors": bullish_factors[:3],
        "bearish_factors": bearish_factors[:3],
        "trend_analysis": trend_analysis,
        "top_news": [
            {
                "date": _event_date(row),
                "title": row.get("title") or "",
                "summary": row.get("summary") or "",
                "sentiment": row.get("sentiment") or "",
                "relevance": row.get("relevance") or "",
                "ret_t0": row.get("ret_t0"),
            }
            for row in ranked_rows[:5]
        ],
    }

    llm_analysis = analyze_range_with_llm(range_payload)
    llm_ok = isinstance(llm_analysis, dict)
    if llm_ok:
        # Each LLM field falls back to the local heuristic when empty.
        summary = llm_analysis.get("summary") or summary
        trend_analysis = llm_analysis.get("trend_analysis") or trend_analysis
        bullish_factors = llm_analysis.get("bullish_factors") or bullish_factors
        bearish_factors = llm_analysis.get("bearish_factors") or bearish_factors
        llm_source = "llm"

    key_events = [
        {
            "date": _event_date(row),
            "title": row.get("title") or "Untitled news",
            "summary": row.get("summary") or "",
            "category": row.get("category") or "",
            "id": row.get("id"),
            "sentiment": row.get("sentiment"),
            "ret_t0": row.get("ret_t0"),
        }
        for row in ranked_rows[:8]
    ]

    return {
        "symbol": ticker,
        "start_date": start_date,
        "end_date": end_date,
        "price_change_pct": round(price_change_pct, 2),
        "open_price": open_price,
        "close_price": close_price,
        "high_price": high_price,
        "low_price": low_price,
        "total_volume": total_volume,
        "trading_days": len(prices),
        "news_count": news_count,
        "dominant_categories": dominant_categories[:5],
        "analysis": {
            "summary": summary,
            "key_events": key_events,
            "bullish_factors": bullish_factors,
            "bearish_factors": bearish_factors,
            "trend_analysis": trend_analysis,
            "analysis_source": llm_source,
            "analysis_model_label": llm_analysis.get("model_label") if llm_ok else None,
        },
    }

View File

@@ -0,0 +1,202 @@
# -*- coding: utf-8 -*-
"""Same-ticker historical similar day search for explain view."""
from __future__ import annotations
from math import sqrt
from typing import Any
from backend.data.market_store import MarketStore
def _safe_float(value: Any, default: float = 0.0) -> float:
    """Convert ``value`` to a finite float, returning ``default`` on failure.

    ``default`` is returned when the value cannot be converted (``None``,
    non-numeric text, integers too large for a float) and also when the
    result is NaN or infinite, since those would corrupt the min/max and
    distance computations performed on feature vectors.
    """
    import math  # local import: the module header only imports ``sqrt``

    try:
        parsed = float(value)
    except (TypeError, ValueError, OverflowError):
        return default
    return parsed if math.isfinite(parsed) else default
def build_daily_feature_rows(
    *,
    symbol: str,
    ohlc_rows: list[dict[str, Any]],
    news_rows: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Aggregate price/news context into daily feature rows.

    One feature row is produced per distinct OHLC date, in the order the
    dates first appear in ``ohlc_rows`` (callers are expected to pass rows in
    chronological order). When a date occurs more than once, the last row
    for that date supplies the prices and the date is emitted only once.

    News rows are grouped by the first 10 characters of ``trade_date``,
    falling back to ``date``; rows with neither are ignored.

    Returns:
        A list of dicts holding article counts, a sentiment score in
        [-1, 1], the 1-day and intraday returns, the close, the forward
        returns after 1 and 3 rows (``None`` when unavailable) and up to
        three news headlines for the day.
    """
    # Later duplicates overwrite the price row but keep the first position,
    # so each date yields exactly one feature row.
    price_by_date: dict[str, dict[str, Any]] = {}
    for row in ohlc_rows:
        if row.get("date"):
            price_by_date[str(row.get("date"))] = row
    ordered_dates = list(price_by_date)
    closes = [_safe_float(price_by_date[date].get("close")) for date in ordered_dates]

    news_by_date: dict[str, list[dict[str, Any]]] = {}
    for row in news_rows:
        trade_date = str(row.get("trade_date") or "")[:10] or str(row.get("date") or "")[:10]
        if not trade_date:
            continue
        news_by_date.setdefault(trade_date, []).append(row)

    def _label(item: dict[str, Any], key: str) -> str:
        """Normalize a categorical field for comparison."""
        return str(item.get(key) or "").strip().lower()

    def _forward_return(idx: int, horizon: int) -> float | None:
        """Return from row ``idx`` to ``idx + horizon``, or None if unknown."""
        base = closes[idx]
        if idx + horizon >= len(closes) or not base:
            return None
        later = closes[idx + horizon]
        return ((later - base) / base) if later else None

    features: list[dict[str, Any]] = []
    previous_close: float | None = None
    for idx, date in enumerate(ordered_dates):
        price_row = price_by_date[date]
        close_price = closes[idx]
        # A missing open falls back to the close, giving a 0 intraday return.
        open_price = _safe_float(price_row.get("open"), close_price)
        day_news = news_by_date.get(date, [])

        positive_count = sum(1 for item in day_news if _label(item, "sentiment") == "positive")
        negative_count = sum(1 for item in day_news if _label(item, "sentiment") == "negative")
        high_relevance_count = sum(
            1 for item in day_news if _label(item, "relevance") in {"high", "relevant"}
        )

        # The first row, or a row following a zero/missing close, has no
        # usable previous close and reports 0.
        ret_1d = (
            ((close_price - previous_close) / previous_close)
            if previous_close not in (None, 0)
            else 0.0
        )
        intraday_ret = ((close_price - open_price) / open_price) if open_price else 0.0
        sentiment_score = (
            (positive_count - negative_count) / len(day_news) if day_news else 0.0
        )

        features.append(
            {
                "date": date,
                "symbol": symbol,
                "n_articles": len(day_news),
                "positive_count": positive_count,
                "negative_count": negative_count,
                "high_relevance_count": high_relevance_count,
                "sentiment_score": sentiment_score,
                "ret_1d": ret_1d,
                "intraday_ret": intraday_ret,
                "close": close_price,
                "ret_t1_after": _forward_return(idx, 1),
                "ret_t3_after": _forward_return(idx, 3),
                "news": [
                    {
                        "title": row.get("title") or "",
                        "sentiment": row.get("sentiment") or "neutral",
                    }
                    for row in day_news[:3]
                ],
            }
        )
        previous_close = close_price
    return features
def compute_similarity_scores(
    target_vector: list[float],
    candidate_vectors: list[tuple[str, list[float], dict[str, Any]]],
) -> list[dict[str, Any]]:
    """Return sorted similarity matches based on normalized Euclidean distance.

    Each dimension is scaled by its value range across the target and all
    candidates, so no single feature dominates. The distance ``d`` is mapped
    to a score ``1 / (1 + d)`` in (0, 1].

    Args:
        target_vector: Feature vector of the reference day.
        candidate_vectors: ``(date, vector, payload)`` tuples. Every vector
            must have at least as many entries as ``target_vector``.

    Returns:
        One dict per candidate with ``date``, ``score`` (rounded to four
        decimals) and the payload keys merged in, most similar first.
        Ranking uses the unrounded distance, so candidates whose displayed
        scores coincide are still ordered correctly. Exact ties keep their
        input order.
    """
    if not candidate_vectors:
        return []

    dimensions = len(target_vector)
    spans = []
    for dim in range(dimensions):
        column = [vector[dim] for _, vector, _ in candidate_vectors]
        column.append(target_vector[dim])
        # Floor the span so a constant dimension cannot divide by zero.
        spans.append(max(max(column) - min(column), 1e-9))

    ranked = []
    for date, vector, payload in candidate_vectors:
        distance = sqrt(
            sum(
                ((target_vector[dim] - vector[dim]) / spans[dim]) ** 2
                for dim in range(dimensions)
            )
        )
        item = {
            "date": date,
            "score": round(1.0 / (1.0 + distance), 4),
            **payload,
        }
        ranked.append((distance, item))

    # list.sort is stable: equal distances keep their original order.
    ranked.sort(key=lambda pair: pair[0])
    return [item for _, item in ranked]
def find_similar_days(
    store: MarketStore,
    *,
    symbol: str,
    target_date: str,
    top_k: int = 10,
) -> dict[str, Any]:
    """Find same-ticker historical days most similar to a target day.

    Args:
        store: Market store used for OHLC, enriched news and the cache.
        symbol: Ticker to search within.
        target_date: Reference day; history up to this date is considered.
        top_k: Number of matches to return, clamped to the range 1..20.

    Returns:
        A dict with ``symbol``, ``target_date``, ``target_features`` and
        ``items`` (at most ``top_k`` matches, most similar first). When the
        target date has no feature row, ``items`` is empty and an ``error``
        message is included; that result is not cached.

    The cache is keyed by symbol and date only. The full clamp of 20 matches
    is therefore stored and each call slices it to its own ``top_k``, so a
    small request no longer limits what later, larger requests receive.
    Entries written before this change may still hold fewer items.
    """
    max_items = 20
    limit = max(1, min(int(top_k), max_items))

    cached = store.get_similar_day_cache(symbol, target_date=target_date)
    payload = cached.get("payload") if cached else None
    if payload:
        if isinstance(payload, dict) and isinstance(payload.get("items"), list):
            # Return a copy so the cached object is never mutated.
            return {**payload, "items": payload["items"][:limit]}
        return payload

    ohlc_rows = store.get_ohlc(symbol, "1900-01-01", target_date)
    news_rows = store.get_news_items_enriched(symbol, end_date=target_date, limit=500)
    daily_rows = build_daily_feature_rows(symbol=symbol, ohlc_rows=ohlc_rows, news_rows=news_rows)

    target_row = next((row for row in daily_rows if row["date"] == target_date), None)
    if not target_row:
        return {
            "symbol": symbol,
            "target_date": target_date,
            "items": [],
            "error": "No feature row for target date",
        }

    vector_keys = [
        "sentiment_score",
        "n_articles",
        "positive_count",
        "negative_count",
        "high_relevance_count",
        "ret_1d",
        "intraday_ret",
    ]

    def _vector(row: dict[str, Any]) -> list[float]:
        """Project a feature row onto the comparison dimensions."""
        return [_safe_float(row.get(key)) for key in vector_keys]

    def _pct(value: Any) -> Any:
        """Convert a fractional return to a rounded percentage, keeping None."""
        return round(value * 100, 2) if value is not None else None

    candidates = []
    for row in daily_rows:
        if row["date"] == target_date:
            continue
        display = {
            "n_articles": row["n_articles"],
            "sentiment_score": round(row["sentiment_score"], 4),
            "ret_1d": _pct(row["ret_1d"]),
            "intraday_ret": _pct(row["intraday_ret"]),
            "ret_t1_after": _pct(row["ret_t1_after"]),
            "ret_t3_after": _pct(row["ret_t3_after"]),
            "top_reasons": [item["title"] for item in row["news"][:2] if item.get("title")],
            "news": row["news"],
        }
        candidates.append((row["date"], _vector(row), display))

    ranked = compute_similarity_scores(_vector(target_row), candidates)[:max_items]
    result = {
        "symbol": symbol,
        "target_date": target_date,
        "target_features": {
            "sentiment_score": round(target_row["sentiment_score"], 4),
            "n_articles": target_row["n_articles"],
            "ret_1d": _pct(target_row["ret_1d"]),
            "intraday_ret": _pct(target_row["intraday_ret"]),
            "high_relevance_count": target_row["high_relevance_count"],
        },
        "items": ranked,
    }
    store.upsert_similar_day_cache(symbol, target_date=target_date, payload=result, source="local")
    return {**result, "items": ranked[:limit]}

View File

@@ -0,0 +1,127 @@
# -*- coding: utf-8 -*-
"""Stock story generation for explain view."""
from __future__ import annotations
from datetime import datetime, timedelta
from typing import Any
from backend.data.market_store import MarketStore
def _story_number(row: dict[str, Any], *keys: str) -> float | None:
    """Return the first field in ``keys`` that parses to a usable float.

    Missing, zero, unparseable and non-finite values are skipped. ``None``
    is returned when no field qualifies.
    """
    import math  # local import: the module header does not import ``math``

    for key in keys:
        try:
            parsed = float(row.get(key) or 0.0)
        except (TypeError, ValueError, OverflowError):
            continue
        if parsed and math.isfinite(parsed):
            return parsed
    return None


def _story_reasons(rows: list[dict[str, Any]], reason_key: str, limit: int = 3) -> list[str]:
    """Collect up to ``limit`` non-empty reason snippets (220 chars max)."""
    reasons: list[str] = []
    for row in rows:
        raw = row.get(reason_key) or row.get("key_discussion") or row.get("summary") or row.get("title")
        text = str(raw or "").strip()[:220]
        if text:
            reasons.append(text)
        if len(reasons) >= limit:
            break
    return reasons


def build_stock_story(
    *,
    symbol: str,
    as_of_date: str,
    price_rows: list[dict[str, Any]],
    news_rows: list[dict[str, Any]],
) -> str:
    """Build a compact markdown story from enriched news and recent price action.

    Args:
        symbol: Ticker shown in the heading.
        as_of_date: Date label shown under the heading.
        price_rows: OHLC dicts; the first row opens and the last row closes
            the window.
        news_rows: Enriched news dicts.

    Returns:
        Markdown text with a price summary, article counts, up to five key
        moments and up to three bullish and three bearish threads. Without
        price rows only a short "no data" notice is produced.

    Malformed numeric fields are tolerated: rows without a usable high/low
    are excluded from the traded range instead of forcing it to $0.00, and
    rows without any text never produce a "- None" bullet.
    """
    lines = [f"## {symbol} Story", f"As of `{as_of_date}`"]
    if not price_rows:
        lines.append("")
        lines.append("No OHLC data available for story generation.")
        return "\n".join(lines)

    open_price = _story_number(price_rows[0], "open", "close") or 0.0
    close_price = _story_number(price_rows[-1], "close") or 0.0
    price_change = ((close_price - open_price) / open_price) * 100 if open_price else 0.0

    highs = [
        value
        for value in (_story_number(row, "high", "close") for row in price_rows)
        if value is not None
    ]
    lows = [
        value
        for value in (_story_number(row, "low", "close") for row in price_rows)
        if value is not None
    ]
    high_price = max(highs) if highs else 0.0
    low_price = min(lows) if lows else 0.0

    lines.append("")
    lines.append(
        f"The stock moved {'up' if price_change >= 0 else 'down'} "
        f"{abs(price_change):.2f}% over the recent window, trading between "
        f"${low_price:.2f} and ${high_price:.2f}."
    )

    def _sentiment(row: dict[str, Any]) -> str:
        """Normalize the sentiment label for comparison."""
        return str(row.get("sentiment") or "").strip().lower()

    positive = [row for row in news_rows if _sentiment(row) == "positive"]
    negative = [row for row in news_rows if _sentiment(row) == "negative"]

    lines.append("")
    lines.append(
        f"Recent coverage included {len(news_rows)} relevant articles "
        f"({len(positive)} positive / {len(negative)} negative)."
    )

    if news_rows:
        lines.append("")
        lines.append("### Key Moments")
        # Highly relevant rows first, then the largest same-day moves.
        ranked_rows = sorted(
            news_rows,
            key=lambda row: (
                0 if str(row.get("relevance") or "").strip().lower() in {"high", "relevant"} else 1,
                -abs(_story_number(row, "ret_t0") or 0.0),
            ),
        )
        for row in ranked_rows[:5]:
            trade_date = row.get("trade_date") or str(row.get("date") or "")[:10]
            title = row.get("title") or "Untitled"
            detail = str(row.get("key_discussion") or row.get("summary") or "").strip()[:220]
            sentiment = _sentiment(row) or "neutral"
            bullet = f"- `{trade_date}` [{sentiment}] {title}"
            # Avoid a dangling ": " when there is nothing to add.
            lines.append(f"{bullet}: {detail}" if detail else bullet)

    bullish = _story_reasons(positive, "reason_growth")
    if bullish:
        lines.append("")
        lines.append("### Bullish Threads")
        lines.extend(f"- {reason}" for reason in bullish)

    bearish = _story_reasons(negative, "reason_decrease")
    if bearish:
        lines.append("")
        lines.append("### Bearish Threads")
        lines.extend(f"- {reason}" for reason in bearish)

    return "\n".join(lines)
def get_or_create_stock_story(
    store: MarketStore,
    *,
    symbol: str,
    as_of_date: str,
) -> dict[str, Any]:
    """Return cached story or build a new one from recent market context.

    On a cache miss the story is built from the 30 calendar days ending at
    ``as_of_date`` (OHLC rows plus up to 40 enriched news items) and written
    back to the cache.

    Args:
        store: Market store providing the cache, OHLC and news accessors.
        symbol: Ticker to describe.
        as_of_date: End of the window; its first 10 characters are parsed as
            ``YYYY-MM-DD``.

    Returns:
        A dict with ``symbol``, ``as_of_date``, ``story`` (markdown) and
        ``source`` (the cached source label, or "local" when freshly built).

    A cached entry with empty content is treated as a miss. A story built
    without any price data is returned but not cached, so the placeholder
    does not stick once data becomes available. An unparseable
    ``as_of_date`` no longer raises; it simply yields the no-data story.
    """
    cached = store.get_story_cache(symbol, as_of_date=as_of_date)
    if cached and cached.get("content"):
        return {
            "symbol": symbol,
            "as_of_date": as_of_date,
            "story": cached.get("content") or "",
            "source": cached.get("source") or "cache",
        }

    start_date = None
    date_text = str(as_of_date or "")
    if len(date_text) >= 10:
        try:
            target_date = datetime.strptime(date_text[:10], "%Y-%m-%d").date()
        except ValueError:
            target_date = None
        if target_date is not None:
            # 29 days back plus the target day gives a 30-day window.
            start_date = (target_date - timedelta(days=29)).isoformat()

    price_rows = store.get_ohlc(symbol, start_date, as_of_date) if start_date else []
    news_rows = store.get_news_items_enriched(
        symbol,
        start_date=start_date,
        end_date=as_of_date,
        limit=40,
    )
    story = build_stock_story(
        symbol=symbol,
        as_of_date=as_of_date,
        price_rows=price_rows,
        news_rows=news_rows,
    )

    if price_rows:
        store.upsert_story_cache(symbol, as_of_date=as_of_date, content=story, source="local")

    return {
        "symbol": symbol,
        "as_of_date": as_of_date,
        "story": story,
        "source": "local",
    }