Initial commit of integrated agent system
This commit is contained in:
2
backend/enrich/__init__.py
Normal file
2
backend/enrich/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""News enrichment utilities for explain-oriented market research."""
|
||||
|
||||
301
backend/enrich/llm_enricher.py
Normal file
301
backend/enrich/llm_enricher.py
Normal file
@@ -0,0 +1,301 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Optional AgentScope-backed news enrichment with safe local fallback."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from backend.config.env_config import canonicalize_model_provider, get_env_bool, get_env_str
|
||||
from backend.llm.models import create_model
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EnrichedNewsItem(BaseModel):
    """Structured output schema for one enriched article.

    The field descriptions double as instructions to the LLM when this model
    is passed as the ``structured_model`` of a chat call.
    """

    # Identity of the source article; must be echoed back verbatim.
    id: str = Field(description="The source article id")
    # Categorical fields — the prompt constrains the values, but nothing
    # validates them here; downstream code lower-cases and checks them.
    relevance: str = Field(description="One of high, medium, low")
    sentiment: str = Field(description="One of positive, negative, neutral")
    # Free-text fields; kept short by prompt instruction, not by validation.
    key_discussion: str = Field(description="Concise core discussion")
    summary: str = Field(description="Concise factual summary")
    reason_growth: str = Field(description="Growth-oriented reason if present")
    reason_decrease: str = Field(description="Downside-oriented reason if present")
|
||||
|
||||
|
||||
class EnrichedNewsBatch(BaseModel):
    """Structured output schema for a batch of enriched articles.

    Wraps the list so the LLM returns a single JSON object with an ``items``
    array rather than a bare top-level array.
    """

    # One entry per analyzed article; ids should match the input batch.
    items: list[EnrichedNewsItem]
|
||||
|
||||
|
||||
class RangeAnalysisPayload(BaseModel):
    """Structured output schema for range explanation text.

    All text is expected in Simplified Chinese per the range-analysis prompt.
    """

    summary: str = Field(description="Concise Chinese range summary for the selected window")
    trend_analysis: str = Field(description="Concise Chinese trend explanation for the selected window")
    # Factor lists are trimmed to at most three entries by the caller.
    bullish_factors: list[str] = Field(description="Top bullish factors in Chinese")
    bearish_factors: list[str] = Field(description="Top bearish factors in Chinese")
|
||||
|
||||
|
||||
def get_explain_model_info() -> dict[str, str]:
    """Resolve the provider and model used by explain enrichment.

    Explain-specific environment variables (EXPLAIN_ENRICH_MODEL_PROVIDER /
    EXPLAIN_ENRICH_MODEL_NAME) take precedence over the global
    MODEL_PROVIDER / MODEL_NAME settings, with OpenAI defaults as the final
    fallback. Returns ``provider``, ``model_name``, and a combined ``label``.
    """
    raw_provider = (
        get_env_str("EXPLAIN_ENRICH_MODEL_PROVIDER")
        or get_env_str("MODEL_PROVIDER", "OPENAI")
    )
    provider = canonicalize_model_provider(raw_provider)
    model_name = get_env_str("EXPLAIN_ENRICH_MODEL_NAME") or get_env_str(
        "MODEL_NAME",
        "gpt-4o-mini",
    )
    label = f"{provider}:{model_name}"
    return {"provider": provider, "model_name": model_name, "label": label}
|
||||
|
||||
|
||||
def _normalize_enrichment_payload(payload: Any) -> dict[str, Any] | None:
    """Coerce an LLM structured-output payload into the canonical analysis dict.

    Accepts either a pydantic model or a plain dict; anything else yields
    ``None``. Text fields are stripped, categorical fields are lower-cased,
    and empty strings collapse to ``None`` so callers can rely on truthiness.
    """
    if isinstance(payload, BaseModel):
        payload = payload.model_dump()
    if not isinstance(payload, dict):
        return None

    def _text(key: str, *, lower: bool = False) -> str | None:
        # Normalize one field: stringify, strip, optionally lower, empty -> None.
        value = str(payload.get(key) or "").strip()
        if lower:
            value = value.lower()
        return value or None

    return {
        "relevance": _text("relevance", lower=True),
        "sentiment": _text("sentiment", lower=True),
        "key_discussion": _text("key_discussion"),
        "summary": _text("summary"),
        "reason_growth": _text("reason_growth"),
        "reason_decrease": _text("reason_decrease"),
        # Shallow-copy so callers that annotate raw_json (model provenance
        # keys) do not mutate the caller-owned payload through an alias.
        "raw_json": dict(payload),
    }
|
||||
|
||||
|
||||
def _run_async(coro: Any) -> Any:
|
||||
"""Run an async AgentScope model call from sync code, even inside a running loop."""
|
||||
try:
|
||||
asyncio.get_running_loop()
|
||||
except RuntimeError:
|
||||
return asyncio.run(coro)
|
||||
|
||||
with ThreadPoolExecutor(max_workers=1) as executor:
|
||||
future = executor.submit(asyncio.run, coro)
|
||||
return future.result()
|
||||
|
||||
|
||||
def _get_explain_model():
    """Instantiate the AgentScope chat model used for explain enrichment.

    Streaming is disabled and temperature kept low because callers expect a
    single deterministic-ish structured response.
    """
    info = get_explain_model_info()
    model_kwargs = {
        "model_name": info["model_name"],
        "provider": info["provider"],
        "stream": False,
        "generate_kwargs": {"temperature": 0.1},
    }
    return create_model(**model_kwargs)
|
||||
|
||||
|
||||
def llm_enrichment_enabled() -> bool:
    """Return whether AgentScope-backed LLM enrichment should be attempted.

    Requires both the EXPLAIN_ENRICH_USE_LLM flag and a credential for the
    resolved provider. Ollama runs locally and therefore needs no API key;
    any other unrecognized provider is rejected.
    """
    if not get_env_bool("EXPLAIN_ENRICH_USE_LLM", False):
        return False

    # Environment variable that holds each provider's API key.
    key_env_by_provider = {
        "OPENAI": "OPENAI_API_KEY",
        "ANTHROPIC": "ANTHROPIC_API_KEY",
        "DASHSCOPE": "DASHSCOPE_API_KEY",
        "ALIBABA": "DASHSCOPE_API_KEY",
        "GEMINI": "GOOGLE_API_KEY",
        "GOOGLE": "GOOGLE_API_KEY",
        "DEEPSEEK": "DEEPSEEK_API_KEY",
        "GROQ": "GROQ_API_KEY",
        "OPENROUTER": "OPENROUTER_API_KEY",
    }
    provider = get_explain_model_info()["provider"]
    env_key = key_env_by_provider.get(provider)
    if env_key is None:
        return provider == "OLLAMA"
    return bool(get_env_str(env_key))
|
||||
|
||||
|
||||
def llm_range_analysis_enabled() -> bool:
    """Return whether LLM range analysis should be attempted.

    A non-blank EXPLAIN_RANGE_USE_LLM acts as an extra gate on top of the
    base enrichment switch; when it is unset or blank, range analysis simply
    follows ``llm_enrichment_enabled()``.
    """
    override = get_env_str("EXPLAIN_RANGE_USE_LLM")
    has_override = override is not None and str(override).strip() != ""
    if not has_override:
        return llm_enrichment_enabled()
    return llm_enrichment_enabled() and get_env_bool("EXPLAIN_RANGE_USE_LLM", False)
|
||||
|
||||
|
||||
def analyze_news_row_with_llm(row: dict[str, Any]) -> dict[str, Any] | None:
    """Generate explain-oriented structured analysis for one article.

    Args:
        row: Raw news row; ``id``, ``title`` and ``summary`` are read.

    Returns:
        The normalized analysis dict (see ``_normalize_enrichment_payload``)
        annotated with model provenance under ``raw_json``, or ``None`` when
        enrichment is disabled or the model call/parse fails.
    """
    if not llm_enrichment_enabled():
        return None

    model = _get_explain_model()
    title = str(row.get("title") or "").strip()
    summary = str(row.get("summary") or "").strip()
    messages = [
        {
            "role": "system",
            "content": (
                "You produce concise structured financial news analysis. "
                "Use only the requested fields and keep content factual."
            ),
        },
        {
            "role": "user",
            "content": (
                "Analyze this stock-news article for an explain UI.\n"
                "Rules:\n"
                "- relevance must be one of: high, medium, low\n"
                "- sentiment must be one of: positive, negative, neutral\n"
                "- keep each text field concise and factual\n"
                f"- article id: {str(row.get('id') or '').strip()}\n"
                f"Title: {title}\n"
                f"Summary: {summary}\n"
            ),
        },
    ]
    try:
        response = _run_async(model(messages=messages, structured_model=EnrichedNewsItem))
    except Exception as e:
        # Best-effort: a failed call falls back to the local heuristic path.
        # Lazy %-args avoid formatting when the warning level is disabled.
        logger.warning("LLM enrichment failed: %s", e)
        return None

    payload = _normalize_enrichment_payload(getattr(response, "metadata", None))
    if payload:
        # Resolve provenance once instead of re-reading the environment per key.
        model_info = get_explain_model_info()
        payload.setdefault("raw_json", {})
        payload["raw_json"]["model_provider"] = model_info["provider"]
        payload["raw_json"]["model_name"] = model_info["model_name"]
        payload["raw_json"]["model_label"] = model_info["label"]
    return payload
|
||||
|
||||
|
||||
def analyze_news_rows_with_llm(rows: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:
    """Generate structured analysis for multiple articles in one request.

    Args:
        rows: Raw news rows; entries without a non-blank ``id`` are dropped.

    Returns:
        Mapping of article id -> normalized analysis dict annotated with model
        provenance. On any model failure an empty dict is returned so callers
        fall back to per-row/local analysis.
    """
    if not llm_enrichment_enabled() or not rows:
        return {}

    payload_rows = [
        {
            "id": str(row.get("id") or "").strip(),
            "title": str(row.get("title") or "").strip(),
            "summary": str(row.get("summary") or "").strip(),
        }
        for row in rows
        if str(row.get("id") or "").strip()
    ]
    if not payload_rows:
        return {}

    model = _get_explain_model()
    messages = [
        {
            "role": "system",
            "content": (
                "You produce concise structured financial news analysis in JSON. "
                "Preserve ids exactly and do not invent extra items."
            ),
        },
        {
            "role": "user",
            "content": (
                "Analyze these stock-news articles for an explain UI.\n"
                "For each item return: id, relevance, sentiment, key_discussion, summary, "
                "reason_growth, reason_decrease.\n"
                "Rules:\n"
                "- relevance must be one of: high, medium, low\n"
                "- sentiment must be one of: positive, negative, neutral\n"
                "- keep all text concise and factual\n"
                f"Articles: {payload_rows}"
            ),
        },
    ]
    try:
        response = _run_async(
            model(messages=messages, structured_model=EnrichedNewsBatch),
        )
    except Exception as e:
        # Keep best-effort semantics, but leave a trace for operators instead
        # of swallowing the failure silently (matches the single-row path).
        logger.warning("LLM batch enrichment failed: %s", e)
        return {}

    metadata = getattr(response, "metadata", None)
    if isinstance(metadata, BaseModel):
        metadata = metadata.model_dump()
    items = metadata.get("items") if isinstance(metadata, dict) else None
    if not isinstance(items, list):
        return {}

    # Resolve provenance once for the whole batch rather than 3x per item.
    model_info = get_explain_model_info()
    results: dict[str, dict[str, Any]] = {}
    for item in items:
        if isinstance(item, BaseModel):
            item = item.model_dump()
        if not isinstance(item, dict):
            # Unexpected item shape from the model; skip rather than guess.
            continue
        normalized = _normalize_enrichment_payload(item)
        news_id = str(item.get("id") or "").strip()
        if normalized and news_id:
            normalized.setdefault("raw_json", {})
            normalized["raw_json"]["model_provider"] = model_info["provider"]
            normalized["raw_json"]["model_name"] = model_info["model_name"]
            normalized["raw_json"]["model_label"] = model_info["label"]
            results[news_id] = normalized
    return results
|
||||
|
||||
|
||||
def analyze_range_with_llm(payload: dict[str, Any]) -> dict[str, Any] | None:
    """Generate explain-oriented range summary and factor refinement.

    Args:
        payload: Pre-computed facts about the selected window; passed to the
            model verbatim inside the prompt.

    Returns:
        Dict with ``summary``, ``trend_analysis``, up to three ``bullish_factors``
        and ``bearish_factors`` (all Chinese per the prompt), plus model
        provenance, or ``None`` when disabled or the model call fails.
    """
    if not llm_range_analysis_enabled():
        return None

    model = _get_explain_model()
    messages = [
        {
            "role": "system",
            "content": (
                "You write concise Chinese stock range analysis for an explain UI. "
                "Use only the supplied facts. Keep the tone factual and analyst-like."
            ),
        },
        {
            "role": "user",
            "content": (
                "请基于给定事实生成区间分析。\n"
                "输出字段:summary, trend_analysis, bullish_factors, bearish_factors。\n"
                "要求:\n"
                "- 全部使用简体中文\n"
                "- summary 1到2句,概括区间走势、新闻密度和主导主题\n"
                "- trend_analysis 1句,解释区间内部阶段变化\n"
                "- bullish_factors 和 bearish_factors 各返回最多3条短句\n"
                "- 不要编造未提供的信息\n"
                f"事实数据: {payload}"
            ),
        },
    ]
    try:
        response = _run_async(
            model(messages=messages, structured_model=RangeAnalysisPayload),
        )
    except Exception as e:
        # Lazy %-args avoid formatting when the warning level is disabled.
        logger.warning("LLM enrichment failed: %s", e)
        return None

    metadata = getattr(response, "metadata", None)
    if isinstance(metadata, BaseModel):
        metadata = metadata.model_dump()
    if not isinstance(metadata, dict):
        return None

    def _top_factors(key: str) -> list[str]:
        # Keep at most three non-empty, stripped factor strings.
        stripped = [str(item).strip() for item in list(metadata.get(key) or [])]
        return [item for item in stripped if item][:3]

    # Resolve provenance once instead of re-reading the environment per key.
    model_info = get_explain_model_info()
    return {
        "summary": str(metadata.get("summary") or "").strip() or None,
        "trend_analysis": str(metadata.get("trend_analysis") or "").strip() or None,
        "bullish_factors": _top_factors("bullish_factors"),
        "bearish_factors": _top_factors("bearish_factors"),
        "model_provider": model_info["provider"],
        "model_name": model_info["model_name"],
        "model_label": model_info["label"],
    }
|
||||
362
backend/enrich/news_enricher.py
Normal file
362
backend/enrich/news_enricher.py
Normal file
@@ -0,0 +1,362 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Lightweight news enrichment for explain-oriented market analysis."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from typing import Any
|
||||
|
||||
from backend.config.env_config import get_env_int
|
||||
from backend.enrich.llm_enricher import (
|
||||
analyze_news_row_with_llm,
|
||||
analyze_news_rows_with_llm,
|
||||
llm_enrichment_enabled,
|
||||
)
|
||||
from backend.data.market_store import MarketStore
|
||||
|
||||
|
||||
# Keyword heuristics for the local (non-LLM) fallback analyzer. Matching is
# plain substring search over the lower-cased "title summary" text, so a
# keyword can also fire inside a longer word.

# Words that suggest a positive tone for the stock.
POSITIVE_KEYWORDS = (
    "beat", "surge", "gain", "growth", "record", "upgrade", "strong",
    "partnership", "approved", "launch", "expands", "profit",
)
# Words that suggest a negative tone for the stock.
NEGATIVE_KEYWORDS = (
    "miss", "drop", "fall", "cut", "downgrade", "weak", "warning",
    "delay", "lawsuit", "probe", "tariff", "decline", "layoff",
)
# Topics that usually move a stock; any hit marks the article high relevance.
HIGH_RELEVANCE_KEYWORDS = (
    "earnings", "guidance", "profit", "revenue", "ceo", "fda", "tariff",
    "regulation", "acquisition", "buyback", "forecast", "launch",
)
|
||||
|
||||
|
||||
def _dedupe_key(row: dict[str, Any]) -> str:
|
||||
trade_date = str(row.get("trade_date") or row.get("date") or "")[:10]
|
||||
title = str(row.get("title") or "").strip().lower()
|
||||
summary = str(row.get("summary") or "").strip().lower()[:160]
|
||||
raw = f"{trade_date}::{title}::{summary}"
|
||||
return hashlib.sha1(raw.encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
def _chunk_rows(rows: list[dict[str, Any]], size: int) -> list[list[dict[str, Any]]]:
|
||||
chunk_size = max(1, int(size))
|
||||
return [rows[index:index + chunk_size] for index in range(0, len(rows), chunk_size)]
|
||||
|
||||
|
||||
def classify_news_row(row: dict[str, Any]) -> dict[str, Any]:
    """Return a lightweight explain-oriented analysis for one article.

    The LLM path is tried first; when it is disabled or fails, keyword
    heuristics over the title and summary text provide a local fallback.
    The result always carries an ``analysis_source`` of "llm" or "local".
    """
    llm_result = analyze_news_row_with_llm(row)
    if isinstance(llm_result, dict):
        analysis = dict(llm_result)
        fallback_summary = str(row.get("summary") or row.get("title") or "")[:280]
        analysis.setdefault("summary", fallback_summary)
        analysis.setdefault("raw_json", row)
        analysis["analysis_source"] = "llm"
        return analysis

    title = str(row.get("title") or "").strip()
    summary = str(row.get("summary") or "").strip()
    haystack = f"{title} {summary}".lower()

    # Substring hits against the keyword lists drive sentiment and relevance.
    positive_score = sum(1 for keyword in POSITIVE_KEYWORDS if keyword in haystack)
    negative_score = sum(1 for keyword in NEGATIVE_KEYWORDS if keyword in haystack)
    topic_hits = [keyword for keyword in HIGH_RELEVANCE_KEYWORDS if keyword in haystack]

    if positive_score > negative_score:
        sentiment = "positive"
    elif negative_score > positive_score:
        sentiment = "negative"
    else:
        sentiment = "neutral"

    if topic_hits:
        relevance = "high"
    elif title:
        relevance = "medium"
    else:
        relevance = "low"

    summary_text = summary or title
    if topic_hits:
        key_discussion = f"核心主题集中在 {', '.join(topic_hits[:3])}"
    elif summary_text:
        key_discussion = summary_text[:160]
    else:
        key_discussion = ""

    # Only the matching-direction reason is filled; the other stays empty.
    reason_growth = summary_text[:200] if sentiment == "positive" else ""
    reason_decrease = summary_text[:200] if sentiment == "negative" else ""

    return {
        "relevance": relevance,
        "sentiment": sentiment,
        "key_discussion": key_discussion,
        "summary": summary_text[:280],
        "reason_growth": reason_growth,
        "reason_decrease": reason_decrease,
        "analysis_source": "local",
        "raw_json": row,
    }
|
||||
|
||||
|
||||
def attach_forward_returns(
|
||||
*,
|
||||
news_rows: list[dict[str, Any]],
|
||||
ohlc_rows: list[dict[str, Any]],
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Attach forward-return labels to each analyzed row."""
|
||||
if not ohlc_rows:
|
||||
return news_rows
|
||||
|
||||
closes_by_date = {
|
||||
str(row.get("date")): float(row.get("close"))
|
||||
for row in ohlc_rows
|
||||
if row.get("date") is not None and row.get("close") is not None
|
||||
}
|
||||
ordered_dates = [str(row.get("date")) for row in ohlc_rows if row.get("date") is not None]
|
||||
date_index = {date: idx for idx, date in enumerate(ordered_dates)}
|
||||
|
||||
horizons = {
|
||||
"ret_t0": 0,
|
||||
"ret_t1": 1,
|
||||
"ret_t3": 3,
|
||||
"ret_t5": 5,
|
||||
"ret_t10": 10,
|
||||
}
|
||||
|
||||
enriched: list[dict[str, Any]] = []
|
||||
for row in news_rows:
|
||||
trade_date = str(row.get("trade_date") or "")[:10]
|
||||
base_close = closes_by_date.get(trade_date)
|
||||
if not trade_date or base_close in (None, 0):
|
||||
enriched.append(row)
|
||||
continue
|
||||
|
||||
next_row = dict(row)
|
||||
base_index = date_index.get(trade_date)
|
||||
if base_index is None:
|
||||
enriched.append(next_row)
|
||||
continue
|
||||
|
||||
for field, offset in horizons.items():
|
||||
target_index = base_index + offset
|
||||
if target_index >= len(ordered_dates):
|
||||
next_row[field] = None
|
||||
continue
|
||||
target_close = closes_by_date.get(ordered_dates[target_index])
|
||||
next_row[field] = (
|
||||
(float(target_close) - float(base_close)) / float(base_close)
|
||||
if target_close not in (None, 0)
|
||||
else None
|
||||
)
|
||||
enriched.append(next_row)
|
||||
return enriched
|
||||
|
||||
|
||||
def build_analysis_rows(
    *,
    symbol: str,
    news_rows: list[dict[str, Any]],
    ohlc_rows: list[dict[str, Any]],
) -> tuple[list[dict[str, Any]], dict[str, int]]:
    """Transform raw news rows into market_store news_analysis payloads plus stats.

    Rows without an id are dropped, near-duplicates (same date/title/summary
    fingerprint) are collapsed, LLM batch results are preferred over the
    per-row classifier, and forward returns are attached at the end. The
    second return value reports dedupe/LLM/local counts.
    """
    llm_results: dict[str, dict[str, Any]] = {}
    if llm_enrichment_enabled():
        batch_size = get_env_int("EXPLAIN_ENRICH_BATCH_SIZE", 8)
        for chunk in _chunk_rows(news_rows, batch_size):
            llm_results.update(analyze_news_rows_with_llm(chunk))

    staged_rows: list[dict[str, Any]] = []
    seen_dedupe_keys: set[str] = set()
    counters = {"deduped_count": 0, "llm_count": 0, "local_count": 0}
    for row in news_rows:
        news_id = str(row.get("id") or "").strip()
        if not news_id:
            continue
        fingerprint = _dedupe_key(row)
        if fingerprint in seen_dedupe_keys:
            counters["deduped_count"] += 1
            continue
        seen_dedupe_keys.add(fingerprint)

        batch_result = llm_results.get(news_id)
        if isinstance(batch_result, dict):
            # Batch LLM result wins; backfill summary/raw_json from the row.
            analysis = dict(batch_result)
            analysis.setdefault("summary", str(row.get("summary") or row.get("title") or "")[:280])
            analysis.setdefault("raw_json", row)
            analysis["analysis_source"] = "llm"
            counters["llm_count"] += 1
        else:
            # Fallback path may itself hit the LLM per-row, so re-check source.
            analysis = classify_news_row(row)
            if analysis.get("analysis_source") == "llm":
                counters["llm_count"] += 1
            else:
                counters["local_count"] += 1

        staged_rows.append(
            {
                "news_id": news_id,
                "trade_date": str(row.get("trade_date") or "")[:10] or None,
                **analysis,
            }
        )
    labeled_rows = attach_forward_returns(news_rows=staged_rows, ohlc_rows=ohlc_rows)
    return labeled_rows, counters
|
||||
|
||||
|
||||
def enrich_news_for_symbol(
    store: MarketStore,
    symbol: str,
    *,
    start_date: str | None = None,
    end_date: str | None = None,
    limit: int = 200,
    analysis_source: str = "local",
    skip_existing: bool = True,
    only_reanalyze_local: bool = False,
) -> dict[str, Any]:
    """Read raw market news, compute explain fields, and persist them.

    Args:
        store: Market data store used for reads and the analysis upsert.
        symbol: Ticker; blank input short-circuits to an empty result.
        start_date: Optional inclusive window start (ISO date string).
        end_date: Optional inclusive window end (ISO date string).
        limit: Maximum number of news rows fetched from the store.
        analysis_source: Label passed through to the upsert call.
        skip_existing: When True (and not only_reanalyze_local), rows that
            already have an analysis are filtered out before enrichment.
        only_reanalyze_local: When True, ONLY rows whose existing analysis
            source is "local" are re-enriched (to upgrade them to LLM).

    Returns:
        A stats dict: counts of fetched/queued/skipped rows, dedupe/LLM/local
        counts from build_analysis_rows, and an execution_summary with sample
        upgraded dates and remaining local titles.
    """
    normalized_symbol = str(symbol or "").strip().upper()
    if not normalized_symbol:
        return {"symbol": "", "analyzed": 0}

    news_rows = store.get_news_items(
        normalized_symbol,
        start_date=start_date,
        end_date=end_date,
        limit=limit,
    )
    total_news_count = len(news_rows)
    skipped_existing_count = 0
    analyzed_sources: dict[str, str] = {}
    skipped_missing_analysis_count = 0
    skipped_non_local_count = 0
    # only_reanalyze_local takes precedence over skip_existing: it keeps ONLY
    # rows already analyzed with source "local" so they can be upgraded.
    if news_rows and only_reanalyze_local:
        analyzed_sources = store.get_analyzed_news_sources(
            normalized_symbol,
            start_date=start_date,
            end_date=end_date,
        )
        # Rows with no prior analysis at all (not eligible for re-analysis).
        skipped_missing_analysis_count = sum(
            1
            for row in news_rows
            if str(row.get("id") or "").strip() not in analyzed_sources
        )
        # Rows already analyzed by a non-local (e.g. LLM) source.
        skipped_non_local_count = sum(
            1
            for row in news_rows
            if str(row.get("id") or "").strip() in analyzed_sources
            and analyzed_sources.get(str(row.get("id") or "").strip()) != "local"
        )
        # Union of the two skip reasons above (missing OR non-local).
        skipped_existing_count = sum(
            1
            for row in news_rows
            if str(row.get("id") or "").strip() not in analyzed_sources
            or analyzed_sources.get(str(row.get("id") or "").strip()) != "local"
        )
        news_rows = [
            row for row in news_rows
            if analyzed_sources.get(str(row.get("id") or "").strip()) == "local"
        ]
    elif skip_existing and news_rows:
        analyzed_ids = store.get_analyzed_news_ids(
            normalized_symbol,
            start_date=start_date,
            end_date=end_date,
        )
        skipped_existing_count = sum(
            1
            for row in news_rows
            if str(row.get("id") or "").strip() in analyzed_ids
        )
        news_rows = [
            row for row in news_rows
            if str(row.get("id") or "").strip() not in analyzed_ids
        ]
    # Derive the OHLC window from the news rows when no explicit window was
    # given. NOTE(review): this assumes news_rows is sorted newest-first
    # (last row = oldest) — confirm against store.get_news_items ordering.
    ohlc_start = start_date or (news_rows[-1]["trade_date"] if news_rows and news_rows[-1].get("trade_date") else None)
    ohlc_end = end_date or (news_rows[0]["trade_date"] if news_rows and news_rows[0].get("trade_date") else None)
    ohlc_rows = (
        store.get_ohlc(normalized_symbol, ohlc_start, ohlc_end)
        if ohlc_start and ohlc_end
        else []
    )
    analysis_rows, stats = build_analysis_rows(
        symbol=normalized_symbol,
        news_rows=news_rows,
        ohlc_rows=ohlc_rows,
    )
    analyzed = store.upsert_news_analysis(
        normalized_symbol,
        analysis_rows,
        analysis_source=analysis_source,
    )
    # Distinct trade dates that received an LLM analysis in this run.
    upgraded_dates = sorted(
        {
            str(row.get("trade_date") or "")[:10]
            for row in analysis_rows
            if str(row.get("analysis_source") or "").strip().lower() == "llm"
            and str(row.get("trade_date") or "").strip()
        }
    )
    # Sample titles of rows that still ended up with only local analysis
    # (joined on news id; O(n*m) but bounded by the [:5] sample size needs —
    # the full cross-product is still iterated before slicing).
    remaining_local_titles = [
        str(row.get("title") or row.get("news_id") or "").strip()
        for row in news_rows
        for analyzed_row in analysis_rows
        if str(analyzed_row.get("news_id") or "").strip() == str(row.get("id") or "").strip()
        and str(analyzed_row.get("analysis_source") or "").strip().lower() == "local"
    ][:5]
    return {
        "symbol": normalized_symbol,
        "analyzed": analyzed,
        "news_count": total_news_count,
        "queued_count": len(news_rows),
        "skipped_existing_count": skipped_existing_count,
        "deduped_count": stats["deduped_count"],
        "llm_count": stats["llm_count"],
        "local_count": stats["local_count"],
        "only_reanalyze_local": only_reanalyze_local,
        # In upgrade mode every LLM result is, by construction, an upgrade
        # of a previously-local analysis.
        "upgraded_local_to_llm_count": (
            stats["llm_count"]
            if only_reanalyze_local
            else 0
        ),
        "execution_summary": {
            "upgraded_dates": upgraded_dates[:5],
            "remaining_local_titles": remaining_local_titles,
            "skipped_missing_analysis_count": skipped_missing_analysis_count,
            "skipped_non_local_count": skipped_non_local_count,
        },
    }
|
||||
|
||||
|
||||
def enrich_symbols(
    store: MarketStore,
    symbols: list[str],
    *,
    start_date: str | None = None,
    end_date: str | None = None,
    limit: int = 200,
    analysis_source: str = "local",
    skip_existing: bool = True,
    only_reanalyze_local: bool = False,
) -> list[dict[str, Any]]:
    """Batch enrich multiple symbols for explain-oriented news analysis.

    Blank symbols are skipped; every other symbol is processed independently
    via ``enrich_news_for_symbol`` and its result dict is collected in input
    order.
    """
    results: list[dict[str, Any]] = []
    for raw_symbol in symbols:
        cleaned = str(raw_symbol or "").strip().upper()
        if not cleaned:
            continue
        per_symbol = enrich_news_for_symbol(
            store,
            cleaned,
            start_date=start_date,
            end_date=end_date,
            limit=limit,
            analysis_source=analysis_source,
            skip_existing=skip_existing,
            only_reanalyze_local=only_reanalyze_local,
        )
        results.append(per_symbol)
    return results
|
||||
Reference in New Issue
Block a user