Initial commit of integrated agent system
This commit is contained in:
161
backend/data/polygon_client.py
Normal file
161
backend/data/polygon_client.py
Normal file
@@ -0,0 +1,161 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Polygon client used for long-lived market research ingestion."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Optional
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
# Root URL that every endpoint path built in this module is appended to.
BASE = "https://api.polygon.io"
|
||||
|
||||
|
||||
def _headers() -> dict[str, str]:
    """Build the request headers carrying the Polygon bearer token.

    The token is read from the ``POLYGON_API_KEY`` environment variable on
    every call and stripped of surrounding whitespace.

    Returns:
        A single-entry mapping with the ``Authorization`` header.

    Raises:
        ValueError: If the variable is unset, empty or only whitespace.
    """
    token = os.getenv("POLYGON_API_KEY", "").strip()
    if token:
        return {"Authorization": f"Bearer {token}"}
    raise ValueError("Missing required API key: POLYGON_API_KEY")
|
||||
|
||||
|
||||
def http_get(
    url: str,
    params: Optional[dict[str, Any]] = None,
    *,
    max_retries: int = 8,
    backoff: float = 2.0,
) -> requests.Response:
    """Issue an authenticated GET, retrying on transient failures.

    Network-level errors, HTTP 429 and HTTP 5xx responses are retried.
    The delay before a retry is ``backoff ** attempt`` plus a small
    constant, capped at 60 seconds. On a 429 response, a ``Retry-After``
    header holding a whole number of seconds takes precedence over the
    computed delay. No delay is spent after the final attempt.

    Args:
        url: Absolute URL to request.
        params: Query parameters; ``None`` sends none.
        max_retries: Total number of attempts, including the first one.
        backoff: Base of the exponential delay, in seconds.

    Returns:
        The first response whose status does not trigger
        ``raise_for_status``.

    Raises:
        ValueError: If ``max_retries`` is below 1, or if ``_headers``
            cannot find the API key.
        requests.RequestException: If the final attempt fails at the
            network level.
        requests.HTTPError: If the final attempt still returns 429/5xx,
            or any attempt returns a non-retryable error status.
    """
    if max_retries < 1:
        # Without this the loop body never runs and no request is made.
        raise ValueError("max_retries must be at least 1")

    max_wait = 60.0
    for attempt in range(max_retries):
        is_last = attempt == max_retries - 1
        try:
            response = requests.get(
                url,
                params=params or {},
                headers=_headers(),
                timeout=30,
            )
        except requests.RequestException:
            if is_last:
                # Nothing left to retry: surface the error immediately
                # instead of sleeping first.
                raise
            time.sleep(min((backoff**attempt) + 0.5, max_wait))
            continue

        status = response.status_code
        retryable = status == 429 or 500 <= status < 600
        if not retryable or is_last:
            # Raises for any 4xx/5xx (including an exhausted 429/5xx);
            # otherwise the response is handed back to the caller.
            response.raise_for_status()
            return response

        wait = min((backoff**attempt) + 1.0, max_wait)
        if status == 429:
            retry_after = (response.headers.get("Retry-After") or "").strip()
            # Only the delta-seconds form is honoured; an HTTP-date value
            # falls back to the computed backoff.
            if retry_after.isdecimal():
                wait = float(retry_after)
        time.sleep(wait)

    raise RuntimeError("Unreachable")
|
||||
|
||||
|
||||
def fetch_ticker_details(symbol: str) -> dict[str, Any]:
    """Look up the reference record Polygon holds for *symbol*.

    Args:
        symbol: Ticker inserted verbatim into the request path.

    Returns:
        The ``results`` object of the JSON response, or an empty dict when
        that key is absent or holds a falsy value.
    """
    endpoint = f"{BASE}/v3/reference/tickers/{symbol}"
    payload = http_get(endpoint).json()
    details = payload.get("results")
    return details if details else {}
|
||||
|
||||
|
||||
def fetch_ohlc(symbol: str, start_date: str, end_date: str) -> list[dict[str, Any]]:
    """Download adjusted daily bars for *symbol* and flatten them into rows.

    Args:
        symbol: Ticker inserted verbatim into the request path.
        start_date: Lower bound of the range, inserted into the path as-is.
        end_date: Upper bound of the range, inserted into the path as-is.

    Returns:
        One dict per bar, in the order the API returned them (ascending
        sort is requested). Each row has ``date`` (the bar's ``t`` value,
        treated as epoch milliseconds and rendered as a UTC ISO date) plus
        ``open``, ``high``, ``low``, ``close``, ``volume``, ``vwap`` and
        ``transactions``; those are ``None`` when missing from the bar.

    Raises:
        KeyError: If a bar has no ``t`` field.
    """
    endpoint = f"{BASE}/v2/aggs/ticker/{symbol}/range/1/day/{start_date}/{end_date}"
    query = {"adjusted": "true", "sort": "asc", "limit": 50000}
    bars = http_get(endpoint, params=query).json().get("results") or []

    def _as_row(bar: dict[str, Any]) -> dict[str, Any]:
        # ``t`` is divided by 1000 to turn milliseconds into seconds.
        stamp = datetime.fromtimestamp(int(bar["t"]) / 1000, tz=timezone.utc)
        return {
            "date": stamp.date().isoformat(),
            "open": bar.get("o"),
            "high": bar.get("h"),
            "low": bar.get("l"),
            "close": bar.get("c"),
            "volume": bar.get("v"),
            "vwap": bar.get("vw"),
            "transactions": bar.get("n"),
        }

    return [_as_row(bar) for bar in bars]
|
||||
|
||||
|
||||
def fetch_news(
    symbol: str,
    start_date: str,
    end_date: str,
    *,
    per_page: int = 50,
    page_sleep: float = 1.2,
    max_pages: Optional[int] = None,
) -> list[dict[str, Any]]:
    """Collect news articles for *symbol*, following ``next_url`` pages.

    Args:
        symbol: Ticker sent as the ``ticker`` query parameter.
        start_date: Sent as ``published_utc.gte``.
        end_date: Sent as ``published_utc.lte``.
        per_page: Sent as ``limit`` on the first request.
        page_sleep: Seconds to pause between consecutive page requests.
        max_pages: Stop once this many non-empty pages were processed;
            ``None`` means follow ``next_url`` until it runs out. The
            first page is always requested.

    Returns:
        Trimmed article dicts in the order received. An article whose
        ``id`` was already seen is dropped; articles without an ``id``
        are always kept.
    """
    # Fields copied through unchanged after ``id`` and ``publisher``.
    passthrough = (
        "title",
        "author",
        "published_utc",
        "amp_url",
        "article_url",
        "tickers",
        "description",
        "insights",
    )
    collected: list[dict[str, Any]] = []
    known_ids: set[str] = set()
    fetched_pages = 0

    # The first request carries the filter query; follow-up requests use
    # the ``next_url`` from the previous page and send no query of their own.
    target: str = f"{BASE}/v2/reference/news"
    query: Optional[dict[str, Any]] = {
        "ticker": symbol,
        "published_utc.gte": start_date,
        "published_utc.lte": end_date,
        "limit": per_page,
        "order": "asc",
    }

    while True:
        payload = http_get(target, params=query).json()
        batch = payload.get("results") or []
        if not batch:
            break

        for raw in batch:
            ident = raw.get("id")
            if ident:
                if ident in known_ids:
                    continue
                known_ids.add(ident)
            record: dict[str, Any] = {
                "id": ident,
                "publisher": (raw.get("publisher") or {}).get("name"),
            }
            record.update((field, raw.get(field)) for field in passthrough)
            collected.append(record)

        fetched_pages += 1
        cursor = payload.get("next_url")
        hit_page_cap = max_pages is not None and fetched_pages >= max_pages
        if hit_page_cap or not cursor:
            break

        time.sleep(page_sleep)
        target, query = cursor, None

    return collected
|
||||
Reference in New Issue
Block a user