evotraders/backend/data/polygon_client.py
# -*- coding: utf-8 -*-
"""Polygon client used for long-lived market research ingestion."""

from __future__ import annotations

import os
import time
from datetime import datetime, timezone
from typing import Any, Optional

import requests

BASE = "https://api.polygon.io"


def _headers() -> dict[str, str]:
    api_key = os.getenv("POLYGON_API_KEY", "").strip()
    if not api_key:
        raise ValueError("Missing required API key: POLYGON_API_KEY")
    return {"Authorization": f"Bearer {api_key}"}


def http_get(
    url: str,
    params: Optional[dict[str, Any]] = None,
    *,
    max_retries: int = 8,
    backoff: float = 2.0,
) -> requests.Response:
    """HTTP GET with exponential backoff and 429 handling."""
    for attempt in range(max_retries):
        try:
            response = requests.get(
                url,
                params=params or {},
                headers=_headers(),
                timeout=30,
            )
        except requests.RequestException:
            # Network-level failure: re-raise on the last attempt, otherwise back off.
            if attempt == max_retries - 1:
                raise
            time.sleep((backoff**attempt) + 0.5)
            continue
        if response.status_code == 429:
            # Rate limited: honor an integral Retry-After header if present,
            # otherwise fall back to capped exponential backoff.
            if attempt == max_retries - 1:
                response.raise_for_status()
            retry_after = response.headers.get("Retry-After")
            wait = (
                float(retry_after)
                if retry_after and retry_after.isdigit()
                else min((backoff**attempt) + 1.0, 60.0)
            )
            time.sleep(wait)
            continue
        if 500 <= response.status_code < 600:
            # Transient server error: retry with capped exponential backoff.
            if attempt == max_retries - 1:
                response.raise_for_status()
            time.sleep(min((backoff**attempt) + 1.0, 60.0))
            continue
        response.raise_for_status()
        return response
    raise RuntimeError("Unreachable")
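

# Illustrative call pattern for http_get (a sketch kept in comments so nothing
# runs at import time; the ticker and endpoint mirror fetch_ticker_details below):
#
#     resp = http_get(f"{BASE}/v3/reference/tickers/AAPL")
#     details = resp.json().get("results", {})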


def fetch_ticker_details(symbol: str) -> dict[str, Any]:
    """Fetch company metadata from Polygon."""
    response = http_get(f"{BASE}/v3/reference/tickers/{symbol}")
    return response.json().get("results", {}) or {}


def fetch_ohlc(symbol: str, start_date: str, end_date: str) -> list[dict[str, Any]]:
    """Fetch daily OHLC data from Polygon."""
    response = http_get(
        f"{BASE}/v2/aggs/ticker/{symbol}/range/1/day/{start_date}/{end_date}",
        params={"adjusted": "true", "sort": "asc", "limit": 50000},
    )
    results = response.json().get("results") or []
    rows: list[dict[str, Any]] = []
    for item in results:
        rows.append(
            {
                # Polygon reports the bar timestamp "t" in epoch milliseconds.
                "date": datetime.fromtimestamp(
                    int(item["t"]) / 1000,
                    tz=timezone.utc,
                ).date().isoformat(),
                "open": item.get("o"),
                "high": item.get("h"),
                "low": item.get("l"),
                "close": item.get("c"),
                "volume": item.get("v"),
                "vwap": item.get("vw"),
                "transactions": item.get("n"),
            }
        )
    return rows
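

# Shape of each row produced by fetch_ohlc (placeholders, not real data):
#
#     {"date": "YYYY-MM-DD", "open": ..., "high": ..., "low": ..., "close": ...,
#      "volume": ..., "vwap": ..., "transactions": ...}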


def fetch_news(
    symbol: str,
    start_date: str,
    end_date: str,
    *,
    per_page: int = 50,
    page_sleep: float = 1.2,
    max_pages: Optional[int] = None,
) -> list[dict[str, Any]]:
    """Fetch all Polygon news for a ticker, with pagination."""
    url = f"{BASE}/v2/reference/news"
    params = {
        "ticker": symbol,
        "published_utc.gte": start_date,
        "published_utc.lte": end_date,
        "limit": per_page,
        "order": "asc",
    }
    next_url: Optional[str] = None
    pages = 0
    all_articles: list[dict[str, Any]] = []
    seen_ids: set[str] = set()
    while True:
        # Follow Polygon's absolute next_url once pagination starts; the query
        # params only apply to the first request.
        response = http_get(next_url or url, params=None if next_url else params)
        data = response.json()
        results = data.get("results") or []
        if not results:
            break
        for item in results:
            article_id = item.get("id")
            # Deduplicate on article id across pages.
            if article_id and article_id in seen_ids:
                continue
            all_articles.append(
                {
                    "id": article_id,
                    "publisher": (item.get("publisher") or {}).get("name"),
                    "title": item.get("title"),
                    "author": item.get("author"),
                    "published_utc": item.get("published_utc"),
                    "amp_url": item.get("amp_url"),
                    "article_url": item.get("article_url"),
                    "tickers": item.get("tickers"),
                    "description": item.get("description"),
                    "insights": item.get("insights"),
                }
            )
            if article_id:
                seen_ids.add(article_id)
        next_url = data.get("next_url")
        pages += 1
        if max_pages is not None and pages >= max_pages:
            break
        if not next_url:
            break
        time.sleep(page_sleep)
    return all_articles
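

if __name__ == "__main__":
    # Minimal smoke-test sketch, not part of the ingestion pipeline: the ticker
    # and date range are illustrative, and POLYGON_API_KEY must be set in the
    # environment for _headers() to succeed.
    symbol = "AAPL"
    start, end = "2024-01-01", "2024-01-31"
    details = fetch_ticker_details(symbol)
    print(f"{symbol}: {details.get('name')}")
    bars = fetch_ohlc(symbol, start, end)
    print(f"{len(bars)} daily bars between {start} and {end}")
    articles = fetch_news(symbol, start, end, max_pages=1)
    print(f"{len(articles)} news articles (first page only)")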