# -*- coding: utf-8 -*-
"""Polygon client used for long-lived market research ingestion."""

from __future__ import annotations

import os
import time
from datetime import datetime, timezone
from typing import Any, Optional

import requests

BASE = "https://api.polygon.io"

# Upper bound, in seconds, for any computed exponential-backoff sleep.
_MAX_BACKOFF_SECONDS = 60.0

# Per-request timeout, in seconds, passed to ``requests.get``.
_REQUEST_TIMEOUT_SECONDS = 30


def _headers() -> dict[str, str]:
    """Build the authorization header from the ``POLYGON_API_KEY`` env var.

    Raises:
        ValueError: If the variable is unset or blank after stripping.
    """
    api_key = os.getenv("POLYGON_API_KEY", "").strip()
    if not api_key:
        raise ValueError("Missing required API key: POLYGON_API_KEY")
    return {"Authorization": f"Bearer {api_key}"}


def _backoff_delay(backoff: float, attempt: int, offset: float) -> float:
    """Return ``backoff**attempt + offset``, capped at the module maximum."""
    return min((backoff**attempt) + offset, _MAX_BACKOFF_SECONDS)


def _retry_after_seconds(response: requests.Response) -> Optional[float]:
    """Return the ``Retry-After`` header as seconds, or ``None``.

    Only the integer-seconds form is understood; a missing header or any
    other form (such as an HTTP date) yields ``None`` so the caller can fall
    back to exponential backoff.
    """
    raw = (response.headers.get("Retry-After") or "").strip()
    # isdecimal() (unlike isdigit()) only accepts characters float() can parse.
    return float(raw) if raw.isdecimal() else None


def http_get(
    url: str,
    params: Optional[dict[str, Any]] = None,
    *,
    max_retries: int = 8,
    backoff: float = 2.0,
) -> requests.Response:
    """HTTP GET with exponential backoff and 429 handling.

    Transport errors, HTTP 429 and HTTP 5xx responses are retried. Any other
    error status raises immediately via ``raise_for_status``.

    Args:
        url: Absolute URL to request.
        params: Optional query parameters; ``None`` is sent as an empty dict.
        max_retries: Total number of attempts (not additional retries).
        backoff: Base of the exponential delay; attempt ``n`` (zero-based)
            waits ``backoff**n`` plus a small offset, capped at 60 seconds.

    Returns:
        The first response whose status passes ``raise_for_status``.

    Raises:
        ValueError: If ``POLYGON_API_KEY`` is not configured.
        requests.RequestException: If the final attempt fails at the
            transport level.
        requests.HTTPError: For a non-retryable error status, or when the
            final attempt still returns 429 or 5xx.
        RuntimeError: If ``max_retries`` is less than 1, so no request could
            be attempted.
    """
    if max_retries < 1:
        raise RuntimeError(
            "http_get requires max_retries >= 1; no request was attempted"
        )

    last_attempt = max_retries - 1
    for attempt in range(max_retries):
        headers = _headers()
        try:
            response = requests.get(
                url,
                params=params or {},
                headers=headers,
                timeout=_REQUEST_TIMEOUT_SECONDS,
            )
        except requests.RequestException:
            # Out of attempts: surface the error now instead of sleeping first.
            if attempt == last_attempt:
                raise
            time.sleep(_backoff_delay(backoff, attempt, 0.5))
            continue

        status = response.status_code
        if status == 429 or 500 <= status < 600:
            # Out of attempts: raise_for_status() always raises for 4xx/5xx,
            # so nothing below runs on the final attempt.
            if attempt == last_attempt:
                response.raise_for_status()
            wait = _retry_after_seconds(response) if status == 429 else None
            if wait is None:
                wait = _backoff_delay(backoff, attempt, 1.0)
            time.sleep(wait)
            continue

        response.raise_for_status()
        return response

    raise RuntimeError("Unreachable")


def fetch_ticker_details(symbol: str) -> dict[str, Any]:
    """Fetch company metadata from Polygon.

    Returns the ``results`` object of the ``/v3/reference/tickers/{symbol}``
    response, or an empty dict when that key is missing or falsy.
    """
    response = http_get(f"{BASE}/v3/reference/tickers/{symbol}")
    return response.json().get("results", {}) or {}


def _ohlc_row(item: dict[str, Any]) -> dict[str, Any]:
    """Convert one aggregate bar into a flat row keyed by readable names.

    The required ``t`` field is treated as epoch milliseconds and rendered as
    a UTC ISO date; every other field is optional and defaults to ``None``.
    """
    bar_date = datetime.fromtimestamp(
        int(item["t"]) / 1000,
        tz=timezone.utc,
    ).date()
    return {
        "date": bar_date.isoformat(),
        "open": item.get("o"),
        "high": item.get("h"),
        "low": item.get("l"),
        "close": item.get("c"),
        "volume": item.get("v"),
        "vwap": item.get("vw"),
        "transactions": item.get("n"),
    }


def fetch_ohlc(symbol: str, start_date: str, end_date: str) -> list[dict[str, Any]]:
    """Fetch daily OHLC data from Polygon.

    Args:
        symbol: Ticker placed in the aggregates URL path.
        start_date: Range start, placed verbatim in the URL path.
        end_date: Range end, placed verbatim in the URL path.

    Returns:
        One row per bar in the response's ``results`` list, in response
        order (requested ascending); an empty list when there are none.

    Raises:
        KeyError: If a bar lacks the ``t`` timestamp field.
    """
    response = http_get(
        f"{BASE}/v2/aggs/ticker/{symbol}/range/1/day/{start_date}/{end_date}",
        params={"adjusted": "true", "sort": "asc", "limit": 50000},
    )
    results = response.json().get("results") or []
    return [_ohlc_row(item) for item in results]


def _normalize_article(item: dict[str, Any]) -> dict[str, Any]:
    """Pick the stored fields out of one raw news item.

    Missing fields become ``None``; ``publisher`` is reduced to its ``name``.
    """
    return {
        "id": item.get("id"),
        "publisher": (item.get("publisher") or {}).get("name"),
        "title": item.get("title"),
        "author": item.get("author"),
        "published_utc": item.get("published_utc"),
        "amp_url": item.get("amp_url"),
        "article_url": item.get("article_url"),
        "tickers": item.get("tickers"),
        "description": item.get("description"),
        "insights": item.get("insights"),
    }


def fetch_news(
    symbol: str,
    start_date: str,
    end_date: str,
    *,
    per_page: int = 50,
    page_sleep: float = 1.2,
    max_pages: Optional[int] = None,
) -> list[dict[str, Any]]:
    """Fetch all Polygon news for a ticker, with pagination.

    Pages are followed through the response's ``next_url`` until a page has
    no results, no ``next_url`` is given, or ``max_pages`` pages were read.
    The first page is always requested, even when ``max_pages`` is 0.

    Args:
        symbol: Ticker sent as the ``ticker`` filter.
        start_date: Sent as ``published_utc.gte``.
        end_date: Sent as ``published_utc.lte``.
        per_page: Page size sent as ``limit``.
        page_sleep: Seconds to sleep between consecutive page requests.
        max_pages: Optional cap on the number of pages read.

    Returns:
        Normalised articles in response order. Articles with a truthy ``id``
        are de-duplicated on it; articles without one are always kept.
    """
    url = f"{BASE}/v2/reference/news"
    params = {
        "ticker": symbol,
        "published_utc.gte": start_date,
        "published_utc.lte": end_date,
        "limit": per_page,
        "order": "asc",
    }

    next_url: Optional[str] = None
    pages = 0
    all_articles: list[dict[str, Any]] = []
    seen_ids: set[str] = set()

    while True:
        # next_url already carries the query, so params go on page one only.
        response = http_get(next_url or url, params=None if next_url else params)
        data = response.json()
        results = data.get("results") or []
        if not results:
            break

        for item in results:
            article_id = item.get("id")
            if article_id:
                if article_id in seen_ids:
                    continue
                seen_ids.add(article_id)
            all_articles.append(_normalize_article(item))

        next_url = data.get("next_url")
        pages += 1
        if max_pages is not None and pages >= max_pages:
            break
        if not next_url:
            break
        time.sleep(page_sleep)

    return all_articles