# -*- coding: utf-8 -*-
"""Shared market symbol normalization helpers."""

from dataclasses import dataclass

# Exchange prefixes that are stripped when followed by a 5- or 6-digit code.
_EXCHANGE_PREFIXES = ("SH", "SZ", "BJ")
# Exchange suffixes (after the last ".") that are stripped from numeric codes.
_EXCHANGE_SUFFIXES = frozenset({"SH", "SZ", "SS", "BJ"})
# Accepted digit counts for a prefixed numeric code.
_PREFIXED_CODE_LENGTHS = (5, 6)


@dataclass(frozen=True)
class MarketSymbol:
    """Normalized symbol metadata.

    Attributes:
        raw: The symbol exactly as supplied by the caller.
        canonical: The symbol after ``normalize_symbol``.
        market: Market tag returned by ``detect_market``
            (``"hk"``, ``"us"`` or ``"cn"``).
    """

    raw: str
    canonical: str
    market: str


def canonical_symbol(symbol: str) -> str:
    """Return canonical uppercase symbol for storage and routing.

    Surrounding whitespace is removed and the text is upper-cased. Any falsy
    input (``None``, ``""``) yields the empty string.
    """
    return (symbol or "").strip().upper()


def normalize_symbol(symbol: str) -> str:
    """Normalize symbols across US and exchange-prefixed formats.

    Rules, applied in order to the canonical (stripped, upper-cased) symbol:

    1. An ``SH``/``SZ``/``BJ`` prefix followed by exactly 5 or 6 digits is
       dropped, leaving only the digits.
    2. A ``.SH``/``.SZ``/``.SS``/``.BJ`` suffix after an all-digit base is
       dropped, leaving only the digits.
    3. Anything else is returned in canonical form unchanged.

    Examples:
        - sh600519 -> 600519
        - 600519.SH -> 600519
        - aapl -> AAPL
        - hk00700 -> HK00700
    """
    canonical = canonical_symbol(symbol)

    if canonical.startswith(_EXCHANGE_PREFIXES) and len(canonical) > 2:
        candidate = canonical[2:]
        if candidate.isdigit() and len(candidate) in _PREFIXED_CODE_LENGTHS:
            return candidate

    if "." in canonical:
        base, suffix = canonical.rsplit(".", 1)
        if suffix in _EXCHANGE_SUFFIXES and base.isdigit():
            return base

    return canonical


def detect_market(symbol: str) -> str:
    """Infer market tag from normalized symbol.

    Returns:
        ``"hk"`` when the normalized symbol starts with ``HK``, is exactly
        five digits, or is an all-digit code with an ``.HK`` suffix
        (e.g. ``0700.HK``); ``"us"`` when it is not purely numeric and
        contains no ``/``; ``"cn"`` otherwise.
    """
    normalized = normalize_symbol(symbol)

    if normalized.startswith("HK"):
        return "hk"
    if normalized.isdigit() and len(normalized) == 5:
        return "hk"

    # Suffix-style Hong Kong codes ("0700.HK") are left untouched by
    # normalize_symbol; without this check they would be tagged "us".
    base, dot, suffix = normalized.rpartition(".")
    if dot and suffix == "HK" and base.isdigit():
        return "hk"

    # Any non-numeric symbol without a slash is treated as a US ticker. This
    # covers plain alphabetic tickers as well as forms such as "BRK.B".
    # NOTE(review): an empty symbol also lands here and is tagged "us".
    if "/" not in normalized and not normalized.isdigit():
        return "us"

    # NOTE(review): slash-delimited symbols fall through to "cn" together
    # with numeric codes; confirm that is the intended routing.
    return "cn"


def describe_symbol(symbol: str) -> MarketSymbol:
    """Return normalized symbol metadata.

    The original input is kept in ``raw``; ``canonical`` holds the normalized
    form and ``market`` the tag inferred from it.
    """
    normalized = normalize_symbol(symbol)
    return MarketSymbol(
        raw=symbol,
        canonical=normalized,
        # detect_market normalizes again; normalization is idempotent, so
        # passing the already-normalized value gives the same result.
        market=detect_market(normalized),
    )