68 lines
1.7 KiB
Python
68 lines
1.7 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""Shared market symbol normalization helpers."""
|
|
|
|
from dataclasses import dataclass
|
|
|
|
|
|
@dataclass(frozen=True)
class MarketSymbol:
    """Immutable record describing one symbol after normalization.

    Attributes:
        raw: The symbol exactly as the caller supplied it.
        canonical: The normalized form of ``raw``.
        market: Lower-case market tag associated with the symbol.
    """

    raw: str
    canonical: str
    market: str
|
|
|
|
|
|
def canonical_symbol(symbol: str) -> str:
    """Return the symbol trimmed of surrounding whitespace and upper-cased.

    Args:
        symbol: Symbol text; any falsy value (``None``, ``""``) is accepted.

    Returns:
        The stripped, upper-case symbol, or ``""`` when ``symbol`` is falsy.
    """
    # Falsy input short-circuits to the empty string instead of raising.
    if not symbol:
        return ""
    return symbol.strip().upper()
|
|
|
|
|
|
def normalize_symbol(symbol: str) -> str:
    """
    Strip SH/SZ/BJ exchange decorations from a symbol, leaving others as-is.

    The input is first canonicalized with ``canonical_symbol``. Then:

    * a leading ``SH``/``SZ``/``BJ`` tag is dropped when it is followed by
      exactly 5 or 6 digits;
    * a trailing ``.SH``/``.SZ``/``.SS``/``.BJ`` tag is dropped when the part
      before the last dot is all digits;
    * anything else is returned in its canonical form.

    Examples:
    - sh600519 -> 600519
    - 600519.SH -> 600519
    - aapl -> AAPL
    - hk00700 -> HK00700

    Args:
        symbol: Symbol text in any of the formats above.

    Returns:
        The bare numeric code when an exchange tag was removed, otherwise
        the canonical symbol.
    """
    upper = canonical_symbol(symbol)

    # Prefix form: a two-letter exchange tag followed by the numeric code.
    tag, digits = upper[:2], upper[2:]
    if tag in ("SH", "SZ", "BJ") and digits.isdigit() and len(digits) in (5, 6):
        return digits

    # Suffix form: split on the LAST dot only; `dot` is "" when there is none.
    stem, dot, exchange = upper.rpartition(".")
    if dot and exchange in {"SH", "SZ", "SS", "BJ"} and stem.isdigit():
        return stem

    return upper
|
|
|
|
|
|
def detect_market(symbol: str) -> str:
    """Infer the market tag for a symbol.

    The symbol is normalized with ``normalize_symbol`` and then classified:

    * all digits: exactly five digits -> ``"hk"``, any other length -> ``"cn"``
    * letters only -> ``"us"``; this includes tickers that merely begin with
      "HK" (e.g. ``HKD``), which are not exchange-prefixed Hong Kong codes
    * any other symbol starting with ``HK`` (e.g. ``HK00700``) -> ``"hk"``
    * remaining symbols: containing ``"/"`` -> ``"cn"``, otherwise ``"us"``
      (so ``BRK.B`` and the empty string both yield ``"us"``)

    Args:
        symbol: Symbol text in raw or already-normalized form.

    Returns:
        One of ``"hk"``, ``"us"`` or ``"cn"``.
    """
    normalized = normalize_symbol(symbol)

    if normalized.isdigit():
        # Bare numeric codes: five digits is treated as Hong Kong, all other
        # lengths as mainland China.
        return "hk" if len(normalized) == 5 else "cn"

    # Checked BEFORE the HK-prefix test so that alphabetic tickers such as
    # "HKD" are not mistaken for exchange-prefixed Hong Kong codes.
    if normalized.isalpha():
        return "us"

    if normalized.startswith("HK"):
        return "hk"

    # NOTE(review): symbols containing "/" fall through to "cn"; this is
    # preserved from the original logic -- confirm it is intended.
    return "cn" if "/" in normalized else "us"
|
|
|
|
|
|
def describe_symbol(symbol: str) -> MarketSymbol:
    """Build the ``MarketSymbol`` record for a symbol.

    Args:
        symbol: Symbol text as supplied by the caller.

    Returns:
        A ``MarketSymbol`` whose ``raw`` is ``symbol`` unchanged, whose
        ``canonical`` is the output of ``normalize_symbol``, and whose
        ``market`` is ``detect_market`` applied to that normalized value.
    """
    canonical = normalize_symbol(symbol)
    market = detect_market(canonical)
    return MarketSymbol(raw=symbol, canonical=canonical, market=market)
|