2026-05-20 23:49:56 -05:00
|
|
|
|
"""Live provider usage — fetch token limits and reset times from provider APIs.
|
2026-05-20 22:03:57 -05:00
|
|
|
|
|
2026-05-20 23:49:56 -05:00
|
|
|
|
Some providers expose rate-limit headers on regular API responses. Pipeline first
|
|
|
|
|
|
calls a lightweight endpoint (model list) and then, when needed, performs a
|
|
|
|
|
|
minimal generation probe to surface usage and latency details.
|
2026-05-20 22:03:57 -05:00
|
|
|
|
|
|
|
|
|
|
No guessing, no JSONL scanning, no estimates. If the provider API is
|
|
|
|
|
|
unreachable or the key is invalid, an error is returned and all limit fields
|
|
|
|
|
|
are None.
|
|
|
|
|
|
|
|
|
|
|
|
Supported providers
|
|
|
|
|
|
-------------------
|
|
|
|
|
|
anthropic → GET https://api.anthropic.com/v1/models
|
|
|
|
|
|
Headers: anthropic-ratelimit-tokens-limit/remaining/reset
|
|
|
|
|
|
anthropic-ratelimit-requests-limit/remaining/reset
|
|
|
|
|
|
anthropic-ratelimit-input-tokens-limit/remaining/reset
|
2026-05-20 23:22:54 -05:00
|
|
|
|
Fallback probe (only when headers missing):
|
|
|
|
|
|
POST /v1/messages with max_tokens=1 to surface usage+time data.
|
2026-05-20 22:03:57 -05:00
|
|
|
|
|
|
|
|
|
|
openai → GET https://api.openai.com/v1/models
|
|
|
|
|
|
(codex) Headers: x-ratelimit-limit-tokens, x-ratelimit-remaining-tokens,
|
|
|
|
|
|
x-ratelimit-reset-tokens, x-ratelimit-limit-requests,
|
|
|
|
|
|
x-ratelimit-remaining-requests, x-ratelimit-reset-requests
|
2026-05-20 23:22:54 -05:00
|
|
|
|
Fallback probe (only when headers missing):
|
2026-05-20 23:49:56 -05:00
|
|
|
|
POST /v1/responses with max_output_tokens=1 (preferred),
|
|
|
|
|
|
then /v1/chat/completions with max_tokens=1 (compatibility).
|
2026-05-20 22:03:57 -05:00
|
|
|
|
|
|
|
|
|
|
ollama → GET {base_url}/api/tags (health-check only; no rate limits)
|
|
|
|
|
|
Returns: model list, server reachable flag
|
2026-05-20 23:22:54 -05:00
|
|
|
|
Fallback probe:
|
|
|
|
|
|
POST {base_url}/api/generate with num_predict=1 for usage+time.
|
2026-05-20 22:03:57 -05:00
|
|
|
|
|
|
|
|
|
|
Caching
|
|
|
|
|
|
-------
|
2026-05-24 19:05:30 -05:00
|
|
|
|
Results are cached by the effective provider credential source for
|
|
|
|
|
|
CACHE_TTL_SECONDS (default 60s) to avoid hammering provider APIs on every page
|
|
|
|
|
|
load. Multiple Pipeline credential rows that point at the same local Claude or
|
|
|
|
|
|
Codex login share one provider fetch.
|
2026-05-20 22:03:57 -05:00
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
2026-05-24 17:37:08 -05:00
|
|
|
|
import asyncio
|
2026-05-24 19:05:30 -05:00
|
|
|
|
import hashlib
|
2026-05-21 19:42:46 -05:00
|
|
|
|
import json as _json_module
|
|
|
|
|
|
import os
|
2026-05-20 22:03:57 -05:00
|
|
|
|
import re
|
2026-05-21 19:42:46 -05:00
|
|
|
|
import time as _time_module
|
2026-05-20 22:03:57 -05:00
|
|
|
|
from dataclasses import dataclass, field
|
|
|
|
|
|
from datetime import datetime, timedelta, timezone
|
|
|
|
|
|
from typing import Any
|
|
|
|
|
|
|
|
|
|
|
|
import httpx
|
|
|
|
|
|
|
|
|
|
|
|
from app.core.logging import get_logger
|
|
|
|
|
|
from app.core.time import utcnow
|
|
|
|
|
|
|
|
|
|
|
|
logger = get_logger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
CACHE_TTL_SECONDS = 60
|
2026-05-24 17:51:48 -05:00
|
|
|
|
# Anthropic's subscription endpoint rate-limit window is ~30-60s. Retrying
|
|
|
|
|
|
# sooner than that (old 5s value) creates a tight retry loop that keeps the
|
|
|
|
|
|
# rate limit permanently throttled. Use the same 60s TTL as successes so the
|
|
|
|
|
|
# next auto-poll only fires after the window has cleared. Manual refreshes
|
|
|
|
|
|
# (force_refresh=True) always bypass this TTL.
|
|
|
|
|
|
CACHE_TTL_FAILURE_SECONDS = 60
|
2026-05-20 22:03:57 -05:00
|
|
|
|
REQUEST_TIMEOUT = 8.0 # seconds
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# Result types
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
|
|
|
|
|
|
class TokenWindow:
|
|
|
|
|
|
limit: int | None = None
|
|
|
|
|
|
remaining: int | None = None
|
|
|
|
|
|
reset_at: datetime | None = None # UTC naive datetime
|
|
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
|
def reset_in_ms(self) -> int | None:
|
|
|
|
|
|
if self.reset_at is None:
|
|
|
|
|
|
return None
|
|
|
|
|
|
delta = (self.reset_at - utcnow()).total_seconds()
|
|
|
|
|
|
return max(0, int(delta * 1000))
|
|
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
|
def used(self) -> int | None:
|
|
|
|
|
|
if self.limit is not None and self.remaining is not None:
|
|
|
|
|
|
return max(0, self.limit - self.remaining)
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
|
def pct_used(self) -> float | None:
|
|
|
|
|
|
if self.limit and self.limit > 0 and self.remaining is not None:
|
|
|
|
|
|
return round((1 - self.remaining / self.limit) * 100, 1)
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
|
|
|
|
|
|
class RequestWindow:
|
|
|
|
|
|
limit: int | None = None
|
|
|
|
|
|
remaining: int | None = None
|
|
|
|
|
|
reset_at: datetime | None = None
|
|
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
|
def reset_in_ms(self) -> int | None:
|
|
|
|
|
|
if self.reset_at is None:
|
|
|
|
|
|
return None
|
|
|
|
|
|
delta = (self.reset_at - utcnow()).total_seconds()
|
|
|
|
|
|
return max(0, int(delta * 1000))
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-05-21 03:29:35 -05:00
|
|
|
|
@dataclass
|
|
|
|
|
|
class SubscriptionWindow:
|
|
|
|
|
|
"""One subscription-plan usage window (e.g. 5h session, 7-day all-models)."""
|
|
|
|
|
|
|
|
|
|
|
|
key: str # "five_hour" | "seven_day" | "seven_day_sonnet" | "seven_day_opus"
|
|
|
|
|
|
label: str # human label: "Current session" | "All models" | "Sonnet" | "Opus"
|
|
|
|
|
|
pct_used: float # 0–100
|
|
|
|
|
|
reset_at: datetime | None = None # UTC naive datetime
|
|
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
|
def reset_in_ms(self) -> int | None:
|
|
|
|
|
|
if self.reset_at is None:
|
|
|
|
|
|
return None
|
|
|
|
|
|
delta = (self.reset_at - utcnow()).total_seconds()
|
|
|
|
|
|
return max(0, int(delta * 1000))
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-05-20 22:03:57 -05:00
|
|
|
|
@dataclass
|
|
|
|
|
|
class ProviderUsageLive:
|
|
|
|
|
|
provider: str
|
|
|
|
|
|
account_key: str
|
|
|
|
|
|
checked_at: datetime
|
|
|
|
|
|
reachable: bool
|
2026-05-21 01:01:05 -05:00
|
|
|
|
# Phase 1 semantics: this service reports provider API diagnostics
|
|
|
|
|
|
# (rate-limit windows / probe metadata), not subscription usage windows.
|
|
|
|
|
|
source: str = "provider_api_rate_limit"
|
|
|
|
|
|
confidence: str = "high"
|
2026-05-20 22:03:57 -05:00
|
|
|
|
error: str | None = None
|
|
|
|
|
|
tokens: TokenWindow = field(default_factory=TokenWindow)
|
2026-05-21 02:25:50 -05:00
|
|
|
|
input_tokens: TokenWindow = field(default_factory=TokenWindow) # Anthropic input-only window
|
|
|
|
|
|
output_tokens: TokenWindow = field(default_factory=TokenWindow) # Anthropic output-only window
|
2026-05-20 22:03:57 -05:00
|
|
|
|
requests: RequestWindow = field(default_factory=RequestWindow)
|
2026-05-21 03:29:35 -05:00
|
|
|
|
# Provider subscription windows — populated when session_key is provided
|
|
|
|
|
|
subscription_windows: list[SubscriptionWindow] = field(default_factory=list)
|
|
|
|
|
|
subscription_plan: str | None = None # e.g. "pro", "plus ($15.00)"
|
2026-05-20 22:03:57 -05:00
|
|
|
|
models: list[str] = field(default_factory=list) # model IDs available on this key
|
|
|
|
|
|
raw_headers: dict[str, str] = field(default_factory=dict)
|
2026-05-20 23:22:54 -05:00
|
|
|
|
sample_model: str | None = None
|
|
|
|
|
|
sample_input_tokens: int | None = None
|
|
|
|
|
|
sample_output_tokens: int | None = None
|
|
|
|
|
|
sample_latency_ms: int | None = None
|
2026-05-20 22:03:57 -05:00
|
|
|
|
|
|
|
|
|
|
def to_dict(self) -> dict[str, Any]:
|
|
|
|
|
|
def _window(w: TokenWindow | RequestWindow) -> dict[str, Any]:
|
|
|
|
|
|
d: dict[str, Any] = {}
|
2026-05-20 23:22:54 -05:00
|
|
|
|
if hasattr(w, "limit"):
|
|
|
|
|
|
d["limit"] = w.limit
|
|
|
|
|
|
if hasattr(w, "remaining"):
|
|
|
|
|
|
d["remaining"] = w.remaining
|
|
|
|
|
|
if hasattr(w, "reset_in_ms"):
|
|
|
|
|
|
d["reset_in_ms"] = w.reset_in_ms
|
|
|
|
|
|
if hasattr(w, "reset_at"):
|
|
|
|
|
|
d["reset_at"] = w.reset_at.isoformat() if w.reset_at else None
|
2026-05-20 22:03:57 -05:00
|
|
|
|
if isinstance(w, TokenWindow):
|
2026-05-20 23:22:54 -05:00
|
|
|
|
d["used"] = w.used
|
2026-05-20 22:03:57 -05:00
|
|
|
|
d["pct_used"] = w.pct_used
|
|
|
|
|
|
return d
|
|
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
|
"provider": self.provider,
|
|
|
|
|
|
"account_key": self.account_key,
|
|
|
|
|
|
"checked_at": self.checked_at.isoformat(),
|
|
|
|
|
|
"reachable": self.reachable,
|
2026-05-21 01:01:05 -05:00
|
|
|
|
"source": self.source,
|
|
|
|
|
|
"confidence": self.confidence,
|
2026-05-20 22:03:57 -05:00
|
|
|
|
"error": self.error,
|
2026-05-21 02:25:50 -05:00
|
|
|
|
"tokens": _window(self.tokens),
|
|
|
|
|
|
"input_tokens": _window(self.input_tokens),
|
|
|
|
|
|
"output_tokens": _window(self.output_tokens),
|
|
|
|
|
|
"requests": _window(self.requests),
|
2026-05-20 22:03:57 -05:00
|
|
|
|
"models": self.models[:20], # cap for response size
|
2026-05-20 23:22:54 -05:00
|
|
|
|
"sample_model": self.sample_model,
|
|
|
|
|
|
"sample_input_tokens": self.sample_input_tokens,
|
|
|
|
|
|
"sample_output_tokens": self.sample_output_tokens,
|
|
|
|
|
|
"sample_latency_ms": self.sample_latency_ms,
|
2026-05-20 22:03:57 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# Header parsers
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_int_header(headers: dict[str, str], *names: str) -> int | None:
|
|
|
|
|
|
for name in names:
|
|
|
|
|
|
val = headers.get(name.lower())
|
|
|
|
|
|
if val is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
return int(val)
|
|
|
|
|
|
except (ValueError, TypeError):
|
|
|
|
|
|
pass
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_iso_reset(value: str) -> datetime | None:
|
|
|
|
|
|
"""Parse an ISO 8601 reset timestamp → UTC naive datetime."""
|
|
|
|
|
|
if not value:
|
|
|
|
|
|
return None
|
|
|
|
|
|
try:
|
|
|
|
|
|
normalized = value.strip().replace("Z", "+00:00")
|
|
|
|
|
|
dt = datetime.fromisoformat(normalized)
|
|
|
|
|
|
if dt.tzinfo is not None:
|
|
|
|
|
|
dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
|
|
|
|
|
|
return dt
|
|
|
|
|
|
except ValueError:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# OpenAI encodes reset as a duration string like "1m30s", "2h", "30s"
|
|
|
|
|
|
_OAI_DURATION_RE = re.compile(
|
|
|
|
|
|
r"(?:(\d+)h)?(?:(\d+)m)?(?:(\d+(?:\.\d+)?)s)?$"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-05-20 23:22:54 -05:00
|
|
|
|
def _apply_anthropic_ratelimit_headers(result: ProviderUsageLive, headers: dict[str, str]) -> None:
|
|
|
|
|
|
"""Populate Anthropic limit windows from response headers."""
|
|
|
|
|
|
result.tokens = TokenWindow(
|
|
|
|
|
|
limit=_parse_int_header(headers, "anthropic-ratelimit-tokens-limit"),
|
|
|
|
|
|
remaining=_parse_int_header(headers, "anthropic-ratelimit-tokens-remaining"),
|
|
|
|
|
|
reset_at=_parse_iso_reset(headers.get("anthropic-ratelimit-tokens-reset", "")),
|
|
|
|
|
|
)
|
|
|
|
|
|
result.input_tokens = TokenWindow(
|
|
|
|
|
|
limit=_parse_int_header(headers, "anthropic-ratelimit-input-tokens-limit"),
|
|
|
|
|
|
remaining=_parse_int_header(headers, "anthropic-ratelimit-input-tokens-remaining"),
|
|
|
|
|
|
reset_at=_parse_iso_reset(headers.get("anthropic-ratelimit-input-tokens-reset", "")),
|
|
|
|
|
|
)
|
2026-05-21 02:25:50 -05:00
|
|
|
|
result.output_tokens = TokenWindow(
|
|
|
|
|
|
limit=_parse_int_header(headers, "anthropic-ratelimit-output-tokens-limit"),
|
|
|
|
|
|
remaining=_parse_int_header(headers, "anthropic-ratelimit-output-tokens-remaining"),
|
|
|
|
|
|
reset_at=_parse_iso_reset(headers.get("anthropic-ratelimit-output-tokens-reset", "")),
|
|
|
|
|
|
)
|
2026-05-20 23:22:54 -05:00
|
|
|
|
result.requests = RequestWindow(
|
|
|
|
|
|
limit=_parse_int_header(headers, "anthropic-ratelimit-requests-limit"),
|
|
|
|
|
|
remaining=_parse_int_header(headers, "anthropic-ratelimit-requests-remaining"),
|
|
|
|
|
|
reset_at=_parse_iso_reset(headers.get("anthropic-ratelimit-requests-reset", "")),
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _pick_anthropic_probe_model(models: list[str]) -> str | None:
|
|
|
|
|
|
if not models:
|
|
|
|
|
|
return None
|
|
|
|
|
|
priorities = ("haiku", "sonnet", "opus")
|
|
|
|
|
|
lowered = [(m, m.lower()) for m in models]
|
|
|
|
|
|
for priority in priorities:
|
|
|
|
|
|
for original, lowered_name in lowered:
|
|
|
|
|
|
if priority in lowered_name:
|
|
|
|
|
|
return original
|
|
|
|
|
|
return models[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _pick_openai_probe_model(models: list[str]) -> str | None:
|
|
|
|
|
|
if not models:
|
|
|
|
|
|
return None
|
2026-05-20 23:49:56 -05:00
|
|
|
|
priorities = (
|
|
|
|
|
|
"gpt-5.5",
|
|
|
|
|
|
"gpt-5.4",
|
|
|
|
|
|
"gpt-5.3-codex",
|
|
|
|
|
|
"gpt-5.2-codex",
|
|
|
|
|
|
"gpt-5.1-codex",
|
|
|
|
|
|
"gpt-5-codex",
|
|
|
|
|
|
"codex",
|
|
|
|
|
|
"gpt-4.1-mini",
|
|
|
|
|
|
"gpt-4o-mini",
|
|
|
|
|
|
"gpt-4.1",
|
|
|
|
|
|
"gpt-4o",
|
|
|
|
|
|
"o4-mini",
|
|
|
|
|
|
)
|
2026-05-20 23:22:54 -05:00
|
|
|
|
lowered = [(m, m.lower()) for m in models]
|
|
|
|
|
|
for priority in priorities:
|
|
|
|
|
|
for original, lowered_name in lowered:
|
|
|
|
|
|
if priority in lowered_name:
|
|
|
|
|
|
return original
|
|
|
|
|
|
return models[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-05-20 23:49:56 -05:00
|
|
|
|
def _normalize_base(base_url: str | None, default_base: str, *, strip_suffixes: tuple[str, ...]) -> str:
|
|
|
|
|
|
base = (base_url or default_base).strip().rstrip("/")
|
|
|
|
|
|
lowered = base.lower()
|
|
|
|
|
|
for suffix in strip_suffixes:
|
|
|
|
|
|
if lowered.endswith(suffix):
|
|
|
|
|
|
base = base[: -len(suffix)]
|
|
|
|
|
|
break
|
|
|
|
|
|
return base.rstrip("/")
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-05-20 22:03:57 -05:00
|
|
|
|
def _parse_openai_reset(value: str) -> datetime | None:
|
|
|
|
|
|
"""Parse an OpenAI reset header: ISO datetime OR duration like '1m30s'."""
|
|
|
|
|
|
if not value:
|
|
|
|
|
|
return None
|
|
|
|
|
|
# ISO format first
|
|
|
|
|
|
if "T" in value or value.endswith("Z"):
|
|
|
|
|
|
return _parse_iso_reset(value)
|
|
|
|
|
|
# Duration
|
|
|
|
|
|
m = _OAI_DURATION_RE.match(value.strip())
|
|
|
|
|
|
if m and any(m.groups()):
|
|
|
|
|
|
h = float(m.group(1) or 0)
|
|
|
|
|
|
mn = float(m.group(2) or 0)
|
|
|
|
|
|
s = float(m.group(3) or 0)
|
|
|
|
|
|
total_seconds = h * 3600 + mn * 60 + s
|
|
|
|
|
|
return utcnow() + timedelta(seconds=total_seconds)
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-05-20 23:49:56 -05:00
|
|
|
|
def _apply_openai_ratelimit_headers(result: ProviderUsageLive, headers: dict[str, str]) -> None:
|
|
|
|
|
|
result.tokens = TokenWindow(
|
|
|
|
|
|
limit=_parse_int_header(headers, "x-ratelimit-limit-tokens"),
|
|
|
|
|
|
remaining=_parse_int_header(headers, "x-ratelimit-remaining-tokens"),
|
|
|
|
|
|
reset_at=_parse_openai_reset(headers.get("x-ratelimit-reset-tokens", "")),
|
|
|
|
|
|
)
|
|
|
|
|
|
result.requests = RequestWindow(
|
|
|
|
|
|
limit=_parse_int_header(headers, "x-ratelimit-limit-requests"),
|
|
|
|
|
|
remaining=_parse_int_header(headers, "x-ratelimit-remaining-requests"),
|
|
|
|
|
|
reset_at=_parse_openai_reset(headers.get("x-ratelimit-reset-requests", "")),
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _extract_openai_usage(payload: Any) -> tuple[int | None, int | None]:
|
|
|
|
|
|
if not isinstance(payload, dict):
|
|
|
|
|
|
return (None, None)
|
|
|
|
|
|
usage = payload.get("usage")
|
|
|
|
|
|
if not isinstance(usage, dict):
|
|
|
|
|
|
return (None, None)
|
|
|
|
|
|
|
|
|
|
|
|
# Responses API style
|
|
|
|
|
|
in_tok = usage.get("input_tokens")
|
|
|
|
|
|
out_tok = usage.get("output_tokens")
|
|
|
|
|
|
if isinstance(in_tok, int) or isinstance(out_tok, int):
|
|
|
|
|
|
return (
|
|
|
|
|
|
in_tok if isinstance(in_tok, int) else None,
|
|
|
|
|
|
out_tok if isinstance(out_tok, int) else None,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# Chat Completions style
|
|
|
|
|
|
in_tok = usage.get("prompt_tokens")
|
|
|
|
|
|
out_tok = usage.get("completion_tokens")
|
|
|
|
|
|
return (
|
|
|
|
|
|
in_tok if isinstance(in_tok, int) else None,
|
|
|
|
|
|
out_tok if isinstance(out_tok, int) else None,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-05-20 22:03:57 -05:00
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# Provider-specific fetch functions
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
async def _fetch_anthropic(api_key: str, base_url: str | None) -> ProviderUsageLive:
|
2026-05-20 23:49:56 -05:00
|
|
|
|
base = _normalize_base(
|
|
|
|
|
|
base_url,
|
|
|
|
|
|
"https://api.anthropic.com",
|
|
|
|
|
|
strip_suffixes=("/v1",),
|
|
|
|
|
|
)
|
2026-05-20 22:03:57 -05:00
|
|
|
|
now = utcnow()
|
|
|
|
|
|
result = ProviderUsageLive(provider="anthropic", account_key="", checked_at=now, reachable=False)
|
|
|
|
|
|
|
|
|
|
|
|
async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
|
|
|
|
|
|
try:
|
|
|
|
|
|
resp = await client.get(
|
|
|
|
|
|
f"{base}/v1/models",
|
|
|
|
|
|
headers={
|
|
|
|
|
|
"x-api-key": api_key,
|
|
|
|
|
|
"anthropic-version": "2023-06-01",
|
|
|
|
|
|
},
|
|
|
|
|
|
)
|
|
|
|
|
|
except (httpx.ConnectError, httpx.TimeoutException) as exc:
|
|
|
|
|
|
result.error = f"Connection failed: {exc}"
|
|
|
|
|
|
return result
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
|
result.error = str(exc)
|
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
if resp.status_code == 401:
|
|
|
|
|
|
result.error = "Invalid API key (401)."
|
|
|
|
|
|
return result
|
|
|
|
|
|
if resp.status_code not in (200, 429):
|
|
|
|
|
|
result.error = f"Provider returned HTTP {resp.status_code}."
|
|
|
|
|
|
# Still try to parse headers on 429 (rate limited but data is there)
|
|
|
|
|
|
if resp.status_code != 429:
|
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
h = {k.lower(): v for k, v in resp.headers.items()}
|
|
|
|
|
|
result.reachable = True
|
|
|
|
|
|
result.raw_headers = {k: v for k, v in h.items() if "ratelimit" in k}
|
|
|
|
|
|
|
2026-05-20 23:22:54 -05:00
|
|
|
|
_apply_anthropic_ratelimit_headers(result, h)
|
2026-05-20 22:03:57 -05:00
|
|
|
|
|
|
|
|
|
|
# Extract model IDs
|
|
|
|
|
|
try:
|
|
|
|
|
|
data = resp.json()
|
|
|
|
|
|
items = data.get("data") or data if isinstance(data.get("data"), list) else []
|
|
|
|
|
|
result.models = [m.get("id", "") for m in items if isinstance(m, dict) and m.get("id")]
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
2026-05-20 23:22:54 -05:00
|
|
|
|
# Some tiers/paths may omit ratelimit headers on /v1/models.
|
|
|
|
|
|
# Fallback to a minimal /v1/messages probe so we can still surface usage/time.
|
|
|
|
|
|
if (
|
|
|
|
|
|
result.tokens.limit is None
|
|
|
|
|
|
and result.input_tokens.limit is None
|
|
|
|
|
|
and result.requests.limit is None
|
|
|
|
|
|
):
|
|
|
|
|
|
probe_model = _pick_anthropic_probe_model(result.models)
|
|
|
|
|
|
if probe_model:
|
|
|
|
|
|
result.sample_model = probe_model
|
|
|
|
|
|
async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
|
|
|
|
|
|
try:
|
|
|
|
|
|
probe_resp = await client.post(
|
|
|
|
|
|
f"{base}/v1/messages",
|
|
|
|
|
|
headers={
|
|
|
|
|
|
"x-api-key": api_key,
|
|
|
|
|
|
"anthropic-version": "2023-06-01",
|
|
|
|
|
|
"content-type": "application/json",
|
|
|
|
|
|
},
|
|
|
|
|
|
json={
|
|
|
|
|
|
"model": probe_model,
|
|
|
|
|
|
"max_tokens": 1,
|
|
|
|
|
|
"messages": [{"role": "user", "content": "Usage probe"}],
|
|
|
|
|
|
},
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
probe_resp = None
|
|
|
|
|
|
|
|
|
|
|
|
if probe_resp is not None:
|
|
|
|
|
|
probe_headers = {k.lower(): v for k, v in probe_resp.headers.items()}
|
|
|
|
|
|
probe_rl_headers = {k: v for k, v in probe_headers.items() if "ratelimit" in k}
|
|
|
|
|
|
if probe_rl_headers:
|
|
|
|
|
|
result.raw_headers = probe_rl_headers
|
|
|
|
|
|
_apply_anthropic_ratelimit_headers(result, probe_headers)
|
|
|
|
|
|
if probe_resp.status_code == 200:
|
|
|
|
|
|
try:
|
|
|
|
|
|
payload = probe_resp.json()
|
|
|
|
|
|
usage = payload.get("usage") if isinstance(payload, dict) else None
|
|
|
|
|
|
if isinstance(usage, dict):
|
|
|
|
|
|
in_tok = usage.get("input_tokens")
|
|
|
|
|
|
out_tok = usage.get("output_tokens")
|
|
|
|
|
|
if isinstance(in_tok, int):
|
|
|
|
|
|
result.sample_input_tokens = in_tok
|
|
|
|
|
|
if isinstance(out_tok, int):
|
|
|
|
|
|
result.sample_output_tokens = out_tok
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
elapsed_ms = probe_resp.elapsed.total_seconds() * 1000.0
|
|
|
|
|
|
result.sample_latency_ms = int(max(0.0, round(elapsed_ms)))
|
|
|
|
|
|
|
2026-05-20 22:03:57 -05:00
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _fetch_openai(api_key: str, base_url: str | None) -> ProviderUsageLive:
|
2026-05-20 23:49:56 -05:00
|
|
|
|
base = _normalize_base(
|
|
|
|
|
|
base_url,
|
|
|
|
|
|
"https://api.openai.com",
|
|
|
|
|
|
strip_suffixes=("/v1",),
|
|
|
|
|
|
)
|
2026-05-20 22:03:57 -05:00
|
|
|
|
now = utcnow()
|
|
|
|
|
|
result = ProviderUsageLive(provider="openai", account_key="", checked_at=now, reachable=False)
|
|
|
|
|
|
|
|
|
|
|
|
async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
|
|
|
|
|
|
try:
|
|
|
|
|
|
resp = await client.get(
|
|
|
|
|
|
f"{base}/v1/models",
|
|
|
|
|
|
headers={"Authorization": f"Bearer {api_key}"},
|
|
|
|
|
|
)
|
|
|
|
|
|
except (httpx.ConnectError, httpx.TimeoutException) as exc:
|
|
|
|
|
|
result.error = f"Connection failed: {exc}"
|
|
|
|
|
|
return result
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
|
result.error = str(exc)
|
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
if resp.status_code == 401:
|
|
|
|
|
|
result.error = "Invalid API key (401)."
|
|
|
|
|
|
return result
|
|
|
|
|
|
if resp.status_code not in (200, 429):
|
|
|
|
|
|
result.error = f"Provider returned HTTP {resp.status_code}."
|
|
|
|
|
|
if resp.status_code != 429:
|
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
h = {k.lower(): v for k, v in resp.headers.items()}
|
|
|
|
|
|
result.reachable = True
|
|
|
|
|
|
result.raw_headers = {k: v for k, v in h.items() if "ratelimit" in k}
|
|
|
|
|
|
|
2026-05-20 23:49:56 -05:00
|
|
|
|
_apply_openai_ratelimit_headers(result, h)
|
2026-05-20 22:03:57 -05:00
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
data = resp.json()
|
|
|
|
|
|
items = data.get("data") or []
|
|
|
|
|
|
result.models = [m.get("id", "") for m in items if isinstance(m, dict) and m.get("id")]
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
2026-05-20 23:22:54 -05:00
|
|
|
|
if result.tokens.limit is None and result.requests.limit is None:
|
|
|
|
|
|
probe_model = _pick_openai_probe_model(result.models)
|
|
|
|
|
|
if probe_model:
|
|
|
|
|
|
result.sample_model = probe_model
|
2026-05-21 02:25:50 -05:00
|
|
|
|
# min 16 tokens: gpt-5.x models reject max_output_tokens < 16
|
2026-05-20 23:49:56 -05:00
|
|
|
|
probe_endpoints: list[tuple[str, dict[str, Any]]] = [
|
|
|
|
|
|
(
|
|
|
|
|
|
f"{base}/v1/responses",
|
|
|
|
|
|
{
|
|
|
|
|
|
"model": probe_model,
|
|
|
|
|
|
"input": "Usage probe",
|
2026-05-21 02:25:50 -05:00
|
|
|
|
"max_output_tokens": 16,
|
2026-05-20 23:49:56 -05:00
|
|
|
|
},
|
|
|
|
|
|
),
|
|
|
|
|
|
(
|
|
|
|
|
|
f"{base}/v1/chat/completions",
|
|
|
|
|
|
{
|
|
|
|
|
|
"model": probe_model,
|
|
|
|
|
|
"messages": [{"role": "user", "content": "Usage probe"}],
|
2026-05-21 02:25:50 -05:00
|
|
|
|
"max_tokens": 16,
|
2026-05-20 23:49:56 -05:00
|
|
|
|
},
|
|
|
|
|
|
),
|
|
|
|
|
|
]
|
|
|
|
|
|
for endpoint, body in probe_endpoints:
|
|
|
|
|
|
async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
|
|
|
|
|
|
try:
|
|
|
|
|
|
probe_resp = await client.post(
|
|
|
|
|
|
endpoint,
|
|
|
|
|
|
headers={
|
|
|
|
|
|
"Authorization": f"Bearer {api_key}",
|
|
|
|
|
|
"content-type": "application/json",
|
|
|
|
|
|
},
|
|
|
|
|
|
json=body,
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
continue
|
2026-05-21 02:25:50 -05:00
|
|
|
|
# Quota exhaustion is a 429 with a distinct error code — surface it
|
|
|
|
|
|
# clearly rather than treating it as a transient rate limit.
|
|
|
|
|
|
if probe_resp.status_code == 429:
|
|
|
|
|
|
try:
|
|
|
|
|
|
err = probe_resp.json().get("error", {})
|
|
|
|
|
|
if err.get("code") == "insufficient_quota" or "exceeded your current quota" in str(err.get("message", "")):
|
|
|
|
|
|
result.error = "Quota exhausted — add credits at platform.openai.com/billing."
|
|
|
|
|
|
elapsed_ms = probe_resp.elapsed.total_seconds() * 1000.0
|
|
|
|
|
|
result.sample_latency_ms = int(max(0.0, round(elapsed_ms)))
|
|
|
|
|
|
break
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
2026-05-20 23:22:54 -05:00
|
|
|
|
probe_headers = {k.lower(): v for k, v in probe_resp.headers.items()}
|
|
|
|
|
|
probe_rl_headers = {k: v for k, v in probe_headers.items() if "ratelimit" in k}
|
|
|
|
|
|
if probe_rl_headers:
|
|
|
|
|
|
result.raw_headers = probe_rl_headers
|
2026-05-20 23:49:56 -05:00
|
|
|
|
_apply_openai_ratelimit_headers(result, probe_headers)
|
2026-05-20 23:22:54 -05:00
|
|
|
|
if probe_resp.status_code == 200:
|
|
|
|
|
|
try:
|
|
|
|
|
|
payload = probe_resp.json()
|
2026-05-20 23:49:56 -05:00
|
|
|
|
in_tok, out_tok = _extract_openai_usage(payload)
|
|
|
|
|
|
if in_tok is not None:
|
|
|
|
|
|
result.sample_input_tokens = in_tok
|
|
|
|
|
|
if out_tok is not None:
|
|
|
|
|
|
result.sample_output_tokens = out_tok
|
2026-05-20 23:22:54 -05:00
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
elapsed_ms = probe_resp.elapsed.total_seconds() * 1000.0
|
|
|
|
|
|
result.sample_latency_ms = int(max(0.0, round(elapsed_ms)))
|
2026-05-20 23:49:56 -05:00
|
|
|
|
if (
|
|
|
|
|
|
result.tokens.limit is not None
|
|
|
|
|
|
or result.requests.limit is not None
|
|
|
|
|
|
or result.sample_input_tokens is not None
|
|
|
|
|
|
or result.sample_output_tokens is not None
|
|
|
|
|
|
):
|
|
|
|
|
|
break
|
2026-05-20 23:22:54 -05:00
|
|
|
|
|
2026-05-20 22:03:57 -05:00
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _fetch_ollama(base_url: str | None, api_key: str | None) -> ProviderUsageLive:
|
2026-05-20 23:49:56 -05:00
|
|
|
|
base = _normalize_base(
|
|
|
|
|
|
base_url,
|
|
|
|
|
|
"http://localhost:11434",
|
|
|
|
|
|
strip_suffixes=("/api",),
|
|
|
|
|
|
)
|
2026-05-20 22:03:57 -05:00
|
|
|
|
now = utcnow()
|
|
|
|
|
|
result = ProviderUsageLive(provider="ollama", account_key="", checked_at=now, reachable=False)
|
|
|
|
|
|
|
|
|
|
|
|
headers: dict[str, str] = {}
|
|
|
|
|
|
if api_key:
|
|
|
|
|
|
headers["Authorization"] = f"Bearer {api_key}"
|
|
|
|
|
|
|
|
|
|
|
|
async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
|
|
|
|
|
|
try:
|
|
|
|
|
|
resp = await client.get(f"{base}/api/tags", headers=headers)
|
|
|
|
|
|
except (httpx.ConnectError, httpx.TimeoutException) as exc:
|
|
|
|
|
|
result.error = f"Ollama unreachable: {exc}"
|
|
|
|
|
|
return result
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
|
result.error = str(exc)
|
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
if resp.status_code not in (200,):
|
|
|
|
|
|
result.error = f"Ollama returned HTTP {resp.status_code}."
|
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
result.reachable = True
|
|
|
|
|
|
# Ollama has no rate limits — just expose available models
|
|
|
|
|
|
try:
|
|
|
|
|
|
data = resp.json()
|
|
|
|
|
|
models_raw = data.get("models") or []
|
|
|
|
|
|
result.models = [m.get("name", "") for m in models_raw if isinstance(m, dict) and m.get("name")]
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
2026-05-20 23:22:54 -05:00
|
|
|
|
if result.models:
|
|
|
|
|
|
result.sample_model = result.models[0]
|
|
|
|
|
|
async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
|
|
|
|
|
|
try:
|
|
|
|
|
|
probe_resp = await client.post(
|
|
|
|
|
|
f"{base}/api/generate",
|
|
|
|
|
|
headers={**headers, "content-type": "application/json"},
|
|
|
|
|
|
json={
|
|
|
|
|
|
"model": result.sample_model,
|
|
|
|
|
|
"prompt": "Usage probe",
|
|
|
|
|
|
"stream": False,
|
|
|
|
|
|
"options": {"num_predict": 1},
|
|
|
|
|
|
},
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
probe_resp = None
|
|
|
|
|
|
if probe_resp is not None and probe_resp.status_code == 200:
|
|
|
|
|
|
try:
|
|
|
|
|
|
payload = probe_resp.json()
|
|
|
|
|
|
in_tok = payload.get("prompt_eval_count")
|
|
|
|
|
|
out_tok = payload.get("eval_count")
|
|
|
|
|
|
total_duration_ns = payload.get("total_duration")
|
|
|
|
|
|
if isinstance(in_tok, int):
|
|
|
|
|
|
result.sample_input_tokens = in_tok
|
|
|
|
|
|
if isinstance(out_tok, int):
|
|
|
|
|
|
result.sample_output_tokens = out_tok
|
|
|
|
|
|
if isinstance(total_duration_ns, int):
|
|
|
|
|
|
result.sample_latency_ms = max(0, int(round(total_duration_ns / 1_000_000)))
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
2026-05-20 22:03:57 -05:00
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-05-21 03:29:35 -05:00
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# Subscription usage fetchers — require session/OAuth tokens, not API keys
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
_ANTHROPIC_SUBSCRIPTION_URL = "https://api.anthropic.com/api/oauth/usage"
|
|
|
|
|
|
_CODEX_SUBSCRIPTION_URL = "https://chatgpt.com/backend-api/wham/usage"
|
|
|
|
|
|
|
2026-05-21 19:42:46 -05:00
|
|
|
|
# Path to the Claude Code OAuth credentials file, mounted read-only from the host.
|
|
|
|
|
|
_CLAUDE_CREDENTIALS_PATH = os.environ.get("CLAUDE_CREDENTIALS_PATH", "")
|
|
|
|
|
|
|
2026-05-21 19:50:19 -05:00
|
|
|
|
# Path to the Codex CLI auth file, mounted read-only from the host.
|
|
|
|
|
|
_CODEX_CREDENTIALS_PATH = os.environ.get("CODEX_CREDENTIALS_PATH", "")
|
|
|
|
|
|
|
2026-05-21 19:42:46 -05:00
|
|
|
|
_claude_oauth_cache: tuple[float, str] | None = None # (expires_at_ms, access_token)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _read_claude_local_oauth_token() -> str | None:
|
|
|
|
|
|
"""Read the Claude Code OAuth access token from the host credentials file.
|
|
|
|
|
|
|
|
|
|
|
|
Returns a valid access token, or None if the file is absent, unreadable,
|
|
|
|
|
|
or the token has expired. The caller should fall back to the manually
|
|
|
|
|
|
configured session_key when this returns None.
|
|
|
|
|
|
"""
|
|
|
|
|
|
global _claude_oauth_cache
|
|
|
|
|
|
|
|
|
|
|
|
path = _CLAUDE_CREDENTIALS_PATH
|
|
|
|
|
|
if not path:
|
|
|
|
|
|
# Fall back to the default XDG location when no explicit path is set.
|
|
|
|
|
|
home = os.path.expanduser("~")
|
|
|
|
|
|
path = os.path.join(home, ".claude", ".credentials.json")
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
with open(path) as fh:
|
|
|
|
|
|
data = _json_module.load(fh)
|
|
|
|
|
|
except (FileNotFoundError, PermissionError, ValueError, OSError):
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
oauth = data.get("claudeAiOauth")
|
|
|
|
|
|
if not isinstance(oauth, dict):
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
access_token = oauth.get("accessToken")
|
|
|
|
|
|
expires_at = oauth.get("expiresAt")
|
|
|
|
|
|
|
|
|
|
|
|
if not isinstance(access_token, str) or not access_token:
|
|
|
|
|
|
return None
|
|
|
|
|
|
if not isinstance(expires_at, (int, float)) or expires_at <= 0:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
now_ms = _time_module.time() * 1000
|
|
|
|
|
|
if expires_at <= now_ms:
|
|
|
|
|
|
logger.debug("provider_usage.claude_oauth.token_expired expires_at=%s", expires_at)
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
return access_token
|
|
|
|
|
|
|
2026-05-21 19:50:19 -05:00
|
|
|
|
|
|
|
|
|
|
def _read_codex_local_token() -> str | None:
|
|
|
|
|
|
"""Read the Codex CLI JWT access token from the host auth file (~/.codex/auth.json).
|
|
|
|
|
|
|
|
|
|
|
|
The Codex CLI stores a long-lived JWT (typically ~9 days) issued by OpenAI's
|
|
|
|
|
|
OAuth flow. This token works as a Bearer credential for the ChatGPT backend
|
|
|
|
|
|
API, including the wham/usage subscription endpoint.
|
|
|
|
|
|
|
|
|
|
|
|
Returns the access_token string, or None if the file is absent, unreadable,
|
|
|
|
|
|
or the token has expired. The caller should fall back to the manually
|
|
|
|
|
|
configured session_key when this returns None.
|
|
|
|
|
|
"""
|
|
|
|
|
|
path = _CODEX_CREDENTIALS_PATH
|
|
|
|
|
|
if not path:
|
|
|
|
|
|
home = os.path.expanduser("~")
|
|
|
|
|
|
path = os.path.join(home, ".codex", "auth.json")
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
with open(path) as fh:
|
|
|
|
|
|
data = _json_module.load(fh)
|
|
|
|
|
|
except (FileNotFoundError, PermissionError, ValueError, OSError):
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
tokens = data.get("tokens")
|
|
|
|
|
|
if not isinstance(tokens, dict):
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
access_token = tokens.get("access_token")
|
|
|
|
|
|
if not isinstance(access_token, str) or not access_token:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
# Decode the JWT payload (no signature verification needed here — we just
|
|
|
|
|
|
# want to avoid using a token we know is expired before making a network call).
|
|
|
|
|
|
parts = access_token.split(".")
|
|
|
|
|
|
if len(parts) >= 2:
|
|
|
|
|
|
import base64 as _base64
|
|
|
|
|
|
try:
|
|
|
|
|
|
pad = parts[1] + "=" * (4 - len(parts[1]) % 4)
|
|
|
|
|
|
payload = _json_module.loads(_base64.urlsafe_b64decode(pad))
|
|
|
|
|
|
exp = payload.get("exp")
|
|
|
|
|
|
if isinstance(exp, (int, float)) and exp <= _time_module.time():
|
|
|
|
|
|
logger.debug("provider_usage.codex_oauth.token_expired exp=%s", exp)
|
|
|
|
|
|
return None
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass # Can't decode JWT — proceed optimistically
|
|
|
|
|
|
|
|
|
|
|
|
return access_token
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-05-21 03:29:35 -05:00
|
|
|
|
_ANTHROPIC_WINDOW_LABELS: dict[str, str] = {
|
|
|
|
|
|
"five_hour": "Current session",
|
|
|
|
|
|
"seven_day": "All models",
|
|
|
|
|
|
"seven_day_sonnet": "Sonnet",
|
|
|
|
|
|
"seven_day_opus": "Opus",
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _fetch_anthropic_subscription(session_key: str) -> list[SubscriptionWindow]:
|
|
|
|
|
|
"""Fetch Claude subscription usage windows via the Anthropic OAuth usage endpoint.
|
|
|
|
|
|
|
|
|
|
|
|
Requires a Claude.ai session key (the ``sessionKey`` cookie value, which
|
|
|
|
|
|
starts with ``sk-ant-``). Returns an empty list if the key is invalid or
|
|
|
|
|
|
the endpoint is unreachable.
|
|
|
|
|
|
"""
|
2026-05-24 17:23:41 -05:00
|
|
|
|
headers = {
|
|
|
|
|
|
"Authorization": f"Bearer {session_key}",
|
|
|
|
|
|
"User-Agent": "pipeline",
|
|
|
|
|
|
"Accept": "application/json",
|
|
|
|
|
|
"anthropic-version": "2023-06-01",
|
|
|
|
|
|
"anthropic-beta": "oauth-2025-04-20",
|
|
|
|
|
|
}
|
|
|
|
|
|
# Retry once on 429 — the subscription endpoint can be rate-limited when
|
|
|
|
|
|
# called right after another api.anthropic.com request (e.g. /v1/models).
|
2026-05-24 17:51:48 -05:00
|
|
|
|
# Sleep 3s before retrying; the rate-limit window is typically >1s but
|
|
|
|
|
|
# rarely longer than 3s in isolation (persistent throttling is handled by
|
|
|
|
|
|
# the 60s CACHE_TTL_FAILURE_SECONDS at the call site).
|
2026-05-24 17:23:41 -05:00
|
|
|
|
for attempt in range(2):
|
|
|
|
|
|
async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
|
|
|
|
|
|
try:
|
|
|
|
|
|
resp = await client.get(_ANTHROPIC_SUBSCRIPTION_URL, headers=headers)
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
|
logger.warning("provider_usage.subscription.anthropic.fetch_failed error=%s", exc)
|
|
|
|
|
|
return []
|
|
|
|
|
|
if resp.status_code == 429 and attempt == 0:
|
2026-05-24 17:51:48 -05:00
|
|
|
|
await asyncio.sleep(3.0)
|
2026-05-24 17:23:41 -05:00
|
|
|
|
continue
|
|
|
|
|
|
break
|
2026-05-21 03:29:35 -05:00
|
|
|
|
|
|
|
|
|
|
if not resp.status_code == 200:
|
2026-05-24 17:51:48 -05:00
|
|
|
|
logger.warning(
|
2026-05-21 03:29:35 -05:00
|
|
|
|
"provider_usage.subscription.anthropic.http_error status=%s body=%s",
|
|
|
|
|
|
resp.status_code,
|
|
|
|
|
|
resp.text[:200],
|
|
|
|
|
|
)
|
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
data = resp.json()
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
|
|
windows: list[SubscriptionWindow] = []
|
|
|
|
|
|
for key, label in _ANTHROPIC_WINDOW_LABELS.items():
|
|
|
|
|
|
block = data.get(key)
|
|
|
|
|
|
if not isinstance(block, dict):
|
|
|
|
|
|
continue
|
|
|
|
|
|
utilization = block.get("utilization")
|
|
|
|
|
|
if utilization is None:
|
|
|
|
|
|
continue
|
|
|
|
|
|
resets_at_str = block.get("resets_at")
|
|
|
|
|
|
reset_dt: datetime | None = None
|
|
|
|
|
|
if isinstance(resets_at_str, str):
|
|
|
|
|
|
try:
|
|
|
|
|
|
normalized = resets_at_str.replace("Z", "+00:00")
|
|
|
|
|
|
parsed = datetime.fromisoformat(normalized)
|
|
|
|
|
|
if parsed.tzinfo is not None:
|
|
|
|
|
|
reset_dt = parsed.astimezone(timezone.utc).replace(tzinfo=None)
|
|
|
|
|
|
else:
|
|
|
|
|
|
reset_dt = parsed
|
|
|
|
|
|
except ValueError:
|
|
|
|
|
|
pass
|
|
|
|
|
|
windows.append(SubscriptionWindow(
|
|
|
|
|
|
key=key,
|
|
|
|
|
|
label=label,
|
2026-05-21 19:42:46 -05:00
|
|
|
|
pct_used=min(100.0, round(float(utilization), 1)), # already 0–100
|
2026-05-21 03:29:35 -05:00
|
|
|
|
reset_at=reset_dt,
|
|
|
|
|
|
))
|
|
|
|
|
|
|
|
|
|
|
|
logger.info(
|
|
|
|
|
|
"provider_usage.subscription.anthropic.fetched windows=%d",
|
|
|
|
|
|
len(windows),
|
|
|
|
|
|
)
|
|
|
|
|
|
return windows
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _fetch_codex_subscription(session_key: str) -> tuple[list[SubscriptionWindow], str | None]:
|
|
|
|
|
|
"""Fetch Codex/ChatGPT subscription usage windows via the wham/usage endpoint.
|
|
|
|
|
|
|
|
|
|
|
|
Requires a ChatGPT bearer token (from browser session, not the standard
|
|
|
|
|
|
API key). Returns (windows, plan_label) or ([], None) on failure.
|
|
|
|
|
|
"""
|
|
|
|
|
|
async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
|
|
|
|
|
|
try:
|
|
|
|
|
|
resp = await client.get(
|
|
|
|
|
|
_CODEX_SUBSCRIPTION_URL,
|
|
|
|
|
|
headers={
|
|
|
|
|
|
"Authorization": f"Bearer {session_key}",
|
|
|
|
|
|
"Accept": "application/json",
|
|
|
|
|
|
},
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
|
logger.warning("provider_usage.subscription.codex.fetch_failed error=%s", exc)
|
|
|
|
|
|
return [], None
|
|
|
|
|
|
|
|
|
|
|
|
if resp.status_code != 200:
|
|
|
|
|
|
logger.debug(
|
|
|
|
|
|
"provider_usage.subscription.codex.http_error status=%s",
|
|
|
|
|
|
resp.status_code,
|
|
|
|
|
|
)
|
|
|
|
|
|
return [], None
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
data = resp.json()
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
return [], None
|
|
|
|
|
|
|
|
|
|
|
|
windows: list[SubscriptionWindow] = []
|
|
|
|
|
|
rate_limit = data.get("rate_limit") or {}
|
|
|
|
|
|
|
|
|
|
|
|
primary = rate_limit.get("primary_window")
|
|
|
|
|
|
if isinstance(primary, dict) and primary.get("used_percent") is not None:
|
|
|
|
|
|
window_seconds = primary.get("limit_window_seconds") or 10800
|
|
|
|
|
|
window_hours = round(window_seconds / 3600)
|
|
|
|
|
|
reset_ts = primary.get("reset_at")
|
|
|
|
|
|
reset_dt: datetime | None = None
|
|
|
|
|
|
if isinstance(reset_ts, (int, float)):
|
|
|
|
|
|
reset_dt = datetime.fromtimestamp(reset_ts, tz=timezone.utc).replace(tzinfo=None)
|
|
|
|
|
|
windows.append(SubscriptionWindow(
|
|
|
|
|
|
key="primary",
|
2026-05-21 19:50:19 -05:00
|
|
|
|
label=f"Current session ({window_hours}h)",
|
2026-05-21 03:29:35 -05:00
|
|
|
|
pct_used=round(float(primary["used_percent"]), 1),
|
|
|
|
|
|
reset_at=reset_dt,
|
|
|
|
|
|
))
|
|
|
|
|
|
|
|
|
|
|
|
secondary = rate_limit.get("secondary_window")
|
|
|
|
|
|
if isinstance(secondary, dict) and secondary.get("used_percent") is not None:
|
|
|
|
|
|
sec_seconds = secondary.get("limit_window_seconds") or 86400
|
|
|
|
|
|
sec_hours = round(sec_seconds / 3600)
|
2026-05-21 19:50:19 -05:00
|
|
|
|
sec_label = "All models (weekly)" if sec_hours >= 168 else f"All models ({sec_hours}h)"
|
2026-05-21 03:29:35 -05:00
|
|
|
|
reset_ts = secondary.get("reset_at")
|
|
|
|
|
|
reset_dt = None
|
|
|
|
|
|
if isinstance(reset_ts, (int, float)):
|
|
|
|
|
|
reset_dt = datetime.fromtimestamp(reset_ts, tz=timezone.utc).replace(tzinfo=None)
|
|
|
|
|
|
windows.append(SubscriptionWindow(
|
|
|
|
|
|
key="secondary",
|
|
|
|
|
|
label=sec_label,
|
|
|
|
|
|
pct_used=round(float(secondary["used_percent"]), 1),
|
|
|
|
|
|
reset_at=reset_dt,
|
|
|
|
|
|
))
|
|
|
|
|
|
|
|
|
|
|
|
# Plan label
|
|
|
|
|
|
plan_type = data.get("plan_type")
|
|
|
|
|
|
credits = data.get("credits") or {}
|
|
|
|
|
|
balance = credits.get("balance")
|
|
|
|
|
|
plan_label: str | None = None
|
|
|
|
|
|
if isinstance(balance, (int, float)) and balance > 0:
|
|
|
|
|
|
plan_label = f"{plan_type} (${float(balance):.2f})" if plan_type else f"${float(balance):.2f}"
|
|
|
|
|
|
elif plan_type:
|
|
|
|
|
|
plan_label = str(plan_type)
|
|
|
|
|
|
|
|
|
|
|
|
logger.info(
|
|
|
|
|
|
"provider_usage.subscription.codex.fetched windows=%d plan=%s",
|
|
|
|
|
|
len(windows),
|
|
|
|
|
|
plan_label,
|
|
|
|
|
|
)
|
|
|
|
|
|
return windows, plan_label
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-05-20 22:03:57 -05:00
|
|
|
|
# ---------------------------------------------------------------------------
|
2026-05-24 17:37:08 -05:00
|
|
|
|
# In-memory TTL cache + in-flight deduplication
|
2026-05-20 22:03:57 -05:00
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
2026-05-24 17:37:08 -05:00
|
|
|
|
_cache: dict[str, tuple[datetime, ProviderUsageLive, int]] = {}
|
|
|
|
|
|
# Tracks in-progress fetches so concurrent requests share one result instead
|
|
|
|
|
|
# of each racing to hit the provider API (which triggers 429 cascades).
|
|
|
|
|
|
_inflight: dict[str, asyncio.Future[ProviderUsageLive]] = {}
|
2026-05-20 22:03:57 -05:00
|
|
|
|
|
|
|
|
|
|
|
2026-05-24 19:05:30 -05:00
|
|
|
|
def _get_cached(cache_key: str) -> ProviderUsageLive | None:
|
|
|
|
|
|
entry = _cache.get(cache_key)
|
2026-05-20 22:03:57 -05:00
|
|
|
|
if entry is None:
|
|
|
|
|
|
return None
|
2026-05-24 17:37:08 -05:00
|
|
|
|
cached_at, result, ttl = entry
|
|
|
|
|
|
if (utcnow() - cached_at).total_seconds() > ttl:
|
2026-05-24 19:05:30 -05:00
|
|
|
|
del _cache[cache_key]
|
2026-05-20 22:03:57 -05:00
|
|
|
|
return None
|
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-05-24 19:05:30 -05:00
|
|
|
|
def _set_cached(cache_key: str, result: ProviderUsageLive, ttl: int = CACHE_TTL_SECONDS) -> None:
|
|
|
|
|
|
_cache[cache_key] = (utcnow(), result, ttl)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _secret_fingerprint(value: str | None) -> str:
|
|
|
|
|
|
if not value:
|
|
|
|
|
|
return "none"
|
|
|
|
|
|
return hashlib.sha256(value.encode("utf-8")).hexdigest()[:16]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _usage_cache_key(
|
|
|
|
|
|
credential_id: str,
|
|
|
|
|
|
provider: str,
|
|
|
|
|
|
api_key: str | None,
|
|
|
|
|
|
base_url: str | None,
|
|
|
|
|
|
session_key: str | None,
|
|
|
|
|
|
) -> str:
|
|
|
|
|
|
"""Return a cache key for the real upstream credential being used.
|
|
|
|
|
|
|
|
|
|
|
|
Local OAuth tokens are intentionally shared across credential rows, because
|
|
|
|
|
|
Pipeline's primary source of truth is the local machine's Claude/Codex login.
|
|
|
|
|
|
"""
|
|
|
|
|
|
normalized_provider = provider.lower()
|
|
|
|
|
|
normalized_base_url = (base_url or "").rstrip("/")
|
|
|
|
|
|
|
|
|
|
|
|
if normalized_provider == "anthropic":
|
|
|
|
|
|
local_oauth = _read_claude_local_oauth_token()
|
|
|
|
|
|
if local_oauth:
|
|
|
|
|
|
return f"anthropic:local-oauth:{_secret_fingerprint(local_oauth)}"
|
|
|
|
|
|
if session_key:
|
|
|
|
|
|
return f"anthropic:session:{_secret_fingerprint(session_key)}"
|
|
|
|
|
|
if api_key:
|
|
|
|
|
|
return f"anthropic:api:{normalized_base_url}:{_secret_fingerprint(api_key)}"
|
|
|
|
|
|
|
|
|
|
|
|
if normalized_provider in ("openai", "codex"):
|
|
|
|
|
|
if api_key:
|
|
|
|
|
|
return (
|
|
|
|
|
|
f"{normalized_provider}:api:"
|
|
|
|
|
|
f"{normalized_base_url}:{_secret_fingerprint(api_key)}"
|
|
|
|
|
|
)
|
|
|
|
|
|
if session_key:
|
|
|
|
|
|
return f"{normalized_provider}:session:{_secret_fingerprint(session_key)}"
|
|
|
|
|
|
local_codex = _read_codex_local_token()
|
|
|
|
|
|
if local_codex:
|
|
|
|
|
|
return f"{normalized_provider}:local-oauth:{_secret_fingerprint(local_codex)}"
|
|
|
|
|
|
|
|
|
|
|
|
if normalized_provider == "ollama":
|
|
|
|
|
|
return f"ollama:{normalized_base_url}:{_secret_fingerprint(api_key)}"
|
|
|
|
|
|
|
|
|
|
|
|
return f"{normalized_provider}:credential:{credential_id}"
|
2026-05-20 22:03:57 -05:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# Public entry point
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
2026-05-24 19:05:30 -05:00
|
|
|
|
|
2026-05-24 17:37:08 -05:00
|
|
|
|
async def _do_fetch_provider_usage(
|
2026-05-24 19:05:30 -05:00
|
|
|
|
cache_key: str,
|
2026-05-20 22:03:57 -05:00
|
|
|
|
provider: str,
|
|
|
|
|
|
account_key: str,
|
|
|
|
|
|
api_key: str | None,
|
|
|
|
|
|
base_url: str | None,
|
2026-05-24 17:37:08 -05:00
|
|
|
|
session_key: str | None,
|
2026-05-20 22:03:57 -05:00
|
|
|
|
) -> ProviderUsageLive:
|
2026-05-24 17:37:08 -05:00
|
|
|
|
"""Inner fetch — called once per credential even under concurrent load."""
|
|
|
|
|
|
subscription_attempted = False
|
2026-05-24 17:16:53 -05:00
|
|
|
|
|
2026-05-20 22:03:57 -05:00
|
|
|
|
if provider == "anthropic":
|
2026-05-21 20:11:46 -05:00
|
|
|
|
# Auto-detected local OAuth token (sk-ant-oat01-) takes precedence — it is
|
|
|
|
|
|
# the correct credential type for the oauth/usage endpoint and is always
|
|
|
|
|
|
# fresher than a manually stored key. Fall back to session_key only when
|
|
|
|
|
|
# no local token is found (e.g. Pipeline running on a different machine).
|
2026-05-21 19:42:46 -05:00
|
|
|
|
local_oauth = _read_claude_local_oauth_token()
|
|
|
|
|
|
effective_session_key = local_oauth or session_key
|
|
|
|
|
|
|
|
|
|
|
|
if not api_key and not effective_session_key:
|
2026-05-20 22:03:57 -05:00
|
|
|
|
result = ProviderUsageLive(
|
|
|
|
|
|
provider=provider, account_key=account_key,
|
|
|
|
|
|
checked_at=utcnow(), reachable=False,
|
|
|
|
|
|
error="No API key configured.",
|
|
|
|
|
|
)
|
2026-05-24 19:18:25 -05:00
|
|
|
|
|
2026-05-24 19:22:18 -05:00
|
|
|
|
elif local_oauth:
|
|
|
|
|
|
# Local Claude session is the primary source — use the subscription
|
|
|
|
|
|
# endpoint only. Skipping /v1/models eliminates the sequential
|
|
|
|
|
|
# same-IP request that caused the subscription endpoint to 429.
|
|
|
|
|
|
subscription_attempted = True
|
|
|
|
|
|
result = ProviderUsageLive(
|
|
|
|
|
|
provider=provider, account_key=account_key,
|
|
|
|
|
|
checked_at=utcnow(), reachable=True,
|
|
|
|
|
|
)
|
|
|
|
|
|
sub_windows = await _fetch_anthropic_subscription(local_oauth)
|
|
|
|
|
|
if sub_windows:
|
|
|
|
|
|
result.subscription_windows = sub_windows
|
|
|
|
|
|
else:
|
|
|
|
|
|
result.error = "No subscription data returned."
|
|
|
|
|
|
|
2026-05-24 19:18:25 -05:00
|
|
|
|
elif api_key and effective_session_key:
|
2026-05-24 19:22:18 -05:00
|
|
|
|
# No local OAuth — fire API key + session key calls concurrently.
|
2026-05-24 19:18:25 -05:00
|
|
|
|
subscription_attempted = True
|
|
|
|
|
|
result, sub_windows = await asyncio.gather(
|
|
|
|
|
|
_fetch_anthropic(api_key, base_url),
|
|
|
|
|
|
_fetch_anthropic_subscription(effective_session_key),
|
|
|
|
|
|
)
|
|
|
|
|
|
if result.reachable is not False:
|
|
|
|
|
|
if sub_windows:
|
|
|
|
|
|
result.subscription_windows = sub_windows
|
|
|
|
|
|
result.reachable = True
|
|
|
|
|
|
else:
|
|
|
|
|
|
result.error = result.error or "No subscription data returned."
|
|
|
|
|
|
|
2026-05-21 03:29:35 -05:00
|
|
|
|
elif api_key:
|
2026-05-20 22:03:57 -05:00
|
|
|
|
result = await _fetch_anthropic(api_key, base_url)
|
2026-05-24 19:18:25 -05:00
|
|
|
|
|
2026-05-21 03:29:35 -05:00
|
|
|
|
else:
|
2026-05-24 19:22:18 -05:00
|
|
|
|
# session_key only, no local OAuth, no API key
|
2026-05-24 19:18:25 -05:00
|
|
|
|
subscription_attempted = True
|
2026-05-21 03:29:35 -05:00
|
|
|
|
result = ProviderUsageLive(
|
|
|
|
|
|
provider=provider, account_key=account_key,
|
|
|
|
|
|
checked_at=utcnow(), reachable=True,
|
|
|
|
|
|
)
|
2026-05-21 19:42:46 -05:00
|
|
|
|
sub_windows = await _fetch_anthropic_subscription(effective_session_key)
|
2026-05-21 03:29:35 -05:00
|
|
|
|
if sub_windows:
|
|
|
|
|
|
result.subscription_windows = sub_windows
|
2026-05-21 04:16:19 -05:00
|
|
|
|
else:
|
2026-05-24 19:18:25 -05:00
|
|
|
|
result.error = "No subscription data returned."
|
2026-05-20 22:03:57 -05:00
|
|
|
|
|
|
|
|
|
|
elif provider in ("openai", "codex"):
|
2026-05-21 03:29:35 -05:00
|
|
|
|
if not api_key and not session_key:
|
2026-05-20 22:03:57 -05:00
|
|
|
|
result = ProviderUsageLive(
|
|
|
|
|
|
provider=provider, account_key=account_key,
|
|
|
|
|
|
checked_at=utcnow(), reachable=False,
|
|
|
|
|
|
error="No API key configured.",
|
|
|
|
|
|
)
|
2026-05-21 03:29:35 -05:00
|
|
|
|
elif api_key:
|
2026-05-20 22:03:57 -05:00
|
|
|
|
result = await _fetch_openai(api_key, base_url)
|
2026-05-21 03:29:35 -05:00
|
|
|
|
else:
|
|
|
|
|
|
result = ProviderUsageLive(
|
|
|
|
|
|
provider=provider, account_key=account_key,
|
|
|
|
|
|
checked_at=utcnow(), reachable=True,
|
|
|
|
|
|
)
|
2026-05-21 20:11:46 -05:00
|
|
|
|
# Overlay subscription windows.
|
|
|
|
|
|
# Explicit session_key wins (allows a second account to override auto-detection).
|
|
|
|
|
|
# Fall back to auto-detected Codex CLI token when no key is stored.
|
2026-05-21 19:50:19 -05:00
|
|
|
|
local_codex = _read_codex_local_token()
|
2026-05-21 20:11:46 -05:00
|
|
|
|
effective_codex_key = session_key or local_codex
|
2026-05-21 19:50:19 -05:00
|
|
|
|
if effective_codex_key and result.reachable is not False:
|
2026-05-24 17:37:08 -05:00
|
|
|
|
subscription_attempted = True
|
2026-05-21 19:50:19 -05:00
|
|
|
|
sub_windows, plan_label = await _fetch_codex_subscription(effective_codex_key)
|
2026-05-21 03:29:35 -05:00
|
|
|
|
if sub_windows:
|
|
|
|
|
|
result.subscription_windows = sub_windows
|
|
|
|
|
|
result.subscription_plan = plan_label
|
|
|
|
|
|
result.reachable = True
|
2026-05-21 04:16:19 -05:00
|
|
|
|
else:
|
|
|
|
|
|
result.error = result.error or "No subscription data returned."
|
2026-05-20 22:03:57 -05:00
|
|
|
|
|
|
|
|
|
|
elif provider == "ollama":
|
|
|
|
|
|
result = await _fetch_ollama(base_url, api_key)
|
|
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
result = ProviderUsageLive(
|
|
|
|
|
|
provider=provider, account_key=account_key,
|
|
|
|
|
|
checked_at=utcnow(), reachable=False,
|
|
|
|
|
|
error=f"Live usage not supported for provider '{provider}'.",
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
result.account_key = account_key
|
2026-05-24 17:37:08 -05:00
|
|
|
|
# Use a short TTL when subscription windows were expected but came back empty
|
|
|
|
|
|
# (e.g. a transient 429 at startup). This avoids persisting a 60s stale result
|
|
|
|
|
|
# while still preventing the thundering-herd that occurs with no caching at all.
|
2026-05-24 19:05:30 -05:00
|
|
|
|
ttl = (
|
|
|
|
|
|
CACHE_TTL_FAILURE_SECONDS
|
|
|
|
|
|
if (subscription_attempted and not result.subscription_windows)
|
|
|
|
|
|
else CACHE_TTL_SECONDS
|
|
|
|
|
|
)
|
|
|
|
|
|
_set_cached(cache_key, result, ttl=ttl)
|
2026-05-20 22:03:57 -05:00
|
|
|
|
logger.info(
|
2026-05-24 17:37:08 -05:00
|
|
|
|
"provider_usage.checked provider=%s account=%s reachable=%s windows=%d error=%s",
|
2026-05-24 19:05:30 -05:00
|
|
|
|
provider,
|
|
|
|
|
|
account_key,
|
|
|
|
|
|
result.reachable,
|
|
|
|
|
|
len(result.subscription_windows),
|
|
|
|
|
|
result.error,
|
2026-05-20 22:03:57 -05:00
|
|
|
|
)
|
|
|
|
|
|
return result
|
2026-05-24 17:37:08 -05:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def fetch_provider_usage(
|
|
|
|
|
|
credential_id: str,
|
|
|
|
|
|
provider: str,
|
|
|
|
|
|
account_key: str,
|
|
|
|
|
|
api_key: str | None,
|
|
|
|
|
|
base_url: str | None,
|
|
|
|
|
|
*,
|
|
|
|
|
|
session_key: str | None = None,
|
|
|
|
|
|
force_refresh: bool = False,
|
|
|
|
|
|
) -> ProviderUsageLive:
|
|
|
|
|
|
"""Fetch live usage from the provider API.
|
|
|
|
|
|
|
|
|
|
|
|
When ``session_key`` is provided, also fetches subscription-plan usage
|
|
|
|
|
|
windows (e.g. "Current session 77% used, resets in 23 min") in addition
|
|
|
|
|
|
to the standard API rate-limit diagnostics.
|
|
|
|
|
|
|
|
|
|
|
|
Results are cached for CACHE_TTL_SECONDS (short CACHE_TTL_FAILURE_SECONDS
|
|
|
|
|
|
when subscription windows are unavailable). Concurrent requests for the same
|
2026-05-24 19:05:30 -05:00
|
|
|
|
effective provider credential share one in-flight fetch to avoid rate-limit cascades.
|
2026-05-24 17:37:08 -05:00
|
|
|
|
Pass force_refresh=True to bypass the cache.
|
|
|
|
|
|
"""
|
2026-05-24 19:05:30 -05:00
|
|
|
|
cache_key = _usage_cache_key(credential_id, provider, api_key, base_url, session_key)
|
|
|
|
|
|
|
2026-05-24 17:37:08 -05:00
|
|
|
|
if not force_refresh:
|
2026-05-24 19:05:30 -05:00
|
|
|
|
cached = _get_cached(cache_key)
|
2026-05-24 17:37:08 -05:00
|
|
|
|
if cached is not None:
|
|
|
|
|
|
return cached
|
|
|
|
|
|
|
|
|
|
|
|
# In-flight deduplication: if another coroutine is already fetching this
|
2026-05-24 19:05:30 -05:00
|
|
|
|
# upstream credential, await its result rather than racing to hit the provider API.
|
2026-05-24 17:37:08 -05:00
|
|
|
|
loop = asyncio.get_event_loop()
|
2026-05-24 19:05:30 -05:00
|
|
|
|
if cache_key in _inflight:
|
|
|
|
|
|
return await asyncio.shield(_inflight[cache_key])
|
2026-05-24 17:37:08 -05:00
|
|
|
|
|
|
|
|
|
|
fut: asyncio.Future[ProviderUsageLive] = loop.create_future()
|
2026-05-24 19:05:30 -05:00
|
|
|
|
_inflight[cache_key] = fut
|
2026-05-24 17:37:08 -05:00
|
|
|
|
try:
|
|
|
|
|
|
result = await _do_fetch_provider_usage(
|
2026-05-24 19:05:30 -05:00
|
|
|
|
cache_key,
|
|
|
|
|
|
provider,
|
|
|
|
|
|
account_key,
|
|
|
|
|
|
api_key,
|
|
|
|
|
|
base_url,
|
|
|
|
|
|
session_key,
|
2026-05-24 17:37:08 -05:00
|
|
|
|
)
|
|
|
|
|
|
fut.set_result(result)
|
|
|
|
|
|
return result
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
|
fut.set_exception(exc)
|
|
|
|
|
|
raise
|
|
|
|
|
|
finally:
|
2026-05-24 19:05:30 -05:00
|
|
|
|
_inflight.pop(cache_key, None)
|