diff --git a/backend/app/services/provider_usage.py b/backend/app/services/provider_usage.py index f0be676..972167b 100644 --- a/backend/app/services/provider_usage.py +++ b/backend/app/services/provider_usage.py @@ -32,13 +32,16 @@ ollama → GET {base_url}/api/tags (health-check only; no rate limits) Caching ------- -Results are cached per credential_id for CACHE_TTL_SECONDS (default 60s) to -avoid hammering provider APIs on every page load. +Results are cached by the effective provider credential source for +CACHE_TTL_SECONDS (default 60s) to avoid hammering provider APIs on every page +load. Multiple Pipeline credential rows that point at the same local Claude or +Codex login share one provider fetch. """ from __future__ import annotations import asyncio +import hashlib import json as _json_module import os import re @@ -926,27 +929,76 @@ _cache: dict[str, tuple[datetime, ProviderUsageLive, int]] = {} _inflight: dict[str, asyncio.Future[ProviderUsageLive]] = {} -def _get_cached(credential_id: str) -> ProviderUsageLive | None: - entry = _cache.get(credential_id) +def _get_cached(cache_key: str) -> ProviderUsageLive | None: + entry = _cache.get(cache_key) if entry is None: return None cached_at, result, ttl = entry if (utcnow() - cached_at).total_seconds() > ttl: - del _cache[credential_id] + del _cache[cache_key] return None return result -def _set_cached(credential_id: str, result: ProviderUsageLive, ttl: int = CACHE_TTL_SECONDS) -> None: - _cache[credential_id] = (utcnow(), result, ttl) +def _set_cached(cache_key: str, result: ProviderUsageLive, ttl: int = CACHE_TTL_SECONDS) -> None: + _cache[cache_key] = (utcnow(), result, ttl) + + +def _secret_fingerprint(value: str | None) -> str: + if not value: + return "none" + return hashlib.sha256(value.encode("utf-8")).hexdigest()[:16] + + +def _usage_cache_key( + credential_id: str, + provider: str, + api_key: str | None, + base_url: str | None, + session_key: str | None, +) -> str: + """Return a cache key for the real upstream credential being used. + + Local OAuth tokens are intentionally shared across credential rows, because + Pipeline's primary source of truth is the local machine's Claude/Codex login. + """ + normalized_provider = provider.lower() + normalized_base_url = (base_url or "").rstrip("/") + + if normalized_provider == "anthropic": + local_oauth = _read_claude_local_oauth_token() + if local_oauth: + return f"anthropic:local-oauth:{_secret_fingerprint(local_oauth)}" + if session_key: + return f"anthropic:session:{_secret_fingerprint(session_key)}" + if api_key: + return f"anthropic:api:{normalized_base_url}:{_secret_fingerprint(api_key)}" + + if normalized_provider in ("openai", "codex"): + if api_key: + return ( + f"{normalized_provider}:api:" + f"{normalized_base_url}:{_secret_fingerprint(api_key)}" + ) + if session_key: + return f"{normalized_provider}:session:{_secret_fingerprint(session_key)}" + local_codex = _read_codex_local_token() + if local_codex: + return f"{normalized_provider}:local-oauth:{_secret_fingerprint(local_codex)}" + + if normalized_provider == "ollama": + return f"ollama:{normalized_base_url}:{_secret_fingerprint(api_key)}" + + return f"{normalized_provider}:credential:{credential_id}" # --------------------------------------------------------------------------- # Public entry point # --------------------------------------------------------------------------- + async def _do_fetch_provider_usage( - credential_id: str, + cache_key: str, provider: str, account_key: str, api_key: str | None, @@ -1030,11 +1082,19 @@ async def _do_fetch_provider_usage( # Use a short TTL when subscription windows were expected but came back empty # (e.g. a transient 429 at startup). This avoids persisting a 60s stale result # while still preventing the thundering-herd that occurs with no caching at all. - ttl = CACHE_TTL_FAILURE_SECONDS if (subscription_attempted and not result.subscription_windows) else CACHE_TTL_SECONDS - _set_cached(credential_id, result, ttl=ttl) + ttl = ( + CACHE_TTL_FAILURE_SECONDS + if (subscription_attempted and not result.subscription_windows) + else CACHE_TTL_SECONDS + ) + _set_cached(cache_key, result, ttl=ttl) logger.info( "provider_usage.checked provider=%s account=%s reachable=%s windows=%d error=%s", - provider, account_key, result.reachable, len(result.subscription_windows), result.error, + provider, + account_key, + result.reachable, + len(result.subscription_windows), + result.error, ) return result @@ -1057,25 +1117,32 @@ async def fetch_provider_usage( Results are cached for CACHE_TTL_SECONDS (short CACHE_TTL_FAILURE_SECONDS when subscription windows are unavailable). Concurrent requests for the same - credential share one in-flight fetch to avoid rate-limit cascades. + effective provider credential share one in-flight fetch to avoid rate-limit cascades. Pass force_refresh=True to bypass the cache. """ + cache_key = _usage_cache_key(credential_id, provider, api_key, base_url, session_key) + if not force_refresh: - cached = _get_cached(credential_id) + cached = _get_cached(cache_key) if cached is not None: return cached # In-flight deduplication: if another coroutine is already fetching this - # credential, await its result rather than racing to hit the provider API. + # upstream credential, await its result rather than racing to hit the provider API. loop = asyncio.get_event_loop() - if credential_id in _inflight: - return await asyncio.shield(_inflight[credential_id]) + if cache_key in _inflight: + return await asyncio.shield(_inflight[cache_key]) fut: asyncio.Future[ProviderUsageLive] = loop.create_future() - _inflight[credential_id] = fut + _inflight[cache_key] = fut try: result = await _do_fetch_provider_usage( - credential_id, provider, account_key, api_key, base_url, session_key, + cache_key, + provider, + account_key, + api_key, + base_url, + session_key, ) fut.set_result(result) return result @@ -1083,4 +1150,4 @@ async def fetch_provider_usage( fut.set_exception(exc) raise finally: - _inflight.pop(credential_id, None) + _inflight.pop(cache_key, None) diff --git a/frontend/src/app/dashboard/page.tsx b/frontend/src/app/dashboard/page.tsx index 2ce9827..d7dda3c 100644 --- a/frontend/src/app/dashboard/page.tsx +++ b/frontend/src/app/dashboard/page.tsx @@ -860,7 +860,7 @@ export default function DashboardPage() { const credentialUsageQuery = useQuery({ queryKey: ["dashboard", "provider-credential-usage"], enabled: Boolean(isSignedIn), - refetchInterval: 30_000, + refetchInterval: 60_000, refetchOnMount: "always", queryFn: async () => { const credentialsRes = await listProviderCredentialsApiV1ProviderCredentialsGet(); diff --git a/frontend/src/components/organisms/ProviderNavbarStatus.tsx b/frontend/src/components/organisms/ProviderNavbarStatus.tsx index 63feea8..7f40d17 100644 --- a/frontend/src/components/organisms/ProviderNavbarStatus.tsx +++ b/frontend/src/components/organisms/ProviderNavbarStatus.tsx @@ -302,14 +302,13 @@ export function ProviderNavbarStatus() { } let cancelled = false; - const fetchUsage = async (refresh = false) => { + const fetchUsage = async () => { setIsUsageLoading(true); const pairs = await Promise.all( usageCredentials.map(async (cred) => { try { const res = await getProviderUsageLiveApiV1ProviderCredentialsCredentialIdUsageGet( cred.id, - refresh ? { refresh: true } : undefined, ); return [cred.id, res.status === 200 ? res.data : null] as const; } catch { @@ -328,7 +327,7 @@ export function ProviderNavbarStatus() { void fetchUsage(); }, 0); const interval = window.setInterval(() => { - void fetchUsage(true); + void fetchUsage(); }, 60_000); return () => {