feat: update caching logic and improve provider usage fetching intervals

2026-05-24 19:05:30 -05:00 · 2026-05-24 19:05:30 -05:00 · d406beec56
parent 7d297df9aa
commit d406beec56
3 changed files with 89 additions and 23 deletions
--- a/backend/app/services/provider_usage.py
+++ b/backend/app/services/provider_usage.py
@@ -32,13 +32,16 @@ ollama     → GET {base_url}/api/tags  (health-check only; no rate limits)

 Caching
 -------
-Results are cached per credential_id for CACHE_TTL_SECONDS (default 60s) to
-avoid hammering provider APIs on every page load.
+Results are cached by the effective provider credential source for
+CACHE_TTL_SECONDS (default 60s) to avoid hammering provider APIs on every page
+load. Multiple Pipeline credential rows that point at the same local Claude or
+Codex login share one provider fetch.
 """

 from __future__ import annotations

 import asyncio
+import hashlib
 import json as _json_module
 import os
 import re
@@ -926,27 +929,76 @@ _cache: dict[str, tuple[datetime, ProviderUsageLive, int]] = {}
 _inflight: dict[str, asyncio.Future[ProviderUsageLive]] = {}


-def _get_cached(credential_id: str) -> ProviderUsageLive | None:
-    entry = _cache.get(credential_id)
+def _get_cached(cache_key: str) -> ProviderUsageLive | None:
+    entry = _cache.get(cache_key)  # (cached_at, result, ttl) tuple, or None on a miss
    if entry is None:
        return None
    cached_at, result, ttl = entry
    if (utcnow() - cached_at).total_seconds() > ttl:
-        del _cache[credential_id]
+        del _cache[cache_key]  # entry is older than its own TTL: evict it and report a miss
        return None
    return result


-def _set_cached(credential_id: str, result: ProviderUsageLive, ttl: int = CACHE_TTL_SECONDS) -> None:
-    _cache[credential_id] = (utcnow(), result, ttl)
+def _set_cached(cache_key: str, result: ProviderUsageLive, ttl: int = CACHE_TTL_SECONDS) -> None:
+    _cache[cache_key] = (utcnow(), result, ttl)  # timestamped at write time; ttl is stored per entry
+
+
+def _secret_fingerprint(value: str | None) -> str:
+    if not value:
+        return "none"  # placeholder for a missing or empty secret
+    return hashlib.sha256(value.encode("utf-8")).hexdigest()[:16]  # first 16 hex chars of the SHA-256 digest, never the raw secret
+
+
+def _usage_cache_key(
+    credential_id: str,
+    provider: str,
+    api_key: str | None,
+    base_url: str | None,
+    session_key: str | None,
+) -> str:
+    """Return a cache key for the real upstream credential being used.
+
+    Local OAuth tokens are intentionally shared across credential rows, because
+    Pipeline's primary source of truth is the local machine's Claude/Codex login.
+    """
+    normalized_provider = provider.lower()
+    normalized_base_url = (base_url or "").rstrip("/")  # None becomes ""; trailing slashes are stripped
+
+    if normalized_provider == "anthropic":  # checked in order: local OAuth token, session key, API key
+        local_oauth = _read_claude_local_oauth_token()
+        if local_oauth:
+            return f"anthropic:local-oauth:{_secret_fingerprint(local_oauth)}"
+        if session_key:
+            return f"anthropic:session:{_secret_fingerprint(session_key)}"
+        if api_key:
+            return f"anthropic:api:{normalized_base_url}:{_secret_fingerprint(api_key)}"
+
+    if normalized_provider in ("openai", "codex"):  # checked in order: API key, session key, local Codex token
+        if api_key:
+            return (
+                f"{normalized_provider}:api:"
+                f"{normalized_base_url}:{_secret_fingerprint(api_key)}"
+            )
+        if session_key:
+            return f"{normalized_provider}:session:{_secret_fingerprint(session_key)}"
+        local_codex = _read_codex_local_token()
+        if local_codex:
+            return f"{normalized_provider}:local-oauth:{_secret_fingerprint(local_codex)}"
+
+    if normalized_provider == "ollama":  # always keyed by base URL; a missing API key fingerprints as "none"
+        return f"ollama:{normalized_base_url}:{_secret_fingerprint(api_key)}"
+
+    return f"{normalized_provider}:credential:{credential_id}"  # fallback: no branch above matched, so key per credential row


 # ---------------------------------------------------------------------------
 # Public entry point
 # ---------------------------------------------------------------------------

+
 async def _do_fetch_provider_usage(
-    credential_id: str,
+    cache_key: str,
    provider: str,
    account_key: str,
    api_key: str | None,
@@ -1030,11 +1082,19 @@ async def _do_fetch_provider_usage(
    # Use a short TTL when subscription windows were expected but came back empty
    # (e.g. a transient 429 at startup). This avoids persisting a 60s stale result
    # while still preventing the thundering-herd that occurs with no caching at all.
-    ttl = CACHE_TTL_FAILURE_SECONDS if (subscription_attempted and not result.subscription_windows) else CACHE_TTL_SECONDS
-    _set_cached(credential_id, result, ttl=ttl)
+    ttl = (
+        CACHE_TTL_FAILURE_SECONDS
+        if (subscription_attempted and not result.subscription_windows)
+        else CACHE_TTL_SECONDS
+    )
+    _set_cached(cache_key, result, ttl=ttl)
    logger.info(
        "provider_usage.checked provider=%s account=%s reachable=%s windows=%d error=%s",
-        provider, account_key, result.reachable, len(result.subscription_windows), result.error,
+        provider,
+        account_key,
+        result.reachable,
+        len(result.subscription_windows),
+        result.error,
    )
    return result

@@ -1057,25 +1117,32 @@ async def fetch_provider_usage(

    Results are cached for CACHE_TTL_SECONDS (short CACHE_TTL_FAILURE_SECONDS
    when subscription windows are unavailable). Concurrent requests for the same
-    credential share one in-flight fetch to avoid rate-limit cascades.
+    effective provider credential share one in-flight fetch to avoid rate-limit cascades.
    Pass force_refresh=True to bypass the cache.
    """
+    cache_key = _usage_cache_key(credential_id, provider, api_key, base_url, session_key)
+
    if not force_refresh:
-        cached = _get_cached(credential_id)
+        cached = _get_cached(cache_key)
        if cached is not None:
            return cached

    # In-flight deduplication: if another coroutine is already fetching this
-    # credential, await its result rather than racing to hit the provider API.
+    # upstream credential, await its result rather than racing to hit the provider API.
    loop = asyncio.get_event_loop()
-    if credential_id in _inflight:
-        return await asyncio.shield(_inflight[credential_id])
+    if cache_key in _inflight:
+        return await asyncio.shield(_inflight[cache_key])

    fut: asyncio.Future[ProviderUsageLive] = loop.create_future()
-    _inflight[credential_id] = fut
+    _inflight[cache_key] = fut
    try:
        result = await _do_fetch_provider_usage(
-            credential_id, provider, account_key, api_key, base_url, session_key,
+            cache_key,
+            provider,
+            account_key,
+            api_key,
+            base_url,
+            session_key,
        )
        fut.set_result(result)
        return result
@@ -1083,4 +1150,4 @@ async def fetch_provider_usage(
        fut.set_exception(exc)
        raise
    finally:
-        _inflight.pop(credential_id, None)
+        _inflight.pop(cache_key, None)
--- a/frontend/src/app/dashboard/page.tsx
+++ b/frontend/src/app/dashboard/page.tsx
@@ -860,7 +860,7 @@ export default function DashboardPage() {
  const credentialUsageQuery = useQuery<ProviderNativeUsageWindow[], ApiError>({
    queryKey: ["dashboard", "provider-credential-usage"],
    enabled: Boolean(isSignedIn),
-    refetchInterval: 30_000,
+    refetchInterval: 60_000,
    refetchOnMount: "always",
    queryFn: async () => {
      const credentialsRes = await listProviderCredentialsApiV1ProviderCredentialsGet();
--- a/frontend/src/components/organisms/ProviderNavbarStatus.tsx
+++ b/frontend/src/components/organisms/ProviderNavbarStatus.tsx
@@ -302,14 +302,13 @@ export function ProviderNavbarStatus() {
    }

    let cancelled = false;
-    const fetchUsage = async (refresh = false) => {
+    const fetchUsage = async () => {
      setIsUsageLoading(true);
      const pairs = await Promise.all(
        usageCredentials.map(async (cred) => {
          try {
            const res = await getProviderUsageLiveApiV1ProviderCredentialsCredentialIdUsageGet(
              cred.id,
-              refresh ? { refresh: true } : undefined,
            );
            return [cred.id, res.status === 200 ? res.data : null] as const;
          } catch {
@@ -328,7 +327,7 @@ export function ProviderNavbarStatus() {
      void fetchUsage();
    }, 0);
    const interval = window.setInterval(() => {
-      void fetchUsage(true);
+      void fetchUsage();
    }, 60_000);

    return () => {