From 55a99ac9fd8307de7565281202fcca326a3e73fe Mon Sep 17 00:00:00 2001 From: null Date: Sun, 24 May 2026 17:51:48 -0500 Subject: [PATCH] feat: extend CACHE_TTL_FAILURE_SECONDS to 60s and adjust retry logic for Anthropic subscription endpoint --- backend/app/services/provider_usage.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/backend/app/services/provider_usage.py b/backend/app/services/provider_usage.py index 5a81930..f0be676 100644 --- a/backend/app/services/provider_usage.py +++ b/backend/app/services/provider_usage.py @@ -55,7 +55,12 @@ from app.core.time import utcnow logger = get_logger(__name__) CACHE_TTL_SECONDS = 60 -CACHE_TTL_FAILURE_SECONDS = 5 # short TTL for results with no subscription windows +# Anthropic's subscription endpoint rate-limit window is ~30-60s. Retrying +# sooner than that (old 5s value) creates a tight retry loop that keeps the +# endpoint permanently throttled. Use the same 60s TTL as successes so the +# next auto-poll only fires after the window has cleared. Manual refreshes +# (force_refresh=True) always bypass this TTL. +CACHE_TTL_FAILURE_SECONDS = 60 REQUEST_TIMEOUT = 8.0 # seconds @@ -766,6 +771,9 @@ async def _fetch_anthropic_subscription(session_key: str) -> list[SubscriptionWi } # Retry once on 429 — the subscription endpoint can be rate-limited when # called right after another api.anthropic.com request (e.g. /v1/models). + # Sleep 3s before retrying; a 429 triggered by a preceding request usually + # clears after >1s but rarely takes longer than 3s (persistent throttling + # is handled by the 60s CACHE_TTL_FAILURE_SECONDS at the call site). 
for attempt in range(2): async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client: try: @@ -774,12 +782,12 @@ async def _fetch_anthropic_subscription(session_key: str) -> list[SubscriptionWi logger.warning("provider_usage.subscription.anthropic.fetch_failed error=%s", exc) return [] if resp.status_code == 429 and attempt == 0: - await asyncio.sleep(1.5) + await asyncio.sleep(3.0) continue break if not resp.status_code == 200: - logger.debug( + logger.warning( "provider_usage.subscription.anthropic.http_error status=%s body=%s", resp.status_code, resp.text[:200],