feat: extend CACHE_TTL_FAILURE_SECONDS to 60s and adjust retry logic for Anthropic subscription endpoint

This commit is contained in:
null 2026-05-24 17:51:48 -05:00
parent fca8f028ab
commit 55a99ac9fd
1 changed file with 11 additions and 3 deletions

View File

@ -55,7 +55,12 @@ from app.core.time import utcnow
logger = get_logger(__name__)
CACHE_TTL_SECONDS = 60
CACHE_TTL_FAILURE_SECONDS = 5 # short TTL for results with no subscription windows
# Anthropic's subscription endpoint rate-limit window is ~30-60s. Retrying
# sooner than that (as the old 5s value did) creates a tight retry loop that
# keeps the endpoint permanently rate-limited. Use the same 60s TTL as for
# successes so the next auto-poll only fires after the window has cleared.
# Manual refreshes (force_refresh=True) always bypass this TTL.
CACHE_TTL_FAILURE_SECONDS = 60
REQUEST_TIMEOUT = 8.0 # seconds
@ -766,6 +771,9 @@ async def _fetch_anthropic_subscription(session_key: str) -> list[SubscriptionWi
}
# Retry once on 429 — the subscription endpoint can be rate-limited when
# called right after another api.anthropic.com request (e.g. /v1/models).
# Sleep 3s before retrying: a transient 429 triggered by a neighbouring
# request typically takes more than 1s to clear but rarely longer than 3s.
# Persistent throttling is not retried here; it is handled by the 60s
# CACHE_TTL_FAILURE_SECONDS at the call site.
for attempt in range(2):
async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
try:
@ -774,12 +782,12 @@ async def _fetch_anthropic_subscription(session_key: str) -> list[SubscriptionWi
logger.warning("provider_usage.subscription.anthropic.fetch_failed error=%s", exc)
return []
if resp.status_code == 429 and attempt == 0:
await asyncio.sleep(1.5)
await asyncio.sleep(3.0)
continue
break
if not resp.status_code == 200:
logger.debug(
logger.warning(
"provider_usage.subscription.anthropic.http_error status=%s body=%s",
resp.status_code,
resp.text[:200],