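fix(scripts): ai calc — add Anthropic output-token window, raise OpenAI probe cap to 16 tokens, report quota exhaustion on 429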
This commit is contained in:
parent
dc5af312a6
commit
1a0eaeee68
|
|
@ -167,6 +167,7 @@ async def test_provider_credential(
|
||||||
error=live.error,
|
error=live.error,
|
||||||
tokens=_tok(live.tokens),
|
tokens=_tok(live.tokens),
|
||||||
input_tokens=_tok(live.input_tokens),
|
input_tokens=_tok(live.input_tokens),
|
||||||
|
output_tokens=_tok(live.output_tokens),
|
||||||
requests=_req(live.requests),
|
requests=_req(live.requests),
|
||||||
models=live.models,
|
models=live.models,
|
||||||
sample_model=live.sample_model,
|
sample_model=live.sample_model,
|
||||||
|
|
@ -271,6 +272,7 @@ async def get_provider_usage_live(
|
||||||
error=live.error,
|
error=live.error,
|
||||||
tokens=_tok(live.tokens),
|
tokens=_tok(live.tokens),
|
||||||
input_tokens=_tok(live.input_tokens),
|
input_tokens=_tok(live.input_tokens),
|
||||||
|
output_tokens=_tok(live.output_tokens),
|
||||||
requests=_req(live.requests),
|
requests=_req(live.requests),
|
||||||
models=live.models,
|
models=live.models,
|
||||||
sample_model=live.sample_model,
|
sample_model=live.sample_model,
|
||||||
|
|
|
||||||
|
|
@ -60,7 +60,8 @@ class ProviderUsageLiveRead(SQLModel):
|
||||||
confidence: str # high | medium | low
|
confidence: str # high | medium | low
|
||||||
error: str | None = None
|
error: str | None = None
|
||||||
tokens: TokenWindowRead
|
tokens: TokenWindowRead
|
||||||
input_tokens: TokenWindowRead # Anthropic splits input tokens separately
|
input_tokens: TokenWindowRead # Anthropic input-only window
|
||||||
|
output_tokens: TokenWindowRead # Anthropic output-only window
|
||||||
requests: RequestWindowRead
|
requests: RequestWindowRead
|
||||||
models: list[str] = []
|
models: list[str] = []
|
||||||
sample_model: str | None = None
|
sample_model: str | None = None
|
||||||
|
|
|
||||||
|
|
@ -110,7 +110,8 @@ class ProviderUsageLive:
|
||||||
confidence: str = "high"
|
confidence: str = "high"
|
||||||
error: str | None = None
|
error: str | None = None
|
||||||
tokens: TokenWindow = field(default_factory=TokenWindow)
|
tokens: TokenWindow = field(default_factory=TokenWindow)
|
||||||
input_tokens: TokenWindow = field(default_factory=TokenWindow) # Anthropic splits input/output
|
input_tokens: TokenWindow = field(default_factory=TokenWindow) # Anthropic input-only window
|
||||||
|
output_tokens: TokenWindow = field(default_factory=TokenWindow) # Anthropic output-only window
|
||||||
requests: RequestWindow = field(default_factory=RequestWindow)
|
requests: RequestWindow = field(default_factory=RequestWindow)
|
||||||
models: list[str] = field(default_factory=list) # model IDs available on this key
|
models: list[str] = field(default_factory=list) # model IDs available on this key
|
||||||
raw_headers: dict[str, str] = field(default_factory=dict)
|
raw_headers: dict[str, str] = field(default_factory=dict)
|
||||||
|
|
@ -145,6 +146,7 @@ class ProviderUsageLive:
|
||||||
"error": self.error,
|
"error": self.error,
|
||||||
"tokens": _window(self.tokens),
|
"tokens": _window(self.tokens),
|
||||||
"input_tokens": _window(self.input_tokens),
|
"input_tokens": _window(self.input_tokens),
|
||||||
|
"output_tokens": _window(self.output_tokens),
|
||||||
"requests": _window(self.requests),
|
"requests": _window(self.requests),
|
||||||
"models": self.models[:20], # cap for response size
|
"models": self.models[:20], # cap for response size
|
||||||
"sample_model": self.sample_model,
|
"sample_model": self.sample_model,
|
||||||
|
|
@ -201,6 +203,11 @@ def _apply_anthropic_ratelimit_headers(result: ProviderUsageLive, headers: dict[
|
||||||
remaining=_parse_int_header(headers, "anthropic-ratelimit-input-tokens-remaining"),
|
remaining=_parse_int_header(headers, "anthropic-ratelimit-input-tokens-remaining"),
|
||||||
reset_at=_parse_iso_reset(headers.get("anthropic-ratelimit-input-tokens-reset", "")),
|
reset_at=_parse_iso_reset(headers.get("anthropic-ratelimit-input-tokens-reset", "")),
|
||||||
)
|
)
|
||||||
|
result.output_tokens = TokenWindow(
|
||||||
|
limit=_parse_int_header(headers, "anthropic-ratelimit-output-tokens-limit"),
|
||||||
|
remaining=_parse_int_header(headers, "anthropic-ratelimit-output-tokens-remaining"),
|
||||||
|
reset_at=_parse_iso_reset(headers.get("anthropic-ratelimit-output-tokens-reset", "")),
|
||||||
|
)
|
||||||
result.requests = RequestWindow(
|
result.requests = RequestWindow(
|
||||||
limit=_parse_int_header(headers, "anthropic-ratelimit-requests-limit"),
|
limit=_parse_int_header(headers, "anthropic-ratelimit-requests-limit"),
|
||||||
remaining=_parse_int_header(headers, "anthropic-ratelimit-requests-remaining"),
|
remaining=_parse_int_header(headers, "anthropic-ratelimit-requests-remaining"),
|
||||||
|
|
@ -463,13 +470,14 @@ async def _fetch_openai(api_key: str, base_url: str | None) -> ProviderUsageLive
|
||||||
probe_model = _pick_openai_probe_model(result.models)
|
probe_model = _pick_openai_probe_model(result.models)
|
||||||
if probe_model:
|
if probe_model:
|
||||||
result.sample_model = probe_model
|
result.sample_model = probe_model
|
||||||
|
# min 16 tokens: gpt-5.x models reject max_output_tokens < 16
|
||||||
probe_endpoints: list[tuple[str, dict[str, Any]]] = [
|
probe_endpoints: list[tuple[str, dict[str, Any]]] = [
|
||||||
(
|
(
|
||||||
f"{base}/v1/responses",
|
f"{base}/v1/responses",
|
||||||
{
|
{
|
||||||
"model": probe_model,
|
"model": probe_model,
|
||||||
"input": "Usage probe",
|
"input": "Usage probe",
|
||||||
"max_output_tokens": 1,
|
"max_output_tokens": 16,
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
|
|
@ -477,7 +485,7 @@ async def _fetch_openai(api_key: str, base_url: str | None) -> ProviderUsageLive
|
||||||
{
|
{
|
||||||
"model": probe_model,
|
"model": probe_model,
|
||||||
"messages": [{"role": "user", "content": "Usage probe"}],
|
"messages": [{"role": "user", "content": "Usage probe"}],
|
||||||
"max_tokens": 1,
|
"max_tokens": 16,
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
@ -494,6 +502,18 @@ async def _fetch_openai(api_key: str, base_url: str | None) -> ProviderUsageLive
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
|
# Quota exhaustion is a 429 with a distinct error code — surface it
|
||||||
|
# clearly rather than treating it as a transient rate limit.
|
||||||
|
if probe_resp.status_code == 429:
|
||||||
|
try:
|
||||||
|
err = probe_resp.json().get("error", {})
|
||||||
|
if err.get("code") == "insufficient_quota" or "exceeded your current quota" in str(err.get("message", "")):
|
||||||
|
result.error = "Quota exhausted — add credits at platform.openai.com/billing."
|
||||||
|
elapsed_ms = probe_resp.elapsed.total_seconds() * 1000.0
|
||||||
|
result.sample_latency_ms = int(max(0.0, round(elapsed_ms)))
|
||||||
|
break
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
probe_headers = {k.lower(): v for k, v in probe_resp.headers.items()}
|
probe_headers = {k.lower(): v for k, v in probe_resp.headers.items()}
|
||||||
probe_rl_headers = {k: v for k, v in probe_headers.items() if "ratelimit" in k}
|
probe_rl_headers = {k: v for k, v in probe_headers.items() if "ratelimit" in k}
|
||||||
if probe_rl_headers:
|
if probe_rl_headers:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue