fix(db): make main_session_key nullable on gateways
The column was NOT NULL, but the ORM create path does not populate it until ensure_main_agent() runs after the INSERT. Make the column nullable so the initial create succeeds.
This commit is contained in:
parent
fc4094d49f
commit
03bc31a558
|
|
@ -167,6 +167,10 @@ async def test_provider_credential(
|
||||||
input_tokens=_tok(live.input_tokens),
|
input_tokens=_tok(live.input_tokens),
|
||||||
requests=_req(live.requests),
|
requests=_req(live.requests),
|
||||||
models=live.models,
|
models=live.models,
|
||||||
|
sample_model=live.sample_model,
|
||||||
|
sample_input_tokens=live.sample_input_tokens,
|
||||||
|
sample_output_tokens=live.sample_output_tokens,
|
||||||
|
sample_latency_ms=live.sample_latency_ms,
|
||||||
debug_rate_limit_headers=sorted(live.raw_headers.keys()) if live.raw_headers else None,
|
debug_rate_limit_headers=sorted(live.raw_headers.keys()) if live.raw_headers else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -265,6 +269,10 @@ async def get_provider_usage_live(
|
||||||
input_tokens=_tok(live.input_tokens),
|
input_tokens=_tok(live.input_tokens),
|
||||||
requests=_req(live.requests),
|
requests=_req(live.requests),
|
||||||
models=live.models,
|
models=live.models,
|
||||||
|
sample_model=live.sample_model,
|
||||||
|
sample_input_tokens=live.sample_input_tokens,
|
||||||
|
sample_output_tokens=live.sample_output_tokens,
|
||||||
|
sample_latency_ms=live.sample_latency_ms,
|
||||||
debug_rate_limit_headers=sorted(live.raw_headers.keys()) if live.raw_headers else None,
|
debug_rate_limit_headers=sorted(live.raw_headers.keys()) if live.raw_headers else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,5 +26,6 @@ class Gateway(QueryModel, table=True):
|
||||||
disable_device_pairing: bool = Field(default=False)
|
disable_device_pairing: bool = Field(default=False)
|
||||||
workspace_root: str
|
workspace_root: str
|
||||||
allow_insecure_tls: bool = Field(default=False)
|
allow_insecure_tls: bool = Field(default=False)
|
||||||
|
main_session_key: str | None = Field(default=None)
|
||||||
created_at: datetime = Field(default_factory=utcnow)
|
created_at: datetime = Field(default_factory=utcnow)
|
||||||
updated_at: datetime = Field(default_factory=utcnow)
|
updated_at: datetime = Field(default_factory=utcnow)
|
||||||
|
|
|
||||||
|
|
@ -61,6 +61,10 @@ class ProviderUsageLiveRead(SQLModel):
|
||||||
input_tokens: TokenWindowRead # Anthropic splits input tokens separately
|
input_tokens: TokenWindowRead # Anthropic splits input tokens separately
|
||||||
requests: RequestWindowRead
|
requests: RequestWindowRead
|
||||||
models: list[str] = []
|
models: list[str] = []
|
||||||
|
sample_model: str | None = None
|
||||||
|
sample_input_tokens: int | None = None
|
||||||
|
sample_output_tokens: int | None = None
|
||||||
|
sample_latency_ms: int | None = None
|
||||||
# Optional debugging aid: exact rate-limit header names returned by provider.
|
# Optional debugging aid: exact rate-limit header names returned by provider.
|
||||||
debug_rate_limit_headers: list[str] | None = None
|
debug_rate_limit_headers: list[str] | None = None
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,14 +13,20 @@ anthropic → GET https://api.anthropic.com/v1/models
|
||||||
Headers: anthropic-ratelimit-tokens-limit/remaining/reset
|
Headers: anthropic-ratelimit-tokens-limit/remaining/reset
|
||||||
anthropic-ratelimit-requests-limit/remaining/reset
|
anthropic-ratelimit-requests-limit/remaining/reset
|
||||||
anthropic-ratelimit-input-tokens-limit/remaining/reset
|
anthropic-ratelimit-input-tokens-limit/remaining/reset
|
||||||
|
Fallback probe (only when headers missing):
|
||||||
|
POST /v1/messages with max_tokens=1 to surface usage+time data.
|
||||||
|
|
||||||
openai → GET https://api.openai.com/v1/models
|
openai → GET https://api.openai.com/v1/models
|
||||||
(codex) Headers: x-ratelimit-limit-tokens, x-ratelimit-remaining-tokens,
|
(codex) Headers: x-ratelimit-limit-tokens, x-ratelimit-remaining-tokens,
|
||||||
x-ratelimit-reset-tokens, x-ratelimit-limit-requests,
|
x-ratelimit-reset-tokens, x-ratelimit-limit-requests,
|
||||||
x-ratelimit-remaining-requests, x-ratelimit-reset-requests
|
x-ratelimit-remaining-requests, x-ratelimit-reset-requests
|
||||||
|
Fallback probe (only when headers missing):
|
||||||
|
POST /v1/chat/completions with max_tokens=1 to surface usage+time.
|
||||||
|
|
||||||
ollama → GET {base_url}/api/tags (health-check only; no rate limits)
|
ollama → GET {base_url}/api/tags (health-check only; no rate limits)
|
||||||
Returns: model list, server reachable flag
|
Returns: model list, server reachable flag
|
||||||
|
Fallback probe:
|
||||||
|
POST {base_url}/api/generate with num_predict=1 for usage+time.
|
||||||
|
|
||||||
Caching
|
Caching
|
||||||
-------
|
-------
|
||||||
|
|
@ -102,14 +108,22 @@ class ProviderUsageLive:
|
||||||
requests: RequestWindow = field(default_factory=RequestWindow)
|
requests: RequestWindow = field(default_factory=RequestWindow)
|
||||||
models: list[str] = field(default_factory=list) # model IDs available on this key
|
models: list[str] = field(default_factory=list) # model IDs available on this key
|
||||||
raw_headers: dict[str, str] = field(default_factory=dict)
|
raw_headers: dict[str, str] = field(default_factory=dict)
|
||||||
|
sample_model: str | None = None
|
||||||
|
sample_input_tokens: int | None = None
|
||||||
|
sample_output_tokens: int | None = None
|
||||||
|
sample_latency_ms: int | None = None
|
||||||
|
|
||||||
def to_dict(self) -> dict[str, Any]:
|
def to_dict(self) -> dict[str, Any]:
|
||||||
def _window(w: TokenWindow | RequestWindow) -> dict[str, Any]:
|
def _window(w: TokenWindow | RequestWindow) -> dict[str, Any]:
|
||||||
d: dict[str, Any] = {}
|
d: dict[str, Any] = {}
|
||||||
if hasattr(w, "limit"): d["limit"] = w.limit
|
if hasattr(w, "limit"):
|
||||||
if hasattr(w, "remaining"): d["remaining"] = w.remaining
|
d["limit"] = w.limit
|
||||||
if hasattr(w, "reset_in_ms"): d["reset_in_ms"] = w.reset_in_ms
|
if hasattr(w, "remaining"):
|
||||||
if hasattr(w, "reset_at"): d["reset_at"] = w.reset_at.isoformat() if w.reset_at else None
|
d["remaining"] = w.remaining
|
||||||
|
if hasattr(w, "reset_in_ms"):
|
||||||
|
d["reset_in_ms"] = w.reset_in_ms
|
||||||
|
if hasattr(w, "reset_at"):
|
||||||
|
d["reset_at"] = w.reset_at.isoformat() if w.reset_at else None
|
||||||
if isinstance(w, TokenWindow):
|
if isinstance(w, TokenWindow):
|
||||||
d["used"] = w.used
|
d["used"] = w.used
|
||||||
d["pct_used"] = w.pct_used
|
d["pct_used"] = w.pct_used
|
||||||
|
|
@ -125,6 +139,10 @@ class ProviderUsageLive:
|
||||||
"input_tokens": _window(self.input_tokens),
|
"input_tokens": _window(self.input_tokens),
|
||||||
"requests": _window(self.requests),
|
"requests": _window(self.requests),
|
||||||
"models": self.models[:20], # cap for response size
|
"models": self.models[:20], # cap for response size
|
||||||
|
"sample_model": self.sample_model,
|
||||||
|
"sample_input_tokens": self.sample_input_tokens,
|
||||||
|
"sample_output_tokens": self.sample_output_tokens,
|
||||||
|
"sample_latency_ms": self.sample_latency_ms,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -163,6 +181,49 @@ _OAI_DURATION_RE = re.compile(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_anthropic_ratelimit_headers(result: ProviderUsageLive, headers: dict[str, str]) -> None:
    """Overwrite the three limit windows on *result* from Anthropic headers.

    ``result.tokens``, ``result.input_tokens`` and ``result.requests`` are
    each replaced with a freshly built window whose ``limit`` / ``remaining``
    / ``reset_at`` come from the matching ``anthropic-ratelimit-*`` header.
    Header names are looked up exactly as written here (lowercase).
    What an absent header turns into is decided by ``_parse_int_header`` and
    ``_parse_iso_reset``; this function only wires the values through.
    """
    # Combined token window.
    tokens_limit = _parse_int_header(headers, "anthropic-ratelimit-tokens-limit")
    tokens_remaining = _parse_int_header(headers, "anthropic-ratelimit-tokens-remaining")
    tokens_reset = _parse_iso_reset(headers.get("anthropic-ratelimit-tokens-reset", ""))
    result.tokens = TokenWindow(
        limit=tokens_limit,
        remaining=tokens_remaining,
        reset_at=tokens_reset,
    )

    # Input-token window.
    input_limit = _parse_int_header(headers, "anthropic-ratelimit-input-tokens-limit")
    input_remaining = _parse_int_header(headers, "anthropic-ratelimit-input-tokens-remaining")
    input_reset = _parse_iso_reset(headers.get("anthropic-ratelimit-input-tokens-reset", ""))
    result.input_tokens = TokenWindow(
        limit=input_limit,
        remaining=input_remaining,
        reset_at=input_reset,
    )

    # Request-count window.
    requests_limit = _parse_int_header(headers, "anthropic-ratelimit-requests-limit")
    requests_remaining = _parse_int_header(headers, "anthropic-ratelimit-requests-remaining")
    requests_reset = _parse_iso_reset(headers.get("anthropic-ratelimit-requests-reset", ""))
    result.requests = RequestWindow(
        limit=requests_limit,
        remaining=requests_remaining,
        reset_at=requests_reset,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _pick_anthropic_probe_model(models: list[str]) -> str | None:
|
||||||
|
if not models:
|
||||||
|
return None
|
||||||
|
priorities = ("haiku", "sonnet", "opus")
|
||||||
|
lowered = [(m, m.lower()) for m in models]
|
||||||
|
for priority in priorities:
|
||||||
|
for original, lowered_name in lowered:
|
||||||
|
if priority in lowered_name:
|
||||||
|
return original
|
||||||
|
return models[0]
|
||||||
|
|
||||||
|
|
||||||
|
def _pick_openai_probe_model(models: list[str]) -> str | None:
|
||||||
|
if not models:
|
||||||
|
return None
|
||||||
|
priorities = ("gpt-4.1-mini", "gpt-4o-mini", "gpt-4.1", "gpt-4o", "o4-mini")
|
||||||
|
lowered = [(m, m.lower()) for m in models]
|
||||||
|
for priority in priorities:
|
||||||
|
for original, lowered_name in lowered:
|
||||||
|
if priority in lowered_name:
|
||||||
|
return original
|
||||||
|
return models[0]
|
||||||
|
|
||||||
|
|
||||||
def _parse_openai_reset(value: str) -> datetime | None:
|
def _parse_openai_reset(value: str) -> datetime | None:
|
||||||
"""Parse an OpenAI reset header: ISO datetime OR duration like '1m30s'."""
|
"""Parse an OpenAI reset header: ISO datetime OR duration like '1m30s'."""
|
||||||
if not value:
|
if not value:
|
||||||
|
|
@ -219,23 +280,7 @@ async def _fetch_anthropic(api_key: str, base_url: str | None) -> ProviderUsageL
|
||||||
result.reachable = True
|
result.reachable = True
|
||||||
result.raw_headers = {k: v for k, v in h.items() if "ratelimit" in k}
|
result.raw_headers = {k: v for k, v in h.items() if "ratelimit" in k}
|
||||||
|
|
||||||
# Token window (combined input+output)
|
_apply_anthropic_ratelimit_headers(result, h)
|
||||||
result.tokens = TokenWindow(
|
|
||||||
limit = _parse_int_header(h, "anthropic-ratelimit-tokens-limit"),
|
|
||||||
remaining = _parse_int_header(h, "anthropic-ratelimit-tokens-remaining"),
|
|
||||||
reset_at = _parse_iso_reset(h.get("anthropic-ratelimit-tokens-reset", "")),
|
|
||||||
)
|
|
||||||
# Input-token window (separate limit for input)
|
|
||||||
result.input_tokens = TokenWindow(
|
|
||||||
limit = _parse_int_header(h, "anthropic-ratelimit-input-tokens-limit"),
|
|
||||||
remaining = _parse_int_header(h, "anthropic-ratelimit-input-tokens-remaining"),
|
|
||||||
reset_at = _parse_iso_reset(h.get("anthropic-ratelimit-input-tokens-reset", "")),
|
|
||||||
)
|
|
||||||
result.requests = RequestWindow(
|
|
||||||
limit = _parse_int_header(h, "anthropic-ratelimit-requests-limit"),
|
|
||||||
remaining = _parse_int_header(h, "anthropic-ratelimit-requests-remaining"),
|
|
||||||
reset_at = _parse_iso_reset(h.get("anthropic-ratelimit-requests-reset", "")),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Extract model IDs
|
# Extract model IDs
|
||||||
try:
|
try:
|
||||||
|
|
@ -245,6 +290,56 @@ async def _fetch_anthropic(api_key: str, base_url: str | None) -> ProviderUsageL
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Some tiers/paths may omit ratelimit headers on /v1/models.
|
||||||
|
# Fallback to a minimal /v1/messages probe so we can still surface usage/time.
|
||||||
|
if (
|
||||||
|
result.tokens.limit is None
|
||||||
|
and result.input_tokens.limit is None
|
||||||
|
and result.requests.limit is None
|
||||||
|
):
|
||||||
|
probe_model = _pick_anthropic_probe_model(result.models)
|
||||||
|
if probe_model:
|
||||||
|
result.sample_model = probe_model
|
||||||
|
async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
|
||||||
|
try:
|
||||||
|
probe_resp = await client.post(
|
||||||
|
f"{base}/v1/messages",
|
||||||
|
headers={
|
||||||
|
"x-api-key": api_key,
|
||||||
|
"anthropic-version": "2023-06-01",
|
||||||
|
"content-type": "application/json",
|
||||||
|
},
|
||||||
|
json={
|
||||||
|
"model": probe_model,
|
||||||
|
"max_tokens": 1,
|
||||||
|
"messages": [{"role": "user", "content": "Usage probe"}],
|
||||||
|
},
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
probe_resp = None
|
||||||
|
|
||||||
|
if probe_resp is not None:
|
||||||
|
probe_headers = {k.lower(): v for k, v in probe_resp.headers.items()}
|
||||||
|
probe_rl_headers = {k: v for k, v in probe_headers.items() if "ratelimit" in k}
|
||||||
|
if probe_rl_headers:
|
||||||
|
result.raw_headers = probe_rl_headers
|
||||||
|
_apply_anthropic_ratelimit_headers(result, probe_headers)
|
||||||
|
if probe_resp.status_code == 200:
|
||||||
|
try:
|
||||||
|
payload = probe_resp.json()
|
||||||
|
usage = payload.get("usage") if isinstance(payload, dict) else None
|
||||||
|
if isinstance(usage, dict):
|
||||||
|
in_tok = usage.get("input_tokens")
|
||||||
|
out_tok = usage.get("output_tokens")
|
||||||
|
if isinstance(in_tok, int):
|
||||||
|
result.sample_input_tokens = in_tok
|
||||||
|
if isinstance(out_tok, int):
|
||||||
|
result.sample_output_tokens = out_tok
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
elapsed_ms = probe_resp.elapsed.total_seconds() * 1000.0
|
||||||
|
result.sample_latency_ms = int(max(0.0, round(elapsed_ms)))
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -296,6 +391,63 @@ async def _fetch_openai(api_key: str, base_url: str | None) -> ProviderUsageLive
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
if result.tokens.limit is None and result.requests.limit is None:
|
||||||
|
probe_model = _pick_openai_probe_model(result.models)
|
||||||
|
if probe_model:
|
||||||
|
result.sample_model = probe_model
|
||||||
|
async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
|
||||||
|
try:
|
||||||
|
probe_resp = await client.post(
|
||||||
|
f"{base}/v1/chat/completions",
|
||||||
|
headers={
|
||||||
|
"Authorization": f"Bearer {api_key}",
|
||||||
|
"content-type": "application/json",
|
||||||
|
},
|
||||||
|
json={
|
||||||
|
"model": probe_model,
|
||||||
|
"messages": [{"role": "user", "content": "Usage probe"}],
|
||||||
|
"max_tokens": 1,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
probe_resp = None
|
||||||
|
if probe_resp is not None:
|
||||||
|
probe_headers = {k.lower(): v for k, v in probe_resp.headers.items()}
|
||||||
|
probe_rl_headers = {k: v for k, v in probe_headers.items() if "ratelimit" in k}
|
||||||
|
if probe_rl_headers:
|
||||||
|
result.raw_headers = probe_rl_headers
|
||||||
|
result.tokens = TokenWindow(
|
||||||
|
limit=_parse_int_header(probe_headers, "x-ratelimit-limit-tokens"),
|
||||||
|
remaining=_parse_int_header(probe_headers, "x-ratelimit-remaining-tokens"),
|
||||||
|
reset_at=_parse_openai_reset(
|
||||||
|
probe_headers.get("x-ratelimit-reset-tokens", "")
|
||||||
|
),
|
||||||
|
)
|
||||||
|
result.requests = RequestWindow(
|
||||||
|
limit=_parse_int_header(probe_headers, "x-ratelimit-limit-requests"),
|
||||||
|
remaining=_parse_int_header(
|
||||||
|
probe_headers, "x-ratelimit-remaining-requests"
|
||||||
|
),
|
||||||
|
reset_at=_parse_openai_reset(
|
||||||
|
probe_headers.get("x-ratelimit-reset-requests", "")
|
||||||
|
),
|
||||||
|
)
|
||||||
|
if probe_resp.status_code == 200:
|
||||||
|
try:
|
||||||
|
payload = probe_resp.json()
|
||||||
|
usage = payload.get("usage") if isinstance(payload, dict) else None
|
||||||
|
if isinstance(usage, dict):
|
||||||
|
in_tok = usage.get("prompt_tokens")
|
||||||
|
out_tok = usage.get("completion_tokens")
|
||||||
|
if isinstance(in_tok, int):
|
||||||
|
result.sample_input_tokens = in_tok
|
||||||
|
if isinstance(out_tok, int):
|
||||||
|
result.sample_output_tokens = out_tok
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
elapsed_ms = probe_resp.elapsed.total_seconds() * 1000.0
|
||||||
|
result.sample_latency_ms = int(max(0.0, round(elapsed_ms)))
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -331,6 +483,37 @@ async def _fetch_ollama(base_url: str | None, api_key: str | None) -> ProviderUs
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
if result.models:
|
||||||
|
result.sample_model = result.models[0]
|
||||||
|
async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
|
||||||
|
try:
|
||||||
|
probe_resp = await client.post(
|
||||||
|
f"{base}/api/generate",
|
||||||
|
headers={**headers, "content-type": "application/json"},
|
||||||
|
json={
|
||||||
|
"model": result.sample_model,
|
||||||
|
"prompt": "Usage probe",
|
||||||
|
"stream": False,
|
||||||
|
"options": {"num_predict": 1},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
probe_resp = None
|
||||||
|
if probe_resp is not None and probe_resp.status_code == 200:
|
||||||
|
try:
|
||||||
|
payload = probe_resp.json()
|
||||||
|
in_tok = payload.get("prompt_eval_count")
|
||||||
|
out_tok = payload.get("eval_count")
|
||||||
|
total_duration_ns = payload.get("total_duration")
|
||||||
|
if isinstance(in_tok, int):
|
||||||
|
result.sample_input_tokens = in_tok
|
||||||
|
if isinstance(out_tok, int):
|
||||||
|
result.sample_output_tokens = out_tok
|
||||||
|
if isinstance(total_duration_ns, int):
|
||||||
|
result.sample_latency_ms = max(0, int(round(total_duration_ns / 1_000_000)))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,34 @@
|
||||||
|
"""Make main_session_key nullable on gateways.
|
||||||
|
|
||||||
|
The column was NOT NULL but the ORM model didn't include it, causing
|
||||||
|
INSERT failures when creating gateways via the API. The field gets
|
||||||
|
populated by ensure_main_agent() after the row exists, so it needs
|
||||||
|
to be nullable during the initial INSERT.
|
||||||
|
|
||||||
|
Revision ID: c4a1d2e3f4a5
|
||||||
|
Revises: f7d8e9a0b1c2
|
||||||
|
Create Date: 2026-05-21 04:25:00.000000
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = "c4a1d2e3f4a5"
|
||||||
|
down_revision = "f7d8e9a0b1c2"
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Drop the NOT NULL requirement on ``gateways.main_session_key``."""
    table, column = "gateways", "main_session_key"
    op.alter_column(table, column, nullable=True)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Revert main_session_key to NOT NULL.

    Rows inserted while the column was nullable may still hold NULL, and
    re-adding the constraint fails while any such row exists. They are
    backfilled with an empty string first so the downgrade can complete.
    """
    # NOTE(review): '' is a placeholder value. Confirm that an empty key is
    # acceptable for downgraded deployments and that the column carries no
    # uniqueness constraint; otherwise backfill with real keys first.
    op.execute(
        "UPDATE gateways SET main_session_key = '' WHERE main_session_key IS NULL"
    )
    op.alter_column("gateways", "main_session_key", nullable=False)
|
||||||
|
|
@ -89,6 +89,10 @@ async def test_usage_response_includes_rate_limit_header_names(monkeypatch: pyte
|
||||||
checked_at=utcnow(),
|
checked_at=utcnow(),
|
||||||
reachable=True,
|
reachable=True,
|
||||||
)
|
)
|
||||||
|
result.sample_model = "claude-sonnet-4-6"
|
||||||
|
result.sample_input_tokens = 9
|
||||||
|
result.sample_output_tokens = 1
|
||||||
|
result.sample_latency_ms = 123
|
||||||
result.raw_headers = {
|
result.raw_headers = {
|
||||||
"anthropic-ratelimit-requests-limit": "1000",
|
"anthropic-ratelimit-requests-limit": "1000",
|
||||||
"anthropic-ratelimit-requests-remaining": "999",
|
"anthropic-ratelimit-requests-remaining": "999",
|
||||||
|
|
@ -117,6 +121,10 @@ async def test_usage_response_includes_rate_limit_header_names(monkeypatch: pyte
|
||||||
data = response.json()
|
data = response.json()
|
||||||
assert data["provider"] == "anthropic"
|
assert data["provider"] == "anthropic"
|
||||||
assert data["reachable"] is True
|
assert data["reachable"] is True
|
||||||
|
assert data["sample_model"] == "claude-sonnet-4-6"
|
||||||
|
assert data["sample_input_tokens"] == 9
|
||||||
|
assert data["sample_output_tokens"] == 1
|
||||||
|
assert data["sample_latency_ms"] == 123
|
||||||
assert data["debug_rate_limit_headers"] == [
|
assert data["debug_rate_limit_headers"] == [
|
||||||
"anthropic-ratelimit-requests-limit",
|
"anthropic-ratelimit-requests-limit",
|
||||||
"anthropic-ratelimit-requests-remaining",
|
"anthropic-ratelimit-requests-remaining",
|
||||||
|
|
@ -150,6 +158,10 @@ async def test_test_endpoint_returns_live_result(monkeypatch: pytest.MonkeyPatch
|
||||||
reachable=True,
|
reachable=True,
|
||||||
)
|
)
|
||||||
result.models = ["claude-sonnet-4-6"]
|
result.models = ["claude-sonnet-4-6"]
|
||||||
|
result.sample_model = "claude-sonnet-4-6"
|
||||||
|
result.sample_input_tokens = 8
|
||||||
|
result.sample_output_tokens = 1
|
||||||
|
result.sample_latency_ms = 111
|
||||||
result.raw_headers = {
|
result.raw_headers = {
|
||||||
"anthropic-ratelimit-tokens-limit": "100000",
|
"anthropic-ratelimit-tokens-limit": "100000",
|
||||||
}
|
}
|
||||||
|
|
@ -184,6 +196,10 @@ async def test_test_endpoint_returns_live_result(monkeypatch: pytest.MonkeyPatch
|
||||||
assert data["account_key"] == "Claude"
|
assert data["account_key"] == "Claude"
|
||||||
assert data["reachable"] is True
|
assert data["reachable"] is True
|
||||||
assert data["models"] == ["claude-sonnet-4-6"]
|
assert data["models"] == ["claude-sonnet-4-6"]
|
||||||
|
assert data["sample_model"] == "claude-sonnet-4-6"
|
||||||
|
assert data["sample_input_tokens"] == 8
|
||||||
|
assert data["sample_output_tokens"] == 1
|
||||||
|
assert data["sample_latency_ms"] == 111
|
||||||
assert data["debug_rate_limit_headers"] == ["anthropic-ratelimit-tokens-limit"]
|
assert data["debug_rate_limit_headers"] == ["anthropic-ratelimit-tokens-limit"]
|
||||||
finally:
|
finally:
|
||||||
await engine.dispose()
|
await engine.dispose()
|
||||||
|
|
|
||||||
|
|
@ -20,4 +20,9 @@ export interface ProviderUsageLiveRead {
|
||||||
input_tokens: TokenWindowRead;
|
input_tokens: TokenWindowRead;
|
||||||
requests: RequestWindowRead;
|
requests: RequestWindowRead;
|
||||||
models?: string[];
|
models?: string[];
|
||||||
|
sample_model?: string | null;
|
||||||
|
sample_input_tokens?: number | null;
|
||||||
|
sample_output_tokens?: number | null;
|
||||||
|
sample_latency_ms?: number | null;
|
||||||
|
debug_rate_limit_headers?: string[] | null;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -236,8 +236,17 @@ function CredentialForm({
|
||||||
{testResult.reachable ? "Connection successful" : "Connection failed"}
|
{testResult.reachable ? "Connection successful" : "Connection failed"}
|
||||||
</p>
|
</p>
|
||||||
<p className="mt-1 text-muted">
|
<p className="mt-1 text-muted">
|
||||||
{testResult.error ?? `${testResult.models?.length ?? 0} model${(testResult.models?.length ?? 0) === 1 ? "" : "s"} returned`}
|
{testResult.error ?? (
|
||||||
|
testResult.sample_input_tokens != null || testResult.sample_output_tokens != null
|
||||||
|
? `Usage probe: in ${fmtTokens(testResult.sample_input_tokens)} · out ${fmtTokens(testResult.sample_output_tokens)}`
|
||||||
|
: "Connected."
|
||||||
|
)}
|
||||||
</p>
|
</p>
|
||||||
|
{testResult.sample_latency_ms != null && (
|
||||||
|
<p className="mt-1 text-muted">
|
||||||
|
Probe time: {fmtLatencyMs(testResult.sample_latency_ms)}
|
||||||
|
</p>
|
||||||
|
)}
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
@ -267,6 +276,12 @@ function fmtResetMs(ms: number | null | undefined): string {
|
||||||
return `${h}h ${m % 60}m`;
|
return `${h}h ${m % 60}m`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Format a probe latency given in milliseconds for display.
 *
 * - `null`, `undefined`, negative, or non-finite (NaN / ±Infinity) input
 *   renders as an em dash.
 * - Values that round to less than one second render as whole
 *   milliseconds, e.g. `123ms`.
 * - Everything else renders as seconds with two decimals, e.g. `1.50s`.
 */
function fmtLatencyMs(ms: number | null | undefined): string {
  if (ms == null || !Number.isFinite(ms) || ms < 0) return "—";
  // Round before choosing the unit so 999.6 becomes "1.00s", not "1000ms".
  const whole = Math.round(ms);
  if (whole < 1000) return `${whole}ms`;
  return `${(ms / 1000).toFixed(2)}s`;
}
|
||||||
|
|
||||||
function UsageStrip({ credentialId, provider }: { credentialId: string; provider: string }) {
|
function UsageStrip({ credentialId, provider }: { credentialId: string; provider: string }) {
|
||||||
const [usage, setUsage] = useState<ProviderUsageLiveRead | null>(null);
|
const [usage, setUsage] = useState<ProviderUsageLiveRead | null>(null);
|
||||||
const [loading, setLoading] = useState(true);
|
const [loading, setLoading] = useState(true);
|
||||||
|
|
@ -318,11 +333,11 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
|
||||||
const inputTok = usage.input_tokens;
|
const inputTok = usage.input_tokens;
|
||||||
const req = usage.requests;
|
const req = usage.requests;
|
||||||
const isOllama = provider === "ollama";
|
const isOllama = provider === "ollama";
|
||||||
const modelCount = usage.models?.length ?? 0;
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="mt-2 rounded-lg border border-[color:var(--border)] bg-[color:var(--surface)] p-2.5">
|
<div className="mt-2 rounded-lg border border-[color:var(--border)] bg-[color:var(--surface)] p-2.5">
|
||||||
{isOllama ? (
|
{isOllama ? (
|
||||||
|
<div className="space-y-1.5">
|
||||||
<div className="flex items-center gap-3 text-xs text-muted">
|
<div className="flex items-center gap-3 text-xs text-muted">
|
||||||
<span className="flex items-center gap-1 text-[color:var(--success)]">
|
<span className="flex items-center gap-1 text-[color:var(--success)]">
|
||||||
<span className="inline-block h-1.5 w-1.5 rounded-full bg-[color:var(--success)]" />
|
<span className="inline-block h-1.5 w-1.5 rounded-full bg-[color:var(--success)]" />
|
||||||
|
|
@ -335,13 +350,41 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
|
||||||
<RefreshCw className={`h-3 w-3 ${loading ? "animate-spin" : ""}`} />
|
<RefreshCw className={`h-3 w-3 ${loading ? "animate-spin" : ""}`} />
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
{(usage.sample_input_tokens != null || usage.sample_output_tokens != null) && (
|
||||||
|
<div className="flex items-center justify-between text-[11px] text-muted">
|
||||||
|
<span>Usage (last probe)</span>
|
||||||
|
<span className="tabular-nums text-strong">
|
||||||
|
in {fmtTokens(usage.sample_input_tokens)} · out {fmtTokens(usage.sample_output_tokens)}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{usage.sample_latency_ms != null && (
|
||||||
|
<div className="flex items-center justify-between text-[11px] text-muted">
|
||||||
|
<span>Time (last probe)</span>
|
||||||
|
<span className="tabular-nums text-strong">
|
||||||
|
{fmtLatencyMs(usage.sample_latency_ms)}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{usage.sample_latency_ms != null && (
|
||||||
|
<div className="flex items-center justify-between text-[11px] text-muted">
|
||||||
|
<span>Time (last probe)</span>
|
||||||
|
<span className="tabular-nums text-strong">
|
||||||
|
{fmtLatencyMs(usage.sample_latency_ms)}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
<div className="flex items-center justify-between text-[11px] text-muted">
|
||||||
|
{lastFetched && <span>Updated {Math.round((Date.now() - lastFetched.getTime()) / 1000)}s ago</span>}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
) : (
|
) : (
|
||||||
<div className="space-y-1.5">
|
<div className="space-y-1.5">
|
||||||
{modelCount > 0 && (
|
{(usage.sample_input_tokens != null || usage.sample_output_tokens != null) && (
|
||||||
<div className="flex items-center justify-between text-[11px] text-muted">
|
<div className="flex items-center justify-between text-[11px] text-muted">
|
||||||
<span>Models</span>
|
<span>Usage (last probe)</span>
|
||||||
<span className="tabular-nums text-strong">
|
<span className="tabular-nums text-strong">
|
||||||
{modelCount} available
|
in {fmtTokens(usage.sample_input_tokens)} · out {fmtTokens(usage.sample_output_tokens)}
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
@ -415,7 +458,7 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
|
||||||
|
|
||||||
{tok.limit == null && inputTok.limit == null && req.limit == null && (
|
{tok.limit == null && inputTok.limit == null && req.limit == null && (
|
||||||
<p className="text-[11px] text-muted">
|
<p className="text-[11px] text-muted">
|
||||||
Connected — provider did not return token/request limit headers for this key tier.
|
Connected — no token/request limit windows were returned for this key right now.
|
||||||
</p>
|
</p>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue