feat(usage): per-account limit rows and aggregate labeling (#41)

- Add PerGatewayUsage interface and buildPerGatewayUsage() function
- Each gateway's most constrained limit shown independently, sorted by exhaustion
- Per-account rows render BEFORE aggregate summary
- Aggregate cards labeled 'Combined' when multiple gateways exist
- 'Soonest reset' and 'Combined limit' labels replace misleading single-gateway labels
- Filter out gateways with no configured limit to reduce noise
This commit is contained in:
null 2026-05-21 01:57:13 -05:00
parent 5c3c09edba
commit cd688ced26
2 changed files with 170 additions and 9 deletions

View File

@ -47,7 +47,9 @@ import {
type ProviderNativeUsageWindow, type ProviderNativeUsageWindow,
RuntimeUsageSection, RuntimeUsageSection,
aggregateRuntimeUsage, aggregateRuntimeUsage,
buildPerGatewayUsage,
type AggregatedRuntimeUsage, type AggregatedRuntimeUsage,
type PerGatewayUsage,
} from "@/components/dashboard/RuntimeUsageSection"; } from "@/components/dashboard/RuntimeUsageSection";
import { GatewayHealthPanel } from "@/components/dashboard/GatewayHealthPanel"; import { GatewayHealthPanel } from "@/components/dashboard/GatewayHealthPanel";
import { GatewayCronPanel } from "@/components/dashboard/GatewayCronPanel"; import { GatewayCronPanel } from "@/components/dashboard/GatewayCronPanel";
@ -647,8 +649,11 @@ export default function DashboardPage() {
() => gatewayStatusesQuery.data ?? [], () => gatewayStatusesQuery.data ?? [],
[gatewayStatusesQuery.data], [gatewayStatusesQuery.data],
); );
// Runtime usage — query all gateways in parallel, aggregate // Runtime usage — query all gateways in parallel, aggregate + per-gateway
const runtimeUsageQuery = useQuery<AggregatedRuntimeUsage, ApiError>({ const runtimeUsageQuery = useQuery<
{ aggregate: AggregatedRuntimeUsage; perGateway: PerGatewayUsage[] },
ApiError
>({
queryKey: [ queryKey: [
"dashboard", "dashboard",
"runtime-usage", "runtime-usage",
@ -674,11 +679,19 @@ export default function DashboardPage() {
r.status === "fulfilled" && r.value !== null, r.status === "fulfilled" && r.value !== null,
) )
.map((r) => r.value); .map((r) => r.value);
return aggregateRuntimeUsage(valid); const labels: Record<string, string> = {};
for (const t of gatewayTargets) {
labels[t.gatewayId] = t.boardName;
}
return {
aggregate: aggregateRuntimeUsage(valid),
perGateway: buildPerGatewayUsage(valid, labels),
};
}, },
}); });
const runtimeUsage = runtimeUsageQuery.data ?? null; const runtimeUsage = runtimeUsageQuery.data?.aggregate ?? null;
const perGatewayUsage = runtimeUsageQuery.data?.perGateway ?? [];
const providerUsageQuery = useQuery<ProviderNativeUsageWindow[], ApiError>({ const providerUsageQuery = useQuery<ProviderNativeUsageWindow[], ApiError>({
queryKey: [ queryKey: [
"dashboard", "dashboard",
@ -1186,6 +1199,7 @@ export default function DashboardPage() {
<RuntimeUsageSection <RuntimeUsageSection
usage={runtimeUsage} usage={runtimeUsage}
providerUsageWindows={providerUsageWindows} providerUsageWindows={providerUsageWindows}
perGatewayUsage={perGatewayUsage}
isLoading={runtimeUsageQuery.isLoading || providerUsageQuery.isLoading} isLoading={runtimeUsageQuery.isLoading || providerUsageQuery.isLoading}
hasGateways={hasConfiguredGateways} hasGateways={hasConfiguredGateways}
/> />

View File

@ -39,6 +39,97 @@ export interface AggregatedRuntimeUsage {
topSessions: TopSession[]; topSessions: TopSession[];
} }
/**
 * One gateway's most-constrained limit — the data behind a single per-account row.
 * Rows of this shape are produced by buildPerGatewayUsage().
 */
export interface PerGatewayUsage {
/** Gateway id, stringified from the API response's `gateway_id`. */
gatewayId: string;
/** Human-readable name shown in the row. */
gatewayLabel: string;
/** Account key; buildPerGatewayUsage() currently fills this with the gateway id. */
accountKey: string;
/**
 * Which limit is binding: "output_tokens" | "total_tokens" | "messages" | "cost" | "none".
 * NOTE(review): "cost" is listed here, but the builder visible in this file never emits it — confirm.
 */
limitKind: string;
/** Display text for the binding limit, e.g. "output tokens", "messages". */
limitLabel: string;
/** Configured ceiling of the binding limit; null when no limit applies. */
limitValue: number | null;
/** Amount consumed against the binding limit (0 when no limit applies). */
usedValue: number;
/** Percentage used (nominally 0–100) of the most constraining limit; null when no limit applies. */
pctUsed: number | null;
/** Predicted milliseconds until the limit is reached; null when the response carries no prediction. */
timeToLimitMs: number | null;
/** Copied from the response's `predictions.safe` flag. */
safe: boolean;
/** Milliseconds until the usage window resets. */
resetInMs: number;
/** Total spend in USD reported for this gateway. */
totalCostUsd: number;
}
/**
 * Build the per-account constraint rows from raw per-gateway API responses.
 *
 * Every configured limit on a gateway (output tokens, total tokens, the legacy
 * token limit, messages) becomes a candidate. The candidate with the highest
 * percentage used is reported as that gateway's binding limit; on a tie the
 * earlier candidate wins.
 *
 * A limit whose percentage is explicitly `null` is ignored. A limit whose
 * percentage is missing or not a finite number has it derived from
 * `used / limit`, so the row never claims 0% for a partly consumed limit.
 *
 * @param responses - Raw runtime-usage responses, one per gateway.
 * @param labels - Map of gateway id to display label. Ids without an entry
 *   fall back to the first 8 characters of the id.
 * @returns Rows for gateways that have at least one usable limit, sorted with
 *   the most exhausted gateway first.
 */
export function buildPerGatewayUsage(
  responses: RuntimeUsageResponse[],
  labels: Record<string, string>, // gatewayId → display label
): PerGatewayUsage[] {
  type Candidate = { kind: string; label: string; limit: number; used: number; pct: number };

  // Prefer the server-reported percentage; otherwise compute it from the raw
  // counters. Callers only pass a truthy (non-zero) limit, and any non-finite
  // result collapses to 0 so sorting and rendering stay well-defined.
  const resolvePct = (
    reported: number | null | undefined,
    used: number,
    limit: number,
  ): number => {
    if (typeof reported === "number" && Number.isFinite(reported)) {
      return reported;
    }
    const derived = (used / limit) * 100;
    return Number.isFinite(derived) ? derived : 0;
  };

  return responses
    .map((r): PerGatewayUsage => {
      const gid = String(r.gateway_id);
      const label = labels[gid] ?? gid.slice(0, 8);
      const c = r.current;

      const candidates: Candidate[] = [];

      if (c.output_token_limit && c.output_token_limit_pct !== null) {
        const used = c.total_output_tokens ?? 0;
        candidates.push({
          kind: "output_tokens",
          label: "output tokens",
          limit: c.output_token_limit,
          used,
          pct: resolvePct(c.output_token_limit_pct, used, c.output_token_limit),
        });
      }

      if (c.total_token_limit && c.total_token_limit_pct !== null) {
        candidates.push({
          kind: "total_tokens",
          label: "total tokens",
          limit: c.total_token_limit,
          used: c.total_tokens,
          pct: resolvePct(c.total_token_limit_pct, c.total_tokens, c.total_token_limit),
        });
      }

      // Legacy token_limit is considered only when no typed token limit is present.
      if (!c.output_token_limit && !c.total_token_limit && c.token_limit && c.token_pct !== null) {
        candidates.push({
          kind: "total_tokens",
          label: "tokens",
          limit: c.token_limit,
          used: c.total_tokens,
          pct: resolvePct(c.token_pct, c.total_tokens, c.token_limit),
        });
      }

      if (c.message_limit && c.message_pct !== null) {
        candidates.push({
          kind: "messages",
          label: "messages",
          limit: c.message_limit,
          used: c.total_calls,
          pct: resolvePct(c.message_pct, c.total_calls, c.message_limit),
        });
      }

      // Most constrained = highest pct used; `>=` keeps the earlier candidate on ties.
      const binding =
        candidates.length > 0
          ? candidates.reduce((a, b) => (a.pct >= b.pct ? a : b))
          : null;

      return {
        gatewayId: gid,
        gatewayLabel: label,
        accountKey: gid,
        limitKind: binding?.kind ?? "none",
        limitLabel: binding?.label ?? "no limit",
        limitValue: binding?.limit ?? null,
        usedValue: binding?.used ?? 0,
        pctUsed: binding?.pct ?? null,
        timeToLimitMs: r.predictions.time_to_limit_ms ?? null,
        safe: r.predictions.safe,
        resetInMs: r.window.reset_in_ms,
        totalCostUsd: c.total_cost_usd,
      };
    })
    // Only surface rows that have a configured limit — otherwise it is noise.
    .filter((row) => row.limitKind !== "none")
    // Most exhausted first.
    .sort((a, b) => (b.pctUsed ?? 0) - (a.pctUsed ?? 0));
}
export interface ProviderNativeUsageWindow { export interface ProviderNativeUsageWindow {
key: string; key: string;
label: string; label: string;
@ -278,6 +369,7 @@ function StatCard({ label, value, sub, tone = "default", icon }: StatCardProps)
interface RuntimeUsageSectionProps { interface RuntimeUsageSectionProps {
usage: AggregatedRuntimeUsage | null; usage: AggregatedRuntimeUsage | null;
providerUsageWindows: ProviderNativeUsageWindow[]; providerUsageWindows: ProviderNativeUsageWindow[];
perGatewayUsage: PerGatewayUsage[];
isLoading: boolean; isLoading: boolean;
hasGateways: boolean; hasGateways: boolean;
} }
@ -285,6 +377,7 @@ interface RuntimeUsageSectionProps {
export function RuntimeUsageSection({ export function RuntimeUsageSection({
usage, usage,
providerUsageWindows, providerUsageWindows,
perGatewayUsage,
isLoading, isLoading,
hasGateways, hasGateways,
}: RuntimeUsageSectionProps) { }: RuntimeUsageSectionProps) {
@ -313,7 +406,8 @@ export function RuntimeUsageSection({
const right = order[b.key] ?? 99; const right = order[b.key] ?? 99;
return left === right ? a.gatewayLabel.localeCompare(b.gatewayLabel) : left - right; return left === right ? a.gatewayLabel.localeCompare(b.gatewayLabel) : left - right;
}); });
const noData = !hasRuntimeData && providerRows.length === 0; const noData = !hasRuntimeData && providerRows.length === 0 && perGatewayUsage.length === 0;
const showCombinedLabel = perGatewayUsage.length > 1;
return ( return (
<DashboardSection title="Runtime Usage"> <DashboardSection title="Runtime Usage">
@ -321,7 +415,7 @@ export function RuntimeUsageSection({
<DashboardEmptyState message="Loading usage data…" /> <DashboardEmptyState message="Loading usage data…" />
) : noData ? ( ) : noData ? (
<DashboardEmptyState message="No usage data yet. Usage appears after the first model call." /> <DashboardEmptyState message="No usage data yet. Usage appears after the first model call." />
) : hasRuntimeData || providerRows.length > 0 ? ( ) : hasRuntimeData || providerRows.length > 0 || perGatewayUsage.length > 0 ? (
<div className="space-y-3"> <div className="space-y-3">
{providerRows.length > 0 && ( {providerRows.length > 0 && (
<div className="space-y-2 rounded-lg border border-[color:var(--border)] bg-[color:var(--surface-muted)] p-3"> <div className="space-y-2 rounded-lg border border-[color:var(--border)] bg-[color:var(--surface-muted)] p-3">
@ -371,23 +465,76 @@ export function RuntimeUsageSection({
</div> </div>
)} )}
{/* Per-account constraint rows — most exhausted first */}
{perGatewayUsage.length > 0 && (
<div className="rounded-lg border border-[color:var(--border)] bg-[color:var(--surface-muted)] p-3">
<p className="mb-2 text-[11px] font-semibold uppercase tracking-wider text-muted">
Per-account limits
</p>
<div className="space-y-2">
{perGatewayUsage.map((row) => {
const pct = row.pctUsed ?? 0;
const barTone =
pct >= 90
? "bg-[color:var(--danger)]"
: pct >= 75
? "bg-[color:var(--warning)]"
: "bg-[color:var(--success)]";
const dangerLabel =
pct >= 90 ? "text-[color:var(--danger)]"
: pct >= 75 ? "text-[color:var(--warning)]"
: "text-muted";
return (
<div
key={row.gatewayId}
className="rounded-md border border-[color:var(--border)] bg-[color:var(--surface)] p-2"
>
<div className="mb-1 flex items-center justify-between gap-2">
<p className="text-xs font-medium text-strong truncate">{row.gatewayLabel}</p>
<div className="flex items-center gap-2 shrink-0">
{row.timeToLimitMs !== null && (
<span className={`text-[11px] tabular-nums ${!row.safe ? "text-[color:var(--danger)]" : "text-muted"}`}>
{row.timeToLimitMs === 0 ? "at limit" : `${fmtMs(row.timeToLimitMs)} left`}
</span>
)}
<span className={`text-[11px] tabular-nums ${dangerLabel}`}>
{row.pctUsed === null ? "—" : `${Math.round(pct)}%`}
</span>
</div>
</div>
<div className="h-1.5 w-full overflow-hidden rounded-full bg-[color:var(--surface-strong)]">
<div
className={`h-full rounded-full transition-all ${barTone}`}
style={{ width: `${Math.max(0, Math.min(100, pct))}%` }}
/>
</div>
<p className="mt-1 text-[11px] text-muted">
{row.limitLabel} · resets in {fmtMs(row.resetInMs)} · {fmtCost(row.totalCostUsd)} spent
</p>
</div>
);
})}
</div>
</div>
)}
{usage && hasRuntimeData && ( {usage && hasRuntimeData && (
<div className="grid grid-cols-2 gap-2 sm:grid-cols-4"> <div className="grid grid-cols-2 gap-2 sm:grid-cols-4">
<StatCard <StatCard
label="5h Spend" label={showCombinedLabel ? "Combined spend" : "5h Spend"}
value={fmtCost(usage.totalCostUsd)} value={fmtCost(usage.totalCostUsd)}
sub={`${fmtTokens(usage.totalTokens)} tokens`} sub={`${fmtTokens(usage.totalTokens)} tokens`}
tone={usage.costLimitUsd && usage.totalCostUsd / usage.costLimitUsd > 0.8 ? "warning" : "default"} tone={usage.costLimitUsd && usage.totalCostUsd / usage.costLimitUsd > 0.8 ? "warning" : "default"}
icon={<Zap className="h-3 w-3" />} icon={<Zap className="h-3 w-3" />}
/> />
<StatCard <StatCard
label="Reset In" label={showCombinedLabel ? "Soonest reset" : "Reset In"}
value={fmtMs(usage.resetInMs)} value={fmtMs(usage.resetInMs)}
sub={usage.resetsAt ? new Date(usage.resetsAt).toLocaleTimeString([], { hour: "2-digit", minute: "2-digit" }) : undefined} sub={usage.resetsAt ? new Date(usage.resetsAt).toLocaleTimeString([], { hour: "2-digit", minute: "2-digit" }) : undefined}
icon={<Clock className="h-3 w-3" />} icon={<Clock className="h-3 w-3" />}
/> />
<StatCard <StatCard
label="Time to Limit" label={showCombinedLabel ? "Combined limit" : "Time to Limit"}
value={usage.timeToLimitMs === null ? "—" : usage.timeToLimitMs === 0 ? "At limit" : fmtMs(usage.timeToLimitMs)} value={usage.timeToLimitMs === null ? "—" : usage.timeToLimitMs === 0 ? "At limit" : fmtMs(usage.timeToLimitMs)}
sub={(() => { sub={(() => {
if (usage.outputTokenLimit) { if (usage.outputTokenLimit) {