# Source: Pipeline/backend/app/api/claude_code.py (331 lines, 12 KiB, Python)

"""Claude Code Integration API.
Provides three integration surfaces:
1. Session Tracking — GET /claude-code/sessions, /sessions/{id}, /sessions/{id}/messages, /projects, /analytics/tools
Reads ~/.claude/projects/**/*.jsonl directly. Extracts token usage, model,
cost estimates, and active status without requiring a gateway.
2. Config Scanner — GET /claude-code/config
Reads ~/.claude/settings.json, ~/.codex/config.toml, and ~/.codex/rules/.
3. Direct CLI info — GET /claude-code/cli
Reports which CLI tools are detected on the host and their versions.
(Actual CLI execution goes through the gateway; this endpoint surfaces
discovery metadata for the dashboard.)
"""
from __future__ import annotations
import asyncio
import shutil
from typing import Any
from fastapi import APIRouter, Depends, HTTPException, Query, status
from app.api.deps import require_org_member
from app.core.logging import get_logger
from app.schemas.claude_code import (
ClaudeConfigRead,
ClaudeProjectRead,
ClaudeSessionListResponse,
ClaudeSessionRead,
ClaudeSessionStatsRead,
CommandEntry,
FileEntry,
SessionMessagesResponse,
SessionMessage,
SessionTokensRead as ClaudeSessionTokensRead,
SessionTokenUsageRead,
TextBlock,
ThinkingBlock,
ToolAnalyticsResponse,
ToolUseBlock,
)
from app.services import claude_code_reader as reader
from app.services.organizations import OrganizationContext
# Module-level logger, named after this module.
logger = get_logger(__name__)
# Every route declared below is mounted under the "/claude-code" prefix and
# grouped under the "claude-code" tag.
router = APIRouter(prefix="/claude-code", tags=["claude-code"])
# Shared dependency object used as the default for the `ctx` parameter of every
# endpoint in this module.
# NOTE(review): presumably `require_org_member` rejects callers who are not
# organization members — confirm in app.api.deps.
ORG_MEMBER_DEP = Depends(require_org_member)
def _session_to_read(s: reader.ClaudeSession) -> ClaudeSessionRead:
    """Convert a reader-layer session record into its API response schema.

    Every attribute is copied across unchanged; the nested token counters are
    re-wrapped in the ``ClaudeSessionTokensRead`` schema.
    """
    usage = s.tokens
    token_payload = ClaudeSessionTokensRead(
        input=usage.input,
        output=usage.output,
        cache_read=usage.cache_read,
        cache_write=usage.cache_write,
        total=usage.total,
    )

    # These attributes carry the same name on the reader object and on the
    # response schema, so they are forwarded by name.
    passthrough = (
        "session_id",
        "project_dir",
        "cwd",
        "title",
        "models",
        "cost_usd",
        "billing_source",
        "message_count",
        "first_message_at",
        "last_message_at",
        "is_active",
        "entrypoints",
        "git_branch",
        "version",
    )
    copied = {name: getattr(s, name) for name in passthrough}
    return ClaudeSessionRead(tokens=token_payload, **copied)
# ── Session Tracking ──────────────────────────────────────────────────────────
@router.get(
    "/sessions",
    response_model=ClaudeSessionListResponse,
    summary="List local Claude Code sessions",
    description=(
        "Auto-discovers sessions from `~/.claude/projects/**/*.jsonl`. "
        "Returns token usage, model info, cost estimates, and active status "
        "for each session without requiring a gateway connection."
    ),
)
async def list_sessions(
    project: str | None = Query(None, description="Filter by project directory name substring"),
    active_only: bool = Query(False, description="Return only currently active sessions"),
    limit: int = Query(100, ge=1, le=500, description="Maximum sessions to return"),
    ctx: OrganizationContext = ORG_MEMBER_DEP,
) -> ClaudeSessionListResponse:
    """Return the sessions found by the reader together with aggregate stats.

    The reader call is pushed onto a worker thread with ``asyncio.to_thread``
    so it does not run on the event loop. ``total`` is the number of sessions
    in this response (the length of the returned list).
    """
    found = await asyncio.to_thread(
        reader.list_sessions,
        project_filter=project,
        active_only=active_only,
        limit=limit,
    )
    summary = reader.session_stats(found)

    session_payloads = list(map(_session_to_read, found))

    # Only these keys of the stats mapping are exposed through the schema.
    stat_keys = ("session_count", "active_sessions", "total_tokens", "total_cost_usd", "models")
    stats_payload = ClaudeSessionStatsRead(**{key: summary[key] for key in stat_keys})

    return ClaudeSessionListResponse(
        sessions=session_payloads,
        total=len(found),
        stats=stats_payload,
    )
@router.get(
    "/sessions/{session_id}",
    response_model=ClaudeSessionRead,
    summary="Get a single Claude Code session",
)
async def get_session(
    session_id: str,
    ctx: OrganizationContext = ORG_MEMBER_DEP,
) -> ClaudeSessionRead:
    """Return a single parsed session by its UUID."""
    # The lookup is delegated to the reader on a worker thread.
    found = await asyncio.to_thread(reader.get_session, session_id)
    if found is not None:
        return _session_to_read(found)
    # The reader returned None for this id: report it as a 404.
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Session not found")
@router.get(
    "/sessions/{session_id}/messages",
    response_model=SessionMessagesResponse,
    summary="Get conversation messages for a Claude Code session",
    description=(
        "Parses the raw JSONL file for a session and returns the full conversation "
        "thread: user turns, assistant text/thinking blocks, and tool calls with "
        "embedded results. Tool-result-only user turns are suppressed. "
        "Duplicate assistant message records (streaming artefacts) are merged."
    ),
)
async def get_session_messages(
    session_id: str,
    limit: int = Query(200, ge=1, le=500, description="Max messages to return"),
    offset: int = Query(0, ge=0, description="Number of messages to skip"),
    ctx: OrganizationContext = ORG_MEMBER_DEP,
) -> SessionMessagesResponse:
    """Return one page of the parsed conversation for a session.

    The reader is called on a worker thread with ``(session_id, limit,
    offset)``. A ``None`` result becomes a 404; otherwise the result is
    unpacked as ``(messages, total)`` and each message is converted to the
    response schema. ``has_more`` is true while ``offset + limit`` is still
    below ``total``.
    """
    fetched = await asyncio.to_thread(reader.get_session_messages, session_id, limit, offset)
    if fetched is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Session not found")
    page, total_count = fetched

    converted: list[SessionMessage] = []
    for msg in page:
        texts = [TextBlock(text=blk.text, truncated=blk.truncated) for blk in msg.text_blocks]
        thoughts = [ThinkingBlock(text=blk.text, truncated=blk.truncated) for blk in msg.thinking_blocks]
        calls = [
            ToolUseBlock(
                tool_use_id=call.tool_use_id,
                tool_name=call.tool_name,
                input=call.input,
                input_truncated=call.input_truncated,
                result=call.result,
                result_truncated=call.result_truncated,
                is_error=call.is_error,
            )
            for call in msg.tool_uses
        ]

        # Token usage is optional on a message; a falsy value maps to None.
        if msg.tokens:
            usage = SessionTokenUsageRead(
                input=msg.tokens.input,
                output=msg.tokens.output,
                cache_read=msg.tokens.cache_read,
                cache_write=msg.tokens.cache_write,
            )
        else:
            usage = None

        converted.append(
            SessionMessage(
                uuid=msg.uuid,
                role=msg.role,
                timestamp=msg.timestamp,
                text_blocks=texts,
                thinking_blocks=thoughts,
                tool_uses=calls,
                model=msg.model,
                tokens=usage,
            )
        )

    return SessionMessagesResponse(
        session_id=session_id,
        messages=converted,
        total=total_count,
        has_more=total_count > (offset + limit),
    )
@router.get(
    "/projects",
    response_model=list[ClaudeProjectRead],
    summary="List discovered local Claude Code projects",
    description="Returns one entry per project directory with aggregated token and cost stats.",
)
async def list_projects(
    ctx: OrganizationContext = ORG_MEMBER_DEP,
) -> list[ClaudeProjectRead]:
    """Return one ``ClaudeProjectRead`` per mapping produced by the reader.

    ``reader.list_projects`` runs on a worker thread. ``cwd`` and
    ``last_active_at`` are read with ``.get`` and may therefore be absent;
    the other keys are required.
    """
    rows = await asyncio.to_thread(reader.list_projects)

    def _as_read(row: dict[str, Any]) -> ClaudeProjectRead:
        # Required keys use indexing; optional ones fall back to None.
        return ClaudeProjectRead(
            project_dir=row["project_dir"],
            cwd=row.get("cwd"),
            session_count=row["session_count"],
            total_tokens=row["total_tokens"],
            total_cost_usd=row["total_cost_usd"],
            last_active_at=row.get("last_active_at"),
            is_active=row["is_active"],
        )

    return list(map(_as_read, rows))
# ── Tool Analytics ───────────────────────────────────────────────────────────
@router.get(
    "/analytics/tools",
    response_model=ToolAnalyticsResponse,
    summary="Aggregate tool-use statistics across Claude Code sessions",
    description=(
        "Scans local JSONL session files and returns counts of each tool used, "
        "top files read and written, and top Bash commands by binary name. "
        "Duplicate streaming records are deduplicated by block id. "
        "Use `days` to scope the analysis window and `project` to narrow by project."
    ),
)
async def get_tool_analytics(
    project: str | None = Query(None, description="Filter by project directory name substring"),
    days: int = Query(30, ge=1, le=365, description="Number of days to look back (uses file mtime)"),
    ctx: OrganizationContext = ORG_MEMBER_DEP,
) -> ToolAnalyticsResponse:
    """Wrap the reader's tool-analytics mapping in the response schema.

    The reader is called on a worker thread with ``(project, days)``; the
    file and command rankings it returns are converted entry by entry into
    ``FileEntry`` / ``CommandEntry`` objects.
    """
    report = await asyncio.to_thread(reader.get_tool_analytics, project, days)

    def _file_entries(key: str) -> list[FileEntry]:
        # Both file rankings share the same {"path", "count"} entry layout.
        return [FileEntry(path=item["path"], count=item["count"]) for item in report[key]]

    tool_counts = report["tool_counts"]
    files_read = _file_entries("top_files_read")
    files_written = _file_entries("top_files_written")
    commands = [
        CommandEntry(command=item["command"], count=item["count"])
        for item in report["top_commands"]
    ]

    return ToolAnalyticsResponse(
        tool_counts=tool_counts,
        top_files_read=files_read,
        top_files_written=files_written,
        top_commands=commands,
        session_count=report["session_count"],
        date_range_days=report["date_range_days"],
    )
# ── Config Scanner ────────────────────────────────────────────────────────────
@router.get(
    "/config",
    response_model=ClaudeConfigRead,
    summary="Read local Claude Code and Codex CLI configuration",
    description=(
        "Reads `~/.claude/settings.json`, `~/.codex/config.toml`, and "
        "`~/.codex/rules/*.rules`. Never returns secrets — only settings "
        "and trust/rule data."
    ),
)
async def get_config(
    ctx: OrganizationContext = ORG_MEMBER_DEP,
) -> ClaudeConfigRead:
    """Return the configuration parsed by ``reader.read_config``.

    The reader runs on a worker thread. Credential paths are not returned;
    the response only carries booleans saying whether each path is set.
    """
    parsed = await asyncio.to_thread(reader.read_config)

    # Reduce each credentials path to a "configured or not" flag.
    has_claude_credentials = parsed.claude_credentials_path is not None
    has_codex_credentials = parsed.codex_credentials_path is not None

    return ClaudeConfigRead(
        claude_settings=parsed.claude_settings,
        codex_config=parsed.codex_config,
        codex_rules=parsed.codex_rules,
        claude_credentials_configured=has_claude_credentials,
        codex_credentials_configured=has_codex_credentials,
    )
# ── Direct CLI Discovery ──────────────────────────────────────────────────────
async def _cli_version(cmd: list[str]) -> str | None:
try:
proc = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=5)
line = stdout.decode(errors="replace").strip().splitlines()[0] if stdout else ""
return line or None
except Exception:
return None
@router.get(
    "/cli",
    summary="Detect installed CLI tools",
    description=(
        "Checks which of Claude Code, Codex CLI, and Ollama are installed "
        "on the host and returns their versions. No gateway required."
    ),
)
async def detect_cli(
    ctx: OrganizationContext = ORG_MEMBER_DEP,
) -> dict[str, Any]:
    """Return discovery metadata for locally installed CLI tools.

    Each tool is looked up on ``PATH`` with ``shutil.which``; for the ones
    that are found, ``<path> --version`` is executed to obtain a version
    string. The Claude entry additionally reports the reader's projects
    directory and whether it exists.

    Returns:
        A mapping with ``"claude"``, ``"codex"`` and ``"ollama"`` entries,
        each holding ``installed``, ``path`` and ``version``.
    """
    claude_path = shutil.which("claude")
    codex_path = shutil.which("codex")
    ollama_path = shutil.which("ollama")

    async def _version_of(path: str | None) -> str | None:
        # Skip the subprocess entirely for tools that are not on PATH, and
        # run the resolved executable so the reported path and version
        # always describe the same binary.
        if not path:
            return None
        return await _cli_version([path, "--version"])

    # The probes are independent, so run them concurrently: the worst case
    # is one probe timeout rather than the sum of three.
    claude_version, codex_version, ollama_version = await asyncio.gather(
        _version_of(claude_path),
        _version_of(codex_path),
        _version_of(ollama_path),
    )

    # Resolve the projects directory once and reuse it for both fields.
    projects_dir = reader._projects_dir()

    return {
        "claude": {
            "installed": claude_path is not None,
            "path": claude_path,
            "version": claude_version,
            "projects_dir": str(projects_dir),
            "projects_dir_exists": projects_dir.exists(),
        },
        "codex": {
            "installed": codex_path is not None,
            "path": codex_path,
            "version": codex_version,
        },
        "ollama": {
            "installed": ollama_path is not None,
            "path": ollama_path,
            "version": ollama_version,
        },
    }