feat(session-messages): implement endpoint to retrieve conversation messages for a session

This commit is contained in:
null 2026-05-24 16:19:24 -05:00
parent 11d950a13a
commit 1bf4e30e8c
3 changed files with 380 additions and 0 deletions

View File

@ -31,7 +31,13 @@ from app.schemas.claude_code import (
ClaudeSessionListResponse,
ClaudeSessionRead,
ClaudeSessionStatsRead,
SessionMessagesResponse,
SessionMessage,
SessionTokensRead as ClaudeSessionTokensRead,
SessionTokenUsageRead,
TextBlock,
ThinkingBlock,
ToolUseBlock,
)
from app.services import claude_code_reader as reader
from app.services.organizations import OrganizationContext
@ -122,6 +128,66 @@ async def get_session(
return _session_to_read(session)
@router.get(
    "/sessions/{session_id}/messages",
    response_model=SessionMessagesResponse,
    summary="Get conversation messages for a Claude Code session",
    description=(
        "Parses the raw JSONL file for a session and returns the full conversation "
        "thread: user turns, assistant text/thinking blocks, and tool calls with "
        "embedded results. Tool-result-only user turns are suppressed. "
        "Duplicate assistant message records (streaming artefacts) are merged."
    ),
)
async def get_session_messages(
    session_id: str,
    limit: int = Query(200, ge=1, le=500, description="Max messages to return"),
    offset: int = Query(0, ge=0, description="Number of messages to skip"),
    ctx: OrganizationContext = ORG_MEMBER_DEP,
) -> SessionMessagesResponse:
    """Serve one page of the parsed conversation for a Claude Code session.

    The synchronous reader is run on a worker thread via ``asyncio.to_thread``
    so the file parsing does not block the event loop. ``ctx`` is not used in
    the body; it is declared only for its dependency (presumably an
    organization-membership check — confirm against ``ORG_MEMBER_DEP``).

    Raises:
        HTTPException: 404 when the reader returns ``None``.
    """
    outcome = await asyncio.to_thread(reader.get_session_messages, session_id, limit, offset)
    if outcome is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Session not found")
    page, total = outcome

    # Translate each reader dataclass into its response-schema counterpart.
    payload: list[SessionMessage] = []
    for parsed in page:
        texts = [
            TextBlock(text=block.text, truncated=block.truncated)
            for block in parsed.text_blocks
        ]
        thoughts = [
            ThinkingBlock(text=block.text, truncated=block.truncated)
            for block in parsed.thinking_blocks
        ]
        calls = [
            ToolUseBlock(
                tool_use_id=call.tool_use_id,
                tool_name=call.tool_name,
                input=call.input,
                input_truncated=call.input_truncated,
                result=call.result,
                result_truncated=call.result_truncated,
                is_error=call.is_error,
            )
            for call in parsed.tool_uses
        ]

        token_view = None
        if parsed.tokens:
            token_view = SessionTokenUsageRead(
                input=parsed.tokens.input,
                output=parsed.tokens.output,
                cache_read=parsed.tokens.cache_read,
                cache_write=parsed.tokens.cache_write,
            )

        payload.append(
            SessionMessage(
                uuid=parsed.uuid,
                role=parsed.role,
                timestamp=parsed.timestamp,
                text_blocks=texts,
                thinking_blocks=thoughts,
                tool_uses=calls,
                model=parsed.model,
                tokens=token_view,
            )
        )

    # More pages exist when the requested window ends before the last message.
    window_end = offset + limit
    return SessionMessagesResponse(
        session_id=session_id,
        messages=payload,
        total=total,
        has_more=window_end < total,
    )
@router.get(
"/projects",
response_model=list[ClaudeProjectRead],

View File

@ -8,6 +8,55 @@ from typing import Any
from sqlmodel import SQLModel
# ── Session messages ──────────────────────────────────────────────────────────
class TextBlock(SQLModel):
    """A single text content block of a session message."""

    # The block's text; may have been shortened (see `truncated`).
    text: str
    # True when `text` was cut short before being returned.
    truncated: bool
class ThinkingBlock(SQLModel):
    """A single "thinking" content block of an assistant message."""

    # The thinking text; may have been shortened (see `truncated`).
    text: str
    # True when `text` was cut short before being returned.
    truncated: bool
class ToolUseBlock(SQLModel):
    """A tool call made by the assistant, with its result embedded when known."""

    # Identifier of the tool call; used to pair the call with its result.
    tool_use_id: str
    # Name of the tool that was invoked.
    tool_name: str
    # Arguments passed to the tool (long values may have been shortened).
    input: dict[str, Any]
    # True when at least one value inside `input` was shortened.
    input_truncated: bool
    # Plain-text tool result; None when no result is attached to this call.
    result: str | None = None
    # True when `result` was shortened.
    result_truncated: bool = False
    # True when the tool result was flagged as an error.
    is_error: bool = False
class SessionTokenUsageRead(SQLModel):
    """Token counters attached to a single session message."""

    # Input tokens.
    input: int
    # Output tokens.
    output: int
    # Tokens read from the prompt cache.
    cache_read: int
    # Tokens written to the prompt cache.
    cache_write: int
class SessionMessage(SQLModel):
    """One message (user or assistant turn) of a session conversation."""

    # Identifier of the message record.
    uuid: str
    # Who produced the message: "user" | "assistant".
    role: str
    # When the message was recorded; None when unavailable.
    timestamp: datetime | None = None
    # Visible text blocks of the message.
    text_blocks: list[TextBlock]
    # Thinking blocks of the message.
    thinking_blocks: list[ThinkingBlock]
    # Tool calls made in the message.
    tool_uses: list[ToolUseBlock]
    # Model name; optional (None when not provided).
    model: str | None = None
    # Token usage; optional (None when not provided).
    tokens: SessionTokenUsageRead | None = None
class SessionMessagesResponse(SQLModel):
    """Paginated response carrying the messages of one session."""

    # Session the messages belong to.
    session_id: str
    # The requested page of messages.
    messages: list[SessionMessage]
    # Total number of messages in the session (not just this page).
    total: int
    # True when further messages exist beyond this page.
    has_more: bool
# ── Session token totals (used in list/detail) ────────────────────────────────
class SessionTokensRead(SQLModel):
input: int
output: int

View File

@ -96,6 +96,49 @@ class ClaudeSession:
version: str | None
@dataclass
class SessionTextBlock:
    """A text content block parsed from a session transcript."""

    # The block's text; may have been shortened (see `truncated`).
    text: str
    # True when `text` was cut short.
    truncated: bool
@dataclass
class SessionThinkingBlock:
    """A "thinking" content block parsed from an assistant message."""

    # The thinking text; may have been shortened (see `truncated`).
    text: str
    # True when `text` was cut short.
    truncated: bool
@dataclass
class SessionToolUse:
    """A tool call parsed from an assistant message, with its result if known."""

    # Identifier of the tool call; used to pair the call with its result.
    tool_use_id: str
    # Name of the invoked tool.
    tool_name: str
    # Arguments passed to the tool (long values may have been shortened).
    input: dict[str, Any]
    # True when at least one value inside `input` was shortened.
    input_truncated: bool
    # Plain-text tool result; None when no result is attached to this call.
    result: str | None
    # True when `result` was shortened.
    result_truncated: bool
    # True when the tool result was flagged as an error.
    is_error: bool
@dataclass
class SessionTokenUsage:
    """Token counters recorded for a single message."""

    # Input tokens.
    input: int
    # Output tokens.
    output: int
    # Tokens read from the prompt cache.
    cache_read: int
    # Tokens written to the prompt cache.
    cache_write: int
@dataclass
class ParsedMessage:
    """One conversation message reconstructed from a session transcript."""

    # Identifier of the message record.
    uuid: str
    # Who produced the message: "user" | "assistant".
    role: str
    # When the message was recorded; None when unavailable.
    timestamp: datetime | None
    # Visible text blocks of the message.
    text_blocks: list[SessionTextBlock]
    # Thinking blocks of the message.
    thinking_blocks: list[SessionThinkingBlock]
    # Tool calls made in the message.
    tool_uses: list[SessionToolUse]
    # Model name, or None when not available.
    model: str | None
    # Token usage, or None when not available.
    tokens: SessionTokenUsage | None
@dataclass
class ClaudeConfig:
claude_settings: dict[str, Any] = field(default_factory=dict)
@ -255,6 +298,228 @@ def get_session(session_id: str) -> ClaudeSession | None:
return None
# ---------------------------------------------------------------------------
# Session message reader
# ---------------------------------------------------------------------------
_CONTENT_TRUNCATE = 4000
_INPUT_VALUE_TRUNCATE = 2000
def _trunc(text: str, limit: int = _CONTENT_TRUNCATE) -> tuple[str, bool]:
if len(text) <= limit:
return text, False
return text[:limit], True
def _trunc_input(input_dict: dict[str, Any]) -> tuple[dict[str, Any], bool]:
"""Truncate long string values inside a tool input dict."""
truncated = False
result: dict[str, Any] = {}
for k, v in input_dict.items():
if isinstance(v, str) and len(v) > _INPUT_VALUE_TRUNCATE:
result[k] = v[:_INPUT_VALUE_TRUNCATE]
truncated = True
else:
result[k] = v
return result, truncated
def _extract_tool_result_text(raw: Any) -> str:
"""Normalise a tool_result content field to plain text."""
if isinstance(raw, str):
return raw
if isinstance(raw, list):
return "\n".join(
b.get("text", "") for b in raw if isinstance(b, dict) and b.get("type") == "text"
)
return str(raw) if raw is not None else ""
def get_session_messages(
    session_id: str,
    limit: int = 200,
    offset: int = 0,
) -> tuple[list[ParsedMessage], int] | None:
    """Parse the conversation thread from a session's JSONL file.

    The first ``<session_id>.jsonl`` found under the projects directory is
    read line by line. Lines that are blank, are not valid JSON, are not JSON
    objects, are flagged ``isSidechain``, or whose ``type`` is neither
    ``"user"`` nor ``"assistant"`` are ignored.

    * Tool results found in user records are embedded into the matching
      ``tool_use`` block of the assistant message (matched by tool-use id).
    * User records are surfaced only when they carry non-blank text. The
      message ``content`` may be a list of blocks or a plain string; a plain
      string is treated as a single text block.
    * Assistant records sharing the same ``message.id`` are merged into one
      message. Blocks that carry an ``id`` already seen for that message are
      skipped. Model and usage come from the first record seen for the id.

    Args:
        session_id: Session identifier (the JSONL file stem). Empty values
            and values containing a path separator are treated as not found.
        limit: Maximum number of messages to return.
        offset: Number of messages to skip from the start of the thread.

    Returns:
        ``(messages[offset:offset + limit], total)``, or ``None`` when the
        projects directory or session file does not exist or cannot be read.
    """
    import glob  # local import: only needed to escape the lookup pattern

    root = _projects_dir()
    if not root.exists():
        return None

    # session_id is caller-supplied and ends up inside a glob pattern. Refuse
    # path separators (they would let the pattern address other directories)
    # and escape glob metacharacters so that e.g. "*" cannot match an
    # arbitrary session file.
    if not session_id or "/" in session_id or "\\" in session_id:
        return None
    path: Path | None = next(root.rglob(f"{glob.escape(session_id)}.jsonl"), None)
    if path is None:
        return None

    def _str_field(block: dict[str, Any], key: str) -> str:
        """Return ``block[key]`` when it is a string, otherwise ``""``."""
        value = block.get(key)
        return value if isinstance(value, str) else ""

    # tool_use_id -> (result_text, is_error, truncated)
    tool_results: dict[str, tuple[str, bool, bool]] = {}
    # (role, key) pairs in first-seen order, preserving conversation order.
    # For assistant messages the key is message.id (the deduplication handle);
    # for user messages it is the record uuid.
    message_order: list[tuple[str, str]] = []
    # Accumulated data per assistant message.id
    assistant_acc: dict[str, dict[str, Any]] = {}
    # User message data keyed by uuid
    user_acc: dict[str, dict[str, Any]] = {}

    try:
        with open(path, encoding="utf-8", errors="replace") as fh:
            for raw_line in fh:
                raw_line = raw_line.strip()
                if not raw_line:
                    continue
                try:
                    rec = json.loads(raw_line)
                except json.JSONDecodeError:
                    continue
                # A line can be valid JSON without being an object.
                if not isinstance(rec, dict) or rec.get("isSidechain"):
                    continue
                rec_type = rec.get("type")
                if rec_type not in ("user", "assistant"):
                    continue

                msg = rec.get("message")
                if not isinstance(msg, dict):
                    msg = {}
                ts = _parse_iso(rec.get("timestamp"))
                rec_uuid = rec.get("uuid") or ""

                if rec_type == "user":
                    content = msg.get("content")
                    if isinstance(content, str):
                        # Plain-string content is a single text block.
                        content = [{"type": "text", "text": content}]
                    elif not isinstance(content, list):
                        continue
                    blocks = [b for b in content if isinstance(b, dict)]

                    # Collect tool results for later embedding
                    for block in blocks:
                        if block.get("type") == "tool_result":
                            tid = block.get("tool_use_id", "")
                            result_text, was_cut = _trunc(
                                _extract_tool_result_text(block.get("content", ""))
                            )
                            tool_results[tid] = (
                                result_text,
                                bool(block.get("is_error", False)),
                                was_cut,
                            )

                    # Only surface user records that carry visible text
                    has_text = any(b.get("type") == "text" for b in blocks)
                    if has_text and rec_uuid not in user_acc:
                        user_acc[rec_uuid] = {"ts": ts, "content": blocks}
                        message_order.append(("user", rec_uuid))
                else:  # assistant
                    msg_id = msg.get("id") or rec_uuid
                    content = msg.get("content")
                    if not isinstance(content, list):
                        content = []
                    usage = msg.get("usage")

                    acc = assistant_acc.get(msg_id)
                    if acc is None:
                        acc = {
                            "uuid": rec_uuid,
                            "ts": ts,
                            "model": msg.get("model"),
                            "usage": usage if isinstance(usage, dict) else {},
                            "blocks": [],
                            "seen_block_ids": set(),
                        }
                        assistant_acc[msg_id] = acc
                        message_order.append(("assistant", msg_id))

                    for block in content:
                        if not isinstance(block, dict):
                            continue
                        bid = block.get("id")
                        if bid:
                            # Same block repeated across duplicate records.
                            if bid in acc["seen_block_ids"]:
                                continue
                            acc["seen_block_ids"].add(bid)
                        acc["blocks"].append(block)
    except OSError as exc:  # includes PermissionError
        logger.debug("claude_code_reader.messages_read_error path=%s error=%s", path, exc)
        return None

    # Build the final parsed list
    parsed: list[ParsedMessage] = []
    for role, key_id in message_order:
        if role == "user":
            data = user_acc[key_id]
            text_blocks: list[SessionTextBlock] = []
            for block in data["content"]:
                if block.get("type") == "text":
                    text, was_cut = _trunc(_str_field(block, "text"))
                    if text.strip():
                        text_blocks.append(SessionTextBlock(text=text, truncated=was_cut))
            # A record whose text blocks are all blank is not worth showing.
            if not text_blocks:
                continue
            parsed.append(ParsedMessage(
                uuid=key_id,
                role="user",
                timestamp=data["ts"],
                text_blocks=text_blocks,
                thinking_blocks=[],
                tool_uses=[],
                model=None,
                tokens=None,
            ))
        else:  # assistant
            data = assistant_acc[key_id]
            text_blocks = []
            thinking_blocks: list[SessionThinkingBlock] = []
            tool_uses: list[SessionToolUse] = []
            for block in data["blocks"]:
                btype = block.get("type")
                if btype == "text":
                    text, was_cut = _trunc(_str_field(block, "text"))
                    if text:
                        text_blocks.append(SessionTextBlock(text=text, truncated=was_cut))
                elif btype == "thinking":
                    text, was_cut = _trunc(_str_field(block, "thinking"))
                    if text:
                        thinking_blocks.append(
                            SessionThinkingBlock(text=text, truncated=was_cut)
                        )
                elif btype == "tool_use":
                    tid = block.get("id") or ""
                    raw_input = block.get("input")
                    inp, inp_trunc = _trunc_input(
                        raw_input if isinstance(raw_input, dict) else {}
                    )
                    result = tool_results.get(tid)
                    tool_uses.append(SessionToolUse(
                        tool_use_id=tid,
                        tool_name=block.get("name") or "unknown",
                        input=inp,
                        input_truncated=inp_trunc,
                        result=result[0] if result else None,
                        result_truncated=result[2] if result else False,
                        is_error=result[1] if result else False,
                    ))
            usage = data["usage"]
            parsed.append(ParsedMessage(
                uuid=data["uuid"],
                role="assistant",
                timestamp=data["ts"],
                text_blocks=text_blocks,
                thinking_blocks=thinking_blocks,
                tool_uses=tool_uses,
                model=data["model"],
                # `or 0` also covers counters that are present but null.
                tokens=SessionTokenUsage(
                    input=usage.get("input_tokens") or 0,
                    output=usage.get("output_tokens") or 0,
                    cache_read=usage.get("cache_read_input_tokens") or 0,
                    cache_write=usage.get("cache_creation_input_tokens") or 0,
                ),
            ))

    total = len(parsed)
    return parsed[offset : offset + limit], total
def list_projects() -> list[dict[str, Any]]:
"""Return discovered projects with aggregate stats."""
root = _projects_dir()