Claude API
流式输出(Streaming)
通过 Server-Sent Events 实现实时响应 — 基础用法、事件结构与工具调用中的流式处理
概述
流式输出通过 Server-Sent Events (SSE) 实时传递响应内容,提升用户感知速度,缩短首个 token 的等待时间。
基本用法
import anthropic
client = anthropic.Anthropic()
with client.messages.stream(
model="claude-opus-4-6",
max_tokens=1024,
messages=[
{"role": "user", "content": "写一首关于编程的短诗"}
]
) as stream:
for text in stream.text_stream:
print(text, end="", flush=True)SSE 事件结构
event: message_start
data: {"type":"message_start","message":{"id":"msg_...","type":"message",...}}
event: content_block_start
data: {"type":"content_block_start","index":0,"content_block":{"type":"text"}}
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"你好"}}
event: content_block_stop
data: {"type":"content_block_stop","index":0}
event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":10}}
event: message_stop
data: {"type":"message_stop"}

处理不同事件类型
import anthropic

client = anthropic.Anthropic()

with client.messages.stream(
    model="claude-opus-4-6",
    max_tokens=1024,
    messages=[
        {"role": "user", "content": "解释量子计算"}
    ]
) as stream:
    # Iterate the raw SSE events instead of the text-only convenience iterator,
    # dispatching on each event's type.
    for event in stream:
        if event.type == "message_start":
            print("消息开始...")
        elif event.type == "content_block_start":
            if event.content_block.type == "text":
                print("文本块开始")
        elif event.type == "content_block_delta":
            # text_delta events carry the incremental text payload.
            if event.delta.type == "text_delta":
                print(event.delta.text, end="", flush=True)
        elif event.type == "message_delta":
            # message_delta reports the stop reason and output-token usage.
            print(f"\n停止原因: {event.delta.stop_reason}")
            print(f"输出 tokens: {event.usage.output_tokens}")
        elif event.type == "message_stop":
            print("\n消息完成")

流式 + 工具调用
import anthropic

client = anthropic.Anthropic()

# Tool definition: a JSON-Schema description of the callable tool.
tools = [
    {
        "name": "get_weather",
        "description": "获取天气",
        "input_schema": {
            "type": "object",
            "properties": {"location": {"type": "string"}},
            "required": ["location"]
        }
    }
]

# First request — stream Claude's response.
with client.messages.stream(
    model="claude-opus-4-6",
    max_tokens=1024,
    tools=tools,
    messages=[
        {"role": "user", "content": "巴黎天气怎么样?"}
    ]
) as stream:
    # Drain the stream and collect the fully assembled final message.
    collected_message = stream.get_final_message()

# Check whether a tool was used.
for block in collected_message.content:
    if block.type == "tool_use":
        # Simulated tool execution; a real app would call a weather API here.
        tool_result = f"{block.input['location']}天气:20°C,多云"

# Stream the final answer back to the user.
# NOTE(review): this relies on `block` and `tool_result` leaking out of the
# loop above — it raises NameError if the model returned no tool_use block.
with client.messages.stream(
    model="claude-opus-4-6",
    max_tokens=1024,
    tools=tools,
    messages=[
        {"role": "user", "content": "巴黎天气怎么样?"},
        {"role": "assistant", "content": collected_message.content},
        {
            "role": "user",
            "content": [{
                "type": "tool_result",
                "tool_use_id": block.id,
                "content": tool_result
            }]
        }
    ]
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)

最佳实践
- 首 Token 时间:监控 message_start 事件到达的时间
- 渐进渲染:文本到达即显示,不要等全部完成
- 断线处理:优雅处理连接中断
- 内存管理:不要将整个响应缓存在内存中
- 用户反馈:流式输出期间显示加载指示器