toMedia
Claude API

Extended Thinking(深度推理)

让 Claude 在回答前进行深度思考 — 预算控制、流式思考与工具调用

概述

Extended Thinking 让 Claude 在回答前创建内部 thinking 内容块,用于推理复杂问题,显著提升解答质量。

基本用法

import anthropic

client = anthropic.Anthropic()

# Extended thinking is switched on with a 10000-token thinking budget.
thinking_config = {"type": "enabled", "budget_tokens": 10000}
question = {
    "role": "user",
    "content": "是否存在无穷多个素数 p 满足 p mod 4 == 3?",
}

response = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=16000,
    thinking=thinking_config,
    messages=[question],
)

# Walk the response blocks: "thinking" blocks carry the reasoning,
# "text" blocks carry the final answer. Other block types are ignored.
for block in response.content:
    kind = block.type
    if kind == "text":
        print("=== 最终回答 ===")
        print(block.text)
    elif kind == "thinking":
        print("=== 思考过程 ===")
        print(block.thinking)

自适应思考(仅 Opus 4.6)

response = client.messages.create(
    model="claude-opus-4-6",
    max_tokens=16000,
    # Adaptive mode: Claude decides its own thinking budget.
    thinking={"type": "adaptive"},
    messages=[
        {
            "role": "user",
            "content": "设计一个处理每秒100万事件的实时分析平台的分布式系统架构",
        }
    ],
)

for block in response.content:
    if block.type == "text":
        print("最终回答:")
        print(block.text)
    elif block.type == "thinking":
        reasoning = block.thinking
        print(f"思考内容({len(reasoning)} 字符):")
        # Only the first 200 characters are shown as a preview.
        print(f"{reasoning[:200]}...\n")

流式思考

with client.messages.stream(
    model="claude-sonnet-4-6",
    max_tokens=16000,
    thinking={"type": "enabled", "budget_tokens": 10000},
    messages=[
        {
            "role": "user",
            "content": "1071 和 462 的最大公约数是多少?",
        }
    ],
) as stream:
    for event in stream:
        # Only content-block deltas carry incremental output.
        if event.type != "content_block_delta":
            continue

        delta = event.delta
        if delta.type == "thinking_delta":
            # Show the thinking process as it arrives.
            print(delta.thinking, end="", flush=True)
        elif delta.type == "text_delta":
            # Show the final answer as it arrives.
            print(delta.text, end="", flush=True)

思考 + 工具调用

重要:在工具调用流程中,必须将 thinking block 原样传回。

# Tools offered to Claude: a single weather lookup that takes a location.
tools = [
    {
        "name": "get_weather",
        "description": "获取天气数据",
        "input_schema": {
            "type": "object",
            "properties": {
                "location": {"type": "string"},
            },
            "required": ["location"],
        },
    }
]

thinking_config = {"type": "enabled", "budget_tokens": 10000}

# Keep the conversation in one list so the follow-up request replays exactly
# the same first user turn instead of a retyped (and possibly different) copy.
messages = [
    {
        "role": "user",
        "content": "帮我规划一趟巴黎旅行,先查一下天气",
    }
]

response = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=16000,
    thinking=thinking_config,
    tools=tools,
    messages=messages,
)

# Every tool call Claude requested in this turn (there may be more than one).
tool_blocks = [block for block in response.content if block.type == "tool_use"]

if tool_blocks:
    # IMPORTANT: send the assistant turn back unmodified. Passing the whole
    # content list keeps every thinking block (and any other block the
    # response contained) in its original order, rather than rebuilding the
    # turn from a hand-picked subset.
    messages.append({"role": "assistant", "content": response.content})

    # Answer every tool_use block, not just the first: each one that is
    # replayed in the assistant turn needs a matching tool_result.
    messages.append(
        {
            "role": "user",
            "content": [
                {
                    "type": "tool_result",
                    "tool_use_id": tool_block.id,
                    # Stubbed weather data; a real integration would call the
                    # weather service with tool_block.input here.
                    "content": "天气:18°C,晴",
                }
                for tool_block in tool_blocks
            ],
        }
    )

    continuation = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=16000,
        thinking=thinking_config,
        tools=tools,
        messages=messages,
    )

思考显示选项

# Option 1: summarized mode (the default) - a condensed version of the
# thinking content is returned.
summarized_thinking = {
    "type": "enabled",
    "budget_tokens": 10000,
    "display": "summarized",
}
response = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=16000,
    thinking=summarized_thinking,
    messages=[{"role": "user", "content": "解决这个逻辑题..."}],
)

# Option 2: omitted mode (faster streaming) - no thinking_delta events are
# sent while streaming.
omitted_thinking = {
    "type": "enabled",
    "budget_tokens": 10000,
    "display": "omitted",
}
response = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=16000,
    thinking=omitted_thinking,
    messages=[{"role": "user", "content": "解决这个..."}],
)

适用场景

| 场景 | 说明 |
| --- | --- |
| 复杂数学 | 多步骤证明、数论问题 |
| 算法设计 | 寻找计算问题的最优解 |
| 代码调试 | 系统分析复杂 Bug |
| 战略规划 | 需要深入分析的商业决策 |
| 研究综合 | 将多个来源整合为连贯结论 |

最佳实践

  1. 适度使用:用于复杂推理场景,简单问题不需要
  2. 合理预算:预算越高思考越深入,但成本也越高
  3. 监控长度:更长的思考不一定带来更好的结果
  4. 保留 thinking block:工具调用流程中必须原样传回
  5. 成本考量:思考 token 按全价计费,不享受折扣
  6. 模型选择:Opus 4.6 支持自适应思考,其他模型需要固定预算

On this page