"""Demonstrate web search with prompt caching across a two-turn conversation.

The first request runs a web search; the second reuses the cached prefix
(including the first turn's search results) via a cache breakpoint, while
still being able to issue new searches if needed.
"""
import anthropic

client = anthropic.Anthropic()

# Shared web-search tool config (approximate San Francisco location).
WEB_SEARCH_TOOL = {
    "type": "web_search_20250305",
    "name": "web_search",
    "user_location": {
        "type": "approximate",
        "city": "San Francisco",
        "region": "California",
        "country": "US",
        "timezone": "America/Los_Angeles",
    },
}

# First request: triggers a live web search.
messages = [
    {
        "role": "user",
        "content": "What's the current weather in San Francisco today?"
    }
]

response1 = client.messages.create(
    model="claude-opus-4-1-20250805",
    max_tokens=1024,
    messages=messages,
    tools=[WEB_SEARCH_TOOL],
)

# Add Claude's response (including any search results) to the conversation.
messages.append({
    "role": "assistant",
    "content": response1.content
})

# Second request with a cache breakpoint after the search results.
# NOTE: cache_control must live on a content block, not on the message
# dict itself — a string `content` cannot carry a cache breakpoint.
messages.append({
    "role": "user",
    "content": [
        {
            "type": "text",
            "text": "Should I expect rain later this week?",
            "cache_control": {"type": "ephemeral"},  # cache up to this point
        }
    ],
})

response2 = client.messages.create(
    model="claude-opus-4-1-20250805",
    max_tokens=1024,
    messages=messages,
    tools=[WEB_SEARCH_TOOL],
)

# The second response benefits from cached search results,
# while still being able to perform new searches when needed.
# `usage` is a Pydantic model, not a dict — use attribute access, and
# coerce a possible None to 0.
cache_read = getattr(response2.usage, "cache_read_input_tokens", 0) or 0
print(f"Cache read tokens: {cache_read}")