Skip to content

Streaming

Both the OpenAI and Anthropic endpoints support SSE streaming. Set stream: true in your request.

Terminal window
curl https://api.cheapestinference.com/v1/chat/completions \
-H "Authorization: Bearer YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-4o-mini",
"messages": [{"role": "user", "content": "Write a haiku about coding"}],
"stream": true
}'
stream = client.chat.completions.create(
model="gpt-4o-mini",
messages=[{"role": "user", "content": "Write a haiku about coding"}],
stream=True,
)
for chunk in stream:
content = chunk.choices[0].delta.content or ""
print(content, end="", flush=True)
const stream = await client.chat.completions.create({
model: "gpt-4o-mini",
messages: [{ role: "user", content: "Write a haiku about coding" }],
stream: true,
});
for await (const chunk of stream) {
process.stdout.write(chunk.choices[0]?.delta?.content || "");
}
with client.messages.stream(
model="claude-sonnet-4-20250514",
max_tokens=1024,
messages=[{"role": "user", "content": "Write a haiku about coding"}],
) as stream:
for text in stream.text_stream:
print(text, end="", flush=True)
const stream = await client.messages.create({
model: "claude-sonnet-4-20250514",
max_tokens: 1024,
messages: [{ role: "user", content: "Write a haiku about coding" }],
stream: true,
});
for await (const event of stream) {
if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
process.stdout.write(event.delta.text);
}
}

OpenAI streaming uses the standard SSE format:

data: {"id":"chatcmpl-abc","choices":[{"delta":{"content":"Hello"}}]}
data: {"id":"chatcmpl-abc","choices":[{"delta":{"content":" world"}}]}
data: [DONE]

Anthropic streaming uses Anthropic’s event format:

event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}
event: message_stop
data: {"type":"message_stop"}