Stream responses token-by-token for real-time UX.
from openai import OpenAI

# Point the official OpenAI client at the Kyma-compatible endpoint.
client = OpenAI(
    base_url="https://kymaapi.com/v1",
    api_key="kyma-your-key",
)

# stream=True switches the response to server-sent events: the model's
# output arrives incrementally as delta chunks instead of one final message.
stream = client.chat.completions.create(
    model="llama-3.3-70b",
    messages=[{"role": "user", "content": "Write a poem about AI"}],
    stream=True,
)

for chunk in stream:
    # Some OpenAI-compatible backends emit chunks with an empty `choices`
    # list (e.g. usage or keep-alive frames); guard before indexing.
    # The JS example already does this via optional chaining.
    if not chunk.choices:
        continue
    content = chunk.choices[0].delta.content
    if content:
        # flush=True so each token appears immediately (typewriter effect).
        print(content, end="", flush=True)
import OpenAI from "openai";

// Official OpenAI SDK pointed at the Kyma-compatible endpoint.
const client = new OpenAI({
  baseURL: "https://kymaapi.com/v1",
  apiKey: "kyma-your-key",
});

// With stream: true the call resolves to an async iterable of delta chunks
// rather than a single completed message.
const completionStream = await client.chat.completions.create({
  model: "llama-3.3-70b",
  messages: [{ role: "user", content: "Write a poem about AI" }],
  stream: true,
});

// Write each token fragment as soon as it arrives; optional chaining
// tolerates chunks that carry no choices or no delta content.
for await (const chunk of completionStream) {
  const tokenText = chunk.choices[0]?.delta?.content;
  if (tokenText) process.stdout.write(tokenText);
}
# Stream a chat completion over SSE by setting "stream": true in the body.
curl https://kymaapi.com/v1/chat/completions \
  -H "Authorization: Bearer kyma-your-key" \
  -H "Content-Type: application/json" \
  -d '{ "model": "llama-3.3-70b", "messages": [{"role": "user", "content": "Write a poem about AI"}], "stream": true }'