Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.kymaapi.com/llms.txt

Use this file to discover all available pages before exploring further.

Install

pip install openai

Setup

from openai import OpenAI

# Connection settings for the OpenAI SDK client: Kyma's base URL and a
# placeholder API key (substitute your own).
KYMA_BASE_URL = "https://kymaapi.com/v1"
KYMA_API_KEY = "ky-your-api-key"

client = OpenAI(base_url=KYMA_BASE_URL, api_key=KYMA_API_KEY)

Basic chat

# One-shot request: a system message followed by the user's question.
conversation = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Explain quantum computing simply."},
]

response = client.chat.completions.create(model="qwen-3.6-plus", messages=conversation)

# Print the message text of the first returned choice.
answer = response.choices[0].message.content
print(answer)

Multi-turn conversation

messages = [
    {"role": "system", "content": "You are a Python tutor."}
]

# Interactive chat loop. `messages` accumulates every user and assistant turn,
# so each request carries the whole conversation so far. Type "quit" to exit.
while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C at the prompt ends the session cleanly instead of
        # crashing with a traceback.
        print()
        break

    if user_input.lower() == "quit":
        break
    if not user_input:
        # Nothing was typed: re-prompt rather than sending an empty message.
        continue

    messages.append({"role": "user", "content": user_input})

    response = client.chat.completions.create(
        model="llama-3.3-70b",
        messages=messages
    )

    reply = response.choices[0].message.content
    # Store the assistant's reply so the next request includes it as context.
    messages.append({"role": "assistant", "content": reply})
    print(f"AI: {reply}")

Streaming

# Request a streamed completion: the call returns an iterator of chunks
# instead of a single finished response.
stream = client.chat.completions.create(
    model="llama-3.3-70b",
    messages=[{"role": "user", "content": "Write a haiku about coding"}],
    stream=True
)

for chunk in stream:
    # OpenAI-compatible streams may include chunks whose `choices` list is
    # empty (for example a usage-only chunk); indexing [0] on those would
    # raise IndexError, so skip them.
    if not chunk.choices:
        continue
    content = chunk.choices[0].delta.content
    # Only print chunks that actually carry text; `content` can be None or empty.
    if content:
        print(content, end="", flush=True)

Async

import asyncio
from openai import AsyncOpenAI

# Async variant of the client, configured with the same base URL and key.
client = AsyncOpenAI(base_url="https://kymaapi.com/v1", api_key="ky-your-api-key")


async def main():
    """Send a single user message with the async client and print the reply text."""
    greeting = [{"role": "user", "content": "Hello!"}]
    response = await client.chat.completions.create(
        model="qwen-3.6-plus",
        messages=greeting,
    )
    reply = response.choices[0].message.content
    print(reply)


# Run the coroutine to completion on a fresh event loop.
asyncio.run(main())

Parameters

# Sampling options, collected in one place and unpacked into the request below.
sampling_options = {
    "temperature": 0.9,  # 0-2, higher = more creative
    "max_tokens": 1000,  # max response length
    "top_p": 0.95,       # nucleus sampling
}

response = client.chat.completions.create(
    model="llama-3.3-70b",
    messages=[{"role": "user", "content": "Be creative"}],
    **sampling_options,
)

Generating images and videos

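Image and video models use a separate asynchronous endpoint. POST to /v1/images/generations or /v1/videos/generations to get a job ID back (the id field of the response), then poll /v1/jobs/{id} until the job reaches a terminal status (succeeded, failed, expired, or refunded). Use requests directly (pip install requests if it is not already installed) - the OpenAI SDK does not cover these endpoints.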
import requests, time

API = "https://kymaapi.com/v1"
HEADERS = {"Authorization": "Bearer ky-your-api-key"}
# requests applies no timeout by default, so a stalled connection would hang
# forever; bound every HTTP call explicitly (seconds).
REQUEST_TIMEOUT = 30
# Statuses after which the job will not change any more.
TERMINAL_STATUSES = ("succeeded", "failed", "expired", "refunded")

# Submit the generation job. The response carries the job's id, not the result.
submit_response = requests.post(
    f"{API}/videos/generations",
    headers=HEADERS,
    json={
        "model": "kling-3-pro",
        "prompt": "A drone shot over a misty mountain range at sunrise",
        "duration": 5,
    },
    timeout=REQUEST_TIMEOUT,
)
# Fail here on HTTP errors (bad key, bad request, ...) rather than with a
# confusing KeyError when reading "id" below.
submit_response.raise_for_status()
submit = submit_response.json()

job_id = submit["id"]

# Poll the job every 3 seconds until it reaches a terminal status.
while True:
    job_response = requests.get(
        f"{API}/jobs/{job_id}",
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    job_response.raise_for_status()
    job = job_response.json()
    if job["status"] in TERMINAL_STATUSES:
        break
    time.sleep(3)

# The loop also exits on failed/expired/refunded; only a succeeded job is
# read for its output URL.
if job["status"] != "succeeded":
    raise RuntimeError(f"Job {job_id} ended with status {job['status']!r}")

print(job["output"]["url"])
The same pattern works for /v1/images/generations: swap in an image model (flux-1.1-ultra, ideogram-v3, recraft-v3, flux-kontext-pro) and omit duration.