import os

from openai import OpenAI
# Kyma API client (OpenAI-compatible endpoint).
# Read the key from the environment so a real credential is never committed
# to source control; the literal placeholder remains only as a demo fallback.
client = OpenAI(
    base_url="https://kymaapi.com/v1",
    api_key=os.environ.get("KYMA_API_KEY", "ky-your-api-key"),
)
def retrieve_context(query: str) -> list[str]:
    """Return context passages relevant to *query*.

    Stub implementation: plug in real retrieval logic here
    (vector DB, keyword search, etc.). Currently returns the
    same fixed passages regardless of the query.
    """
    passages = (
        "Kyma API exposes active models through /v1/models and one OpenAI-compatible endpoint.",
        "All models use OpenAI-compatible /v1/chat/completions endpoint.",
        "Gemini 2.5 Flash provides 1M context for large-document RAG.",
    )
    return list(passages)
def rag_answer(question: str) -> str:
    """Answer *question* with a basic RAG flow.

    Retrieves context passages, numbers them [1], [2], ... so the
    model can cite them, then asks the model to answer strictly
    from the supplied context.
    """
    passages = retrieve_context(question)
    numbered = [f"[{idx}] {text}" for idx, text in enumerate(passages, start=1)]
    context_block = "\n\n".join(numbered)

    system_prompt = (
        "Answer using only the provided context. "
        "Cite sources with [1], [2] etc. "
        "If the answer isn't in the context, say so."
    )
    user_prompt = f"Context:\n{context_block}\n\nQuestion: {question}"

    response = client.chat.completions.create(
        model="gemini-2.5-flash",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    return response.choices[0].message.content
if __name__ == "__main__":
    # Guarded entry point: the demo query only runs when this file is
    # executed as a script, not when it is imported as a module.
    print(rag_answer("What models does Kyma API support?"))