OVERVIEW
CHUNGUS provides an OpenAI-compatible API for chat completions. The API supports both streaming and non-streaming responses, allowing you to integrate LLM capabilities into your applications using the same interface as OpenAI's API.
Authentication: Bearer token supplied via the Authorization header.
LIST MODELS
GET /api/v1/models
Retrieve a list of available models. Only active models are returned.
Example Request
curl -X GET https://your-domain.com/api/v1/models \
-H "Authorization: Bearer YOUR_API_KEY"
import requests
url = "https://your-domain.com/api/v1/models"
headers = {
"Authorization": "Bearer YOUR_API_KEY"
}
response = requests.get(url, headers=headers)
models = response.json()
for model in models['data']:
print(f"Model: {model['id']}")
const response = await fetch('https://your-domain.com/api/v1/models', {
headers: {
'Authorization': 'Bearer YOUR_API_KEY'
}
});
const data = await response.json();
data.data.forEach(model => {
console.log(`Model: ${model.id}`);
});
Response Format
{
"object": "list",
"data": [
{
"id": "llama-2-7b-chat",
"object": "model",
"created": 1694268190,
"owned_by": "chungus",
"permission": [],
"root": "llama-2-7b-chat",
"parent": null
},
{
"id": "mistral-7b-instruct",
"object": "model",
"created": 1694268200,
"owned_by": "chungus",
"permission": [],
"root": "mistral-7b-instruct",
"parent": null
}
]
}
Use the model's id field from this list when making chat completion requests.
NON-STREAMING CHAT COMPLETIONS
POST /api/v1/chat/completions
Request Parameters
| Parameter | Type | Required | Description |
|---|---|---|---|
model |
string | Yes | Name of the model to use (e.g., "llama-2-7b-chat") |
messages |
array | Yes | Array of message objects with role and content |
stream |
boolean | No | Set to false or omit for non-streaming (default: false) |
temperature |
float | No | Sampling temperature (0.0 to 2.0, default: model default) |
max_tokens |
integer | No | Maximum tokens to generate (default: model default) |
top_p |
float | No | Nucleus sampling parameter |
top_k |
integer | No | Top-k sampling parameter |
Example Request
curl -X POST https://your-domain.com/api/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_API_KEY" \
-d '{
"model": "llama-2-7b-chat",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "What is the capital of France?"
}
],
"temperature": 0.7,
"max_tokens": 512,
"stream": false
}'
import requests
url = "https://your-domain.com/api/v1/chat/completions"
headers = {
"Content-Type": "application/json",
"Authorization": "Bearer YOUR_API_KEY"
}
data = {
"model": "llama-2-7b-chat",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "What is the capital of France?"
}
],
"temperature": 0.7,
"max_tokens": 512,
"stream": False
}
response = requests.post(url, headers=headers, json=data)
result = response.json()
print(result["choices"][0]["message"]["content"])
const response = await fetch('https://your-domain.com/api/v1/chat/completions', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': 'Bearer YOUR_API_KEY'
},
body: JSON.stringify({
model: 'llama-2-7b-chat',
messages: [
{
role: 'system',
content: 'You are a helpful assistant.'
},
{
role: 'user',
content: 'What is the capital of France?'
}
],
temperature: 0.7,
max_tokens: 512,
stream: false
})
});
const result = await response.json();
console.log(result.choices[0].message.content);
Response Format
{
"id": "chatcmpl-123",
"object": "chat.completion",
"created": 1694268190,
"model": "llama-2-7b-chat",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "The capital of France is Paris."
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 25,
"completion_tokens": 8,
"total_tokens": 33
}
}
STREAMING CHAT COMPLETIONS
POST /api/v1/chat/completions
Note: Set "stream": true in the request body to receive a streaming response.
Example Request
curl -X POST https://your-domain.com/api/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_API_KEY" \
-d '{
"model": "llama-2-7b-chat",
"messages": [
{
"role": "user",
"content": "Write a short story about a robot."
}
],
"temperature": 0.7,
"max_tokens": 512,
"stream": true
}'
import requests
import json
url = "https://your-domain.com/api/v1/chat/completions"
headers = {
"Content-Type": "application/json",
"Authorization": "Bearer YOUR_API_KEY"
}
data = {
"model": "llama-2-7b-chat",
"messages": [
{
"role": "user",
"content": "Write a short story about a robot."
}
],
"temperature": 0.7,
"max_tokens": 512,
"stream": True
}
response = requests.post(url, headers=headers, json=data, stream=True)
for line in response.iter_lines():
if line:
line = line.decode('utf-8')
if line.startswith('data: '):
data_str = line[6:] # Remove 'data: ' prefix
if data_str == '[DONE]':
break
try:
chunk = json.loads(data_str)
if 'choices' in chunk and len(chunk['choices']) > 0:
delta = chunk['choices'][0].get('delta', {})
if 'content' in delta:
print(delta['content'], end='', flush=True)
except json.JSONDecodeError:
continue
const response = await fetch('https://your-domain.com/api/v1/chat/completions', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': 'Bearer YOUR_API_KEY'
},
body: JSON.stringify({
model: 'llama-2-7b-chat',
messages: [
{
role: 'user',
content: 'Write a short story about a robot.'
}
],
temperature: 0.7,
max_tokens: 512,
stream: true
})
});
const reader = response.body.getReader();
const decoder = new TextDecoder();
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = decoder.decode(value);
const lines = chunk.split('\n');
for (const line of lines) {
if (line.startsWith('data: ')) {
const data = line.slice(6);
if (data === '[DONE]') break;
try {
const json = JSON.parse(data);
if (json.choices && json.choices[0].delta.content) {
process.stdout.write(json.choices[0].delta.content);
}
} catch (e) {
// Skip invalid JSON
}
}
}
}
Streaming Response Format
Streaming responses use Server-Sent Events (SSE) format. Each chunk is a JSON object prefixed with "data: ".
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"llama-2-7b-chat","choices":[{"index":0,"delta":{"content":"The"},"finish_reason":null}]}
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"llama-2-7b-chat","choices":[{"index":0,"delta":{"content":" capital"},"finish_reason":null}]}
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"llama-2-7b-chat","choices":[{"index":0,"delta":{"content":" of"},"finish_reason":null}]}
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"llama-2-7b-chat","choices":[{"index":0,"delta":{"content":" France"},"finish_reason":null}]}
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"llama-2-7b-chat","choices":[{"index":0,"delta":{"content":" is"},"finish_reason":null}]}
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"llama-2-7b-chat","choices":[{"index":0,"delta":{"content":" Paris"},"finish_reason":null}]}
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"llama-2-7b-chat","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":25,"completion_tokens":8,"total_tokens":33}}
data: [DONE]
IMAGE INPUT (MULTIMODAL)
POST /v1/chat/completions
Requirement: A vision-capable model (e.g., LLaVA, Qwen-VL, Phi-3-Vision)
To send images, set the message content field to an array of content parts instead of a plain
string. Each part has a type of either "text" or "image_url".
Images can be supplied as an HTTPS URL or as a base64-encoded data URI.
Content Part Format
| Field | Type | Description |
|---|---|---|
type |
string | "text" or "image_url" |
text |
string | The text content (when type is "text") |
image_url.url |
string | An HTTPS image URL or a base64 data URI (data:image/jpeg;base64,...) (when type is "image_url") |
Example — Image from URL
curl -X POST https://your-domain.com/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_API_KEY" \
-d '{
"model": "llava-1.5-7b",
"messages": [
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {
"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/PNG_transparency_demonstration_1.png/280px-PNG_transparency_demonstration_1.png"
}
},
{
"type": "text",
"text": "What is in this image?"
}
]
}
]
}'
import requests
url = "https://your-domain.com/v1/chat/completions"
headers = {
"Content-Type": "application/json",
"Authorization": "Bearer YOUR_API_KEY"
}
data = {
"model": "llava-1.5-7b",
"messages": [
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {
"url": "https://example.com/image.jpg"
}
},
{
"type": "text",
"text": "What is in this image?"
}
]
}
]
}
response = requests.post(url, headers=headers, json=data)
print(response.json()["choices"][0]["message"]["content"])
const response = await fetch('https://your-domain.com/v1/chat/completions', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': 'Bearer YOUR_API_KEY'
},
body: JSON.stringify({
model: 'llava-1.5-7b',
messages: [
{
role: 'user',
content: [
{
type: 'image_url',
image_url: {
url: 'https://example.com/image.jpg'
}
},
{
type: 'text',
text: 'What is in this image?'
}
]
}
]
})
});
const result = await response.json();
console.log(result.choices[0].message.content);
Example — Base64 Image
IMAGE_B64=$(base64 -i /path/to/image.jpg)
curl -X POST https://your-domain.com/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_API_KEY" \
-d "{
\"model\": \"llava-1.5-7b\",
\"messages\": [
{
\"role\": \"user\",
\"content\": [
{
\"type\": \"image_url\",
\"image_url\": {
\"url\": \"data:image/jpeg;base64,${IMAGE_B64}\"
}
},
{
\"type\": \"text\",
\"text\": \"Describe this image.\"
}
]
}
]
}"
import base64
import requests
with open("/path/to/image.jpg", "rb") as f:
image_b64 = base64.b64encode(f.read()).decode("utf-8")
url = "https://your-domain.com/v1/chat/completions"
headers = {
"Content-Type": "application/json",
"Authorization": "Bearer YOUR_API_KEY"
}
data = {
"model": "llava-1.5-7b",
"messages": [
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{image_b64}"
}
},
{
"type": "text",
"text": "Describe this image."
}
]
}
]
}
response = requests.post(url, headers=headers, json=data)
print(response.json()["choices"][0]["message"]["content"])
const fs = require('fs');
const imageBuffer = fs.readFileSync('/path/to/image.jpg');
const imageB64 = imageBuffer.toString('base64');
const response = await fetch('https://your-domain.com/v1/chat/completions', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': 'Bearer YOUR_API_KEY'
},
body: JSON.stringify({
model: 'llava-1.5-7b',
messages: [
{
role: 'user',
content: [
{
type: 'image_url',
image_url: {
url: `data:image/jpeg;base64,${imageB64}`
}
},
{
type: 'text',
text: 'Describe this image.'
}
]
}
]
})
});
const result = await response.json();
console.log(result.choices[0].message.content);
• The content array can contain multiple image_url and text parts in any order.
• Supported data URI formats: data:image/jpeg;base64,..., data:image/png;base64,..., data:image/webp;base64,...
• For URL-based images, the server fetches the image at inference time — the URL must be publicly accessible from the server.
• Only vision-capable models support image input. Use GET /v1/models to list available models and select one that supports multimodal inputs.
EMBEDDINGS
POST /api/v1/embeddings
Generate embeddings for text inputs. Supports both single strings and arrays of strings. Embeddings are vector representations of text that can be used for semantic search, similarity, and other machine learning tasks.
Request Parameters
| Parameter | Type | Required | Description |
|---|---|---|---|
model |
string | Yes | Name of the embedding model to use (e.g., "nomic-embed-text") |
input |
string or array | Yes | Text to embed. Can be a single string or an array of strings |
Example Request
curl https://your-domain.com/api/v1/embeddings \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_API_KEY" \
-d '{
"model": "nomic-embed-text",
"input": "Your text here"
}'
import requests
url = "https://your-domain.com/api/v1/embeddings"
headers = {
"Content-Type": "application/json",
"Authorization": "Bearer YOUR_API_KEY"
}
data = {
"model": "nomic-embed-text",
"input": "Your text here"
}
response = requests.post(url, headers=headers, json=data)
result = response.json()
embedding = result["data"][0]["embedding"]
print(f"Embedding dimension: {len(embedding)}")
const response = await fetch('https://your-domain.com/api/v1/embeddings', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': 'Bearer YOUR_API_KEY'
},
body: JSON.stringify({
model: 'nomic-embed-text',
input: 'Your text here'
})
});
const result = await response.json();
const embedding = result.data[0].embedding;
console.log(`Embedding dimension: ${embedding.length}`);
Multiple Inputs Example
curl https://your-domain.com/api/v1/embeddings \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_API_KEY" \
-d '{
"model": "nomic-embed-text",
"input": [
"First text to embed",
"Second text to embed",
"Third text to embed"
]
}'
Response Format
{
"object": "list",
"data": [
{
"object": "embedding",
"index": 0,
"embedding": [
0.0023064255,
-0.009327292,
... (embedding vector values)
]
}
],
"model": "nomic-embed-text",
"usage": {
"prompt_tokens": 8,
"total_tokens": 8
}
}
MESSAGE FORMAT
Messages are arrays of objects with role and content fields.
| Role | Description |
|---|---|
system |
System message that sets the behavior of the assistant (optional) |
user |
User message containing the input/question |
assistant |
Assistant message (used for multi-turn conversations) |
Example Multi-Turn Conversation
{
"model": "llama-2-7b-chat",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "What is 2+2?"
},
{
"role": "assistant",
"content": "2+2 equals 4."
},
{
"role": "user",
"content": "What about 3+3?"
}
],
"temperature": 0.7,
"max_tokens": 512
}
ERROR HANDLING
Errors are returned in the following format:
{
"error": {
"message": "Model 'invalid-model' not found or not active",
"type": "invalid_request_error",
"code": "model_not_found"
}
}
• model_not_found - The specified model doesn't exist or is inactive
• invalid_model_type - The specified model is not an embedding model (for embeddings endpoint)
• invalid_messages - Messages array is empty or invalid
• missing_input - Input field is missing (for embeddings endpoint)
• invalid_json - Request body is not valid JSON
• rate_limit_exceeded - API key rate limit exceeded