OVERVIEW
CHUNGUS provides an OpenAI-compatible API for chat completions. The API supports both streaming and non-streaming responses, allowing you to integrate LLM capabilities into your applications using the same interface as OpenAI's API.
Authentication: Bearer token via the `Authorization` header.
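Because the API is OpenAI-compatible, the official `openai` Python client (v1+) can usually be pointed at the server by overriding its base URL. A minimal sketch, assuming your deployment serves the same paths the SDK expects (`/chat/completions` under the base URL):

```python
from openai import OpenAI

# Point the standard OpenAI client at the CHUNGUS server.
# The base_url and api_key values are placeholders for your deployment.
client = OpenAI(
    base_url="https://your-domain.com/api/v1",
    api_key="YOUR_API_KEY",
)

response = client.chat.completions.create(
    model="llama-2-7b-chat",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)
```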
LIST MODELS
GET /api/v1/models
Retrieve a list of available models. Only active models are returned.
Example Request
```bash
curl -X GET https://your-domain.com/api/v1/models \
  -H "Authorization: Bearer YOUR_API_KEY"
```

```python
import requests

url = "https://your-domain.com/api/v1/models"
headers = {
    "Authorization": "Bearer YOUR_API_KEY"
}

response = requests.get(url, headers=headers)
models = response.json()
for model in models['data']:
    print(f"Model: {model['id']}")
```

```javascript
const response = await fetch('https://your-domain.com/api/v1/models', {
  headers: {
    'Authorization': 'Bearer YOUR_API_KEY'
  }
});
const data = await response.json();
data.data.forEach(model => {
  console.log(`Model: ${model.id}`);
});
```
Response Format
```json
{
  "object": "list",
  "data": [
    {
      "id": "llama-2-7b-chat",
      "object": "model",
      "created": 1694268190,
      "owned_by": "chungus",
      "permission": [],
      "root": "llama-2-7b-chat",
      "parent": null
    },
    {
      "id": "mistral-7b-instruct",
      "object": "model",
      "created": 1694268200,
      "owned_by": "chungus",
      "permission": [],
      "root": "mistral-7b-instruct",
      "parent": null
    }
  ]
}
```
Use a model's `id` field as the `model` value when making chat completion requests.
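For example, you can discover a model at runtime and feed its `id` straight into a completion request. A sketch using `requests` (picking the first model is an illustrative choice, not a recommendation):

```python
import requests

BASE = "https://your-domain.com/api/v1"
HEADERS = {"Authorization": "Bearer YOUR_API_KEY"}

# Pick the first active model the server advertises.
models = requests.get(f"{BASE}/models", headers=HEADERS).json()
model_id = models["data"][0]["id"]

resp = requests.post(
    f"{BASE}/chat/completions",
    headers=HEADERS,
    json={
        "model": model_id,
        "messages": [{"role": "user", "content": "Hi"}],
    },
)
print(resp.json()["choices"][0]["message"]["content"])
```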
NON-STREAMING CHAT COMPLETIONS
POST /api/v1/chat/completions
Request Parameters
| Parameter | Type | Required | Description |
|---|---|---|---|
| `model` | string | Yes | Name of the model to use (e.g., `"llama-2-7b-chat"`) |
| `messages` | array | Yes | Array of message objects with `role` and `content` |
| `stream` | boolean | No | Set to `false` or omit for non-streaming (default: `false`) |
| `temperature` | float | No | Sampling temperature (0.0 to 2.0; default: model default) |
| `max_tokens` | integer | No | Maximum number of tokens to generate (default: model default) |
| `top_p` | float | No | Nucleus sampling parameter |
| `top_k` | integer | No | Top-k sampling parameter |
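Neither `top_p` nor `top_k` appears in the examples that follow; a request that sets them might look like the sketch below (the values are illustrative, and `top_k` support can vary by model backend):

```python
import requests

# Illustrative sampling values; tune per model.
payload = {
    "model": "llama-2-7b-chat",
    "messages": [{"role": "user", "content": "Name three fruits."}],
    "temperature": 0.8,
    "top_p": 0.9,  # nucleus sampling: keep tokens covering 90% of probability mass
    "top_k": 40,   # consider only the 40 most likely tokens at each step
}
resp = requests.post(
    "https://your-domain.com/api/v1/chat/completions",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json=payload,
)
print(resp.json()["choices"][0]["message"]["content"])
```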
Example Request
```bash
curl -X POST https://your-domain.com/api/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -d '{
    "model": "llama-2-7b-chat",
    "messages": [
      {
        "role": "system",
        "content": "You are a helpful assistant."
      },
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ],
    "temperature": 0.7,
    "max_tokens": 512,
    "stream": false
  }'
```
```python
import requests

url = "https://your-domain.com/api/v1/chat/completions"
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer YOUR_API_KEY"
}
data = {
    "model": "llama-2-7b-chat",
    "messages": [
        {
            "role": "system",
            "content": "You are a helpful assistant."
        },
        {
            "role": "user",
            "content": "What is the capital of France?"
        }
    ],
    "temperature": 0.7,
    "max_tokens": 512,
    "stream": False
}

response = requests.post(url, headers=headers, json=data)
result = response.json()
print(result["choices"][0]["message"]["content"])
```
```javascript
const response = await fetch('https://your-domain.com/api/v1/chat/completions', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer YOUR_API_KEY'
  },
  body: JSON.stringify({
    model: 'llama-2-7b-chat',
    messages: [
      {
        role: 'system',
        content: 'You are a helpful assistant.'
      },
      {
        role: 'user',
        content: 'What is the capital of France?'
      }
    ],
    temperature: 0.7,
    max_tokens: 512,
    stream: false
  })
});
const result = await response.json();
console.log(result.choices[0].message.content);
```
Response Format
```json
{
  "id": "chatcmpl-123",
  "object": "chat.completion",
  "created": 1694268190,
  "model": "llama-2-7b-chat",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "The capital of France is Paris."
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 25,
    "completion_tokens": 8,
    "total_tokens": 33
  }
}
```
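In application code it's worth checking the error envelope (described under Error Handling below) before reading `choices`. A minimal sketch of a wrapper; the helper name and defaults are ours:

```python
import requests

def chat(messages, model="llama-2-7b-chat", **params):
    """Call the non-streaming endpoint and raise on API errors."""
    resp = requests.post(
        "https://your-domain.com/api/v1/chat/completions",
        headers={"Authorization": "Bearer YOUR_API_KEY"},
        json={"model": model, "messages": messages, "stream": False, **params},
    )
    body = resp.json()
    if "error" in body:
        # Error envelope documented under "Error Handling" below.
        raise RuntimeError(f"{body['error']['code']}: {body['error']['message']}")
    return body["choices"][0]["message"]["content"]

print(chat([{"role": "user", "content": "What is the capital of France?"}]))
```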
STREAMING CHAT COMPLETIONS
POST /api/v1/chat/completions
Note: Set `"stream": true` in the request body.
Example Request
```bash
curl -X POST https://your-domain.com/api/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -d '{
    "model": "llama-2-7b-chat",
    "messages": [
      {
        "role": "user",
        "content": "Write a short story about a robot."
      }
    ],
    "temperature": 0.7,
    "max_tokens": 512,
    "stream": true
  }'
```
```python
import requests
import json

url = "https://your-domain.com/api/v1/chat/completions"
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer YOUR_API_KEY"
}
data = {
    "model": "llama-2-7b-chat",
    "messages": [
        {
            "role": "user",
            "content": "Write a short story about a robot."
        }
    ],
    "temperature": 0.7,
    "max_tokens": 512,
    "stream": True
}

response = requests.post(url, headers=headers, json=data, stream=True)
for line in response.iter_lines():
    if line:
        line = line.decode('utf-8')
        if line.startswith('data: '):
            data_str = line[6:]  # Remove 'data: ' prefix
            if data_str == '[DONE]':
                break
            try:
                chunk = json.loads(data_str)
                if 'choices' in chunk and len(chunk['choices']) > 0:
                    delta = chunk['choices'][0].get('delta', {})
                    if 'content' in delta:
                        print(delta['content'], end='', flush=True)
            except json.JSONDecodeError:
                continue
```
```javascript
const response = await fetch('https://your-domain.com/api/v1/chat/completions', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer YOUR_API_KEY'
  },
  body: JSON.stringify({
    model: 'llama-2-7b-chat',
    messages: [
      {
        role: 'user',
        content: 'Write a short story about a robot.'
      }
    ],
    temperature: 0.7,
    max_tokens: 512,
    stream: true
  })
});

const reader = response.body.getReader();
const decoder = new TextDecoder();

while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  const chunk = decoder.decode(value);
  const lines = chunk.split('\n');
  for (const line of lines) {
    if (line.startsWith('data: ')) {
      const data = line.slice(6);
      if (data === '[DONE]') break;
      try {
        const json = JSON.parse(data);
        if (json.choices && json.choices[0].delta.content) {
          process.stdout.write(json.choices[0].delta.content);
        }
      } catch (e) {
        // Skip invalid JSON (e.g., a partial SSE line split across read chunks)
      }
    }
  }
}
```
Streaming Response Format
Streaming responses use the Server-Sent Events (SSE) format. Each chunk is a JSON object prefixed with `data: `.
```
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"llama-2-7b-chat","choices":[{"index":0,"delta":{"content":"The"},"finish_reason":null}]}

data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"llama-2-7b-chat","choices":[{"index":0,"delta":{"content":" capital"},"finish_reason":null}]}

data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"llama-2-7b-chat","choices":[{"index":0,"delta":{"content":" of"},"finish_reason":null}]}

data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"llama-2-7b-chat","choices":[{"index":0,"delta":{"content":" France"},"finish_reason":null}]}

data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"llama-2-7b-chat","choices":[{"index":0,"delta":{"content":" is"},"finish_reason":null}]}

data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"llama-2-7b-chat","choices":[{"index":0,"delta":{"content":" Paris"},"finish_reason":null}]}

data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"llama-2-7b-chat","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":25,"completion_tokens":8,"total_tokens":33}}

data: [DONE]
```
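Concatenating the `delta.content` fragments in order reconstructs the full assistant message, and in this example the final chunk also carries the `usage` block. A sketch that collects both, assuming the SSE framing shown above:

```python
import json
import requests

resp = requests.post(
    "https://your-domain.com/api/v1/chat/completions",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json={
        "model": "llama-2-7b-chat",
        "messages": [{"role": "user", "content": "What is the capital of France?"}],
        "stream": True,
    },
    stream=True,
)

parts, usage = [], None
for line in resp.iter_lines(decode_unicode=True):
    if not line or not line.startswith("data: "):
        continue
    payload = line[len("data: "):]
    if payload == "[DONE]":
        break
    chunk = json.loads(payload)
    parts.append(chunk["choices"][0].get("delta", {}).get("content", ""))
    if chunk.get("usage"):
        usage = chunk["usage"]  # attached to the final content chunk above

print("".join(parts))  # e.g., "The capital of France is Paris."
print(usage)
```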
EMBEDDINGS
POST /api/v1/embeddings
Generate embeddings for text inputs. Supports both single strings and arrays of strings. Embeddings are vector representations of text that can be used for semantic search, similarity, and other machine learning tasks.
Request Parameters
| Parameter | Type | Required | Description |
|---|---|---|---|
| `model` | string | Yes | Name of the embedding model to use (e.g., `"nomic-embed-text"`) |
| `input` | string or array | Yes | Text to embed; a single string or an array of strings |
Example Request
```bash
curl https://your-domain.com/api/v1/embeddings \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -d '{
    "model": "nomic-embed-text",
    "input": "Your text here"
  }'
```
```python
import requests

url = "https://your-domain.com/api/v1/embeddings"
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer YOUR_API_KEY"
}
data = {
    "model": "nomic-embed-text",
    "input": "Your text here"
}

response = requests.post(url, headers=headers, json=data)
result = response.json()
embedding = result["data"][0]["embedding"]
print(f"Embedding dimension: {len(embedding)}")
```
```javascript
const response = await fetch('https://your-domain.com/api/v1/embeddings', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer YOUR_API_KEY'
  },
  body: JSON.stringify({
    model: 'nomic-embed-text',
    input: 'Your text here'
  })
});
const result = await response.json();
const embedding = result.data[0].embedding;
console.log(`Embedding dimension: ${embedding.length}`);
```
Multiple Inputs Example
```bash
curl https://your-domain.com/api/v1/embeddings \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -d '{
    "model": "nomic-embed-text",
    "input": [
      "First text to embed",
      "Second text to embed",
      "Third text to embed"
    ]
  }'
```
Response Format
```json
{
  "object": "list",
  "data": [
    {
      "object": "embedding",
      "index": 0,
      "embedding": [
        0.0023064255,
        -0.009327292,
        ... (embedding vector values)
      ]
    }
  ],
  "model": "nomic-embed-text",
  "usage": {
    "prompt_tokens": 8,
    "total_tokens": 8
  }
}
```
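A common next step is comparing embeddings by cosine similarity, e.g. for the semantic-search use case mentioned above. A small sketch over a batch request (pure Python, no numpy); note the sort by `index` so results line up with the inputs:

```python
import math
import requests

def embed(texts):
    """Embed a list of strings and return vectors in input order."""
    resp = requests.post(
        "https://your-domain.com/api/v1/embeddings",
        headers={"Authorization": "Bearer YOUR_API_KEY"},
        json={"model": "nomic-embed-text", "input": texts},
    )
    data = sorted(resp.json()["data"], key=lambda d: d["index"])
    return [d["embedding"] for d in data]

def cosine(a, b):
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(y * y for y in b))
    return dot / (norm_a * norm_b)

query, doc1, doc2 = embed(["paris", "capital of france", "a story about a robot"])
print(cosine(query, doc1))  # expected to score higher than the pair below
print(cosine(query, doc2))
```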
MESSAGE FORMAT
Messages are arrays of objects with `role` and `content` fields.

| Role | Description |
|---|---|
| `system` | System message that sets the behavior of the assistant (optional) |
| `user` | User message containing the input/question |
| `assistant` | Assistant message (used for multi-turn conversations) |
Example Multi-Turn Conversation
```json
{
  "model": "llama-2-7b-chat",
  "messages": [
    {
      "role": "system",
      "content": "You are a helpful assistant."
    },
    {
      "role": "user",
      "content": "What is 2+2?"
    },
    {
      "role": "assistant",
      "content": "2+2 equals 4."
    },
    {
      "role": "user",
      "content": "What about 3+3?"
    }
  ],
  "temperature": 0.7,
  "max_tokens": 512
}
```
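In code, a multi-turn conversation is just a list you keep appending to: add the assistant's reply after each call, then the next user message. A sketch (the `ask` helper is ours, not part of the API):

```python
import requests

URL = "https://your-domain.com/api/v1/chat/completions"
HEADERS = {"Authorization": "Bearer YOUR_API_KEY"}

history = [{"role": "system", "content": "You are a helpful assistant."}]

def ask(question):
    history.append({"role": "user", "content": question})
    resp = requests.post(
        URL, headers=HEADERS,
        json={"model": "llama-2-7b-chat", "messages": history},
    )
    reply = resp.json()["choices"][0]["message"]
    history.append(reply)  # keep the assistant turn so the next call has context
    return reply["content"]

print(ask("What is 2+2?"))
print(ask("What about 3+3?"))  # "about" is resolved from the history
```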
ERROR HANDLING
Errors are returned in the following format:
```json
{
  "error": {
    "message": "Model 'invalid-model' not found or not active",
    "type": "invalid_request_error",
    "code": "model_not_found"
  }
}
```
Common error codes:

- `model_not_found`: The specified model doesn't exist or is inactive
- `invalid_model_type`: The specified model is not an embedding model (embeddings endpoint)
- `invalid_messages`: The messages array is empty or invalid
- `missing_input`: The `input` field is missing (embeddings endpoint)
- `invalid_json`: The request body is not valid JSON
- `rate_limit_exceeded`: API key rate limit exceeded
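A caller can branch on the `code` field; for `rate_limit_exceeded`, a short backoff-and-retry is usually enough. A sketch (the helper name, retry count, and delays are illustrative choices, not part of the API):

```python
import time
import requests

def post_with_retry(url, payload, headers, attempts=3):
    """POST to the API, retrying on rate-limit errors with exponential backoff."""
    for attempt in range(attempts):
        resp = requests.post(url, headers=headers, json=payload)
        body = resp.json()
        error = body.get("error")
        if not error:
            return body
        if error.get("code") == "rate_limit_exceeded":
            time.sleep(2 ** attempt)  # 1s, 2s, 4s, ...
            continue
        # Any other code is not retryable; surface it to the caller.
        raise RuntimeError(f"{error.get('code')}: {error.get('message')}")
    raise RuntimeError("rate_limit_exceeded: retries exhausted")
```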