⚠️ Important: Authentication Required
All client operations now require authentication with the HF-Inferoxy server. This is part of the Role-Based Access Control (RBAC) system, which secures access to the proxy services.
Getting Your API Key
- Default Admin User: The system creates a default admin user on first run. Check your server logs or the users.json file for the default admin credentials.
- Create a User Account: Use the admin account to create a regular user account:

curl -X POST "http://localhost:8000/admin/users" \
    -H "Authorization: Bearer ADMIN_API_KEY" \
    -H "Content-Type: application/json" \
    -d '{"username": "youruser", "email": "user@example.com", "full_name": "Your Name", "role": "user"}'
- Use the Generated API Key: The response will include an API key that you’ll use in all client operations.
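If you script user creation, the key can be pulled straight from the JSON response. A minimal sketch with requests; note that the "api_key" field name is an assumption here, so inspect the actual response from your deployment:

import requests

resp = requests.post(
    "http://localhost:8000/admin/users",
    headers={"Authorization": "Bearer ADMIN_API_KEY"},
    json={"username": "youruser", "email": "user@example.com",
          "full_name": "Your Name", "role": "user"},
)
resp.raise_for_status()
# NOTE: "api_key" is an assumed field name; print resp.json() to see your server's schema.
print(resp.json().get("api_key"))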
For detailed RBAC setup and user management, see RBAC_README.md.
Chat Completion (LLM)
Basic Usage
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="sambanova",
    api_key=os.environ["HF_TOKEN"],
)

completion = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",
    messages=[
        {
            "role": "user",
            "content": "What is the capital of France?"
        }
    ],
)

print(completion.choices[0].message)
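For incremental output, the same client also supports streaming. A minimal sketch using the same model and message as above:

# Stream tokens as they are generated instead of waiting for the full reply.
stream = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    stream=True,
)

for chunk in stream:
    # Each chunk carries an incremental delta; content may be None on the final chunk.
    print(chunk.choices[0].delta.content or "", end="")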
cURL Example
curl https://router.huggingface.co/v1/chat/completions \
    -H "Authorization: Bearer $HF_TOKEN" \
    -H 'Content-Type: application/json' \
    -d '{
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ],
        "model": "meta-llama/Llama-3.1-8B-Instruct:sambanova",
        "stream": false
    }'
With HF-Inferoxy Token Management
from typing import Optional

from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def sambanova_chat_completion(messages: list, model: str = "meta-llama/Llama-3.1-8B-Instruct", proxy_api_key: Optional[str] = None):
    # Get a token from the proxy server (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create a client with the managed token
    client = InferenceClient(
        provider="sambanova",
        api_key=token
    )

    try:
        # Make the chat completion request
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
        )

        # Report success to the proxy
        report_token_status(token_id, "success", api_key=proxy_api_key)

        print(completion.choices[0].message)
        return completion
    except HfHubHTTPError as e:
        # Report the HTTP error to the proxy
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        # Report any other error to the proxy
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
if __name__ == "__main__":
    # Get your API key from the admin or create a user account;
    # see RBAC_README.md for details on user management.
    proxy_api_key = "your_proxy_api_key_here"  # Get this from the admin

    messages = [
        {
            "role": "user",
            "content": "What is the capital of France?"
        }
    ]

    sambanova_chat_completion(messages, proxy_api_key=proxy_api_key)
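Since every call to the helper fetches a fresh token from the proxy and reports failures back, a failed request can simply be retried. A small wrapper around the helper above, reusing its imports; the retry count and re-raise behavior are illustrative choices, not part of HF-Inferoxy:

def chat_completion_with_retry(messages: list, retries: int = 3, proxy_api_key: Optional[str] = None):
    # Each attempt requests a new managed token via sambanova_chat_completion.
    for attempt in range(retries):
        try:
            return sambanova_chat_completion(messages, proxy_api_key=proxy_api_key)
        except HfHubHTTPError:
            # On the last attempt, give up and propagate the error.
            if attempt == retries - 1:
                raise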
Chat Completion (VLM)
Basic Usage
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="sambanova",
    api_key=os.environ["HF_TOKEN"],
)

completion = client.chat.completions.create(
    model="meta-llama/Llama-4-Maverick-17B-128E-Instruct",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Describe this image in one sentence."
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                    }
                }
            ]
        }
    ],
)

print(completion.choices[0].message)
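Local images can be sent the same way by encoding them as a data URL. A sketch; photo.jpg is a placeholder file, and data-URL support should be confirmed against the provider's current limits:

import base64

# Encode a local file as a base64 data URL and use it in place of the https URL above.
with open("photo.jpg", "rb") as f:
    b64 = base64.b64encode(f.read()).decode()

image_content = {
    "type": "image_url",
    "image_url": {"url": f"data:image/jpeg;base64,{b64}"}
}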
cURL Example
curl https://router.huggingface.co/v1/chat/completions \
    -H "Authorization: Bearer $HF_TOKEN" \
    -H 'Content-Type: application/json' \
    -d '{
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ],
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct:sambanova",
        "stream": false
    }'
With HF-Inferoxy Token Management
from typing import Optional

from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def sambanova_chat_vlm(proxy_api_key: Optional[str] = None):
    # Get a token from the proxy server (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create a client with the managed token
    client = InferenceClient(
        provider="sambanova",
        api_key=token
    )

    try:
        # Make the vision-language chat completion request
        completion = client.chat.completions.create(
            model="meta-llama/Llama-4-Maverick-17B-128E-Instruct",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Describe this image in one sentence."
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                            }
                        }
                    ]
                }
            ],
        )

        # Report success to the proxy
        report_token_status(token_id, "success", api_key=proxy_api_key)

        print(completion.choices[0].message)
        return completion
    except HfHubHTTPError as e:
        # Report the HTTP error to the proxy
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        # Report any other error to the proxy
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
if __name__ == "__main__":
    # Get your API key from the admin or create a user account;
    # see RBAC_README.md for details on user management.
    proxy_api_key = "your_proxy_api_key_here"  # Get this from the admin

    sambanova_chat_vlm(proxy_api_key=proxy_api_key)
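The helper above hardcodes the prompt and image. A parameterized variant with the same token lifecycle, reusing the imports from the example above; the function name is illustrative:

def sambanova_chat_vlm_custom(image_url: str,
                              prompt: str = "Describe this image in one sentence.",
                              model: str = "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
                              proxy_api_key: Optional[str] = None):
    # Same flow as sambanova_chat_vlm, with image URL, prompt, and model as arguments.
    token, token_id = get_proxy_token(api_key=proxy_api_key)
    client = InferenceClient(provider="sambanova", api_key=token)
    try:
        completion = client.chat.completions.create(
            model=model,
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            }],
        )
        report_token_status(token_id, "success", api_key=proxy_api_key)
        return completion
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise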
Feature Extraction
Basic Usage
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="sambanova",
    api_key=os.environ["HF_TOKEN"],
)

result = client.feature_extraction(
    "Today is a sunny day and I will get some ice cream.",
    model="intfloat/e5-mistral-7b-instruct",
)
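The result is an embedding vector, so two texts can be compared directly. A quick cosine-similarity sketch; it assumes the result converts to a numpy array, which is how huggingface_hub returns embeddings:

import numpy as np

a = client.feature_extraction("Today is a sunny day and I will get some ice cream.",
                              model="intfloat/e5-mistral-7b-instruct")
b = client.feature_extraction("I plan to buy ice cream on this sunny day.",
                              model="intfloat/e5-mistral-7b-instruct")

# Flatten in case the provider returns a leading batch dimension.
a, b = np.asarray(a).ravel(), np.asarray(b).ravel()
similarity = float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))
print(f"Cosine similarity: {similarity:.3f}")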
cURL Example
curl https://router.huggingface.co/sambanova/v1/embeddings \
    -X POST \
    -H "Authorization: Bearer $HF_TOKEN" \
    -H 'Content-Type: application/json' \
    -d '{
        "model": "E5-Mistral-7B-Instruct",
        "input": "Today is a sunny day and I will get some ice cream."
    }'
With HF-Inferoxy Token Management
from typing import Optional

from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def sambanova_feature_extraction(text: str, model: str = "intfloat/e5-mistral-7b-instruct", proxy_api_key: Optional[str] = None):
    # Get a token from the proxy server (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create a client with the managed token
    client = InferenceClient(
        provider="sambanova",
        api_key=token
    )

    try:
        # Extract features from the text
        result = client.feature_extraction(text, model=model)

        # Report success to the proxy
        report_token_status(token_id, "success", api_key=proxy_api_key)

        return result
    except HfHubHTTPError as e:
        # Report the HTTP error to the proxy
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        # Report any other error to the proxy
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
if __name__ == "__main__":
    # Get your API key from the admin or create a user account;
    # see RBAC_README.md for details on user management.
    proxy_api_key = "your_proxy_api_key_here"  # Get this from the admin

    result = sambanova_feature_extraction("Today is a sunny day and I will get some ice cream.", proxy_api_key=proxy_api_key)
    print(f"Feature vector shape: {result.shape}")