⚠️ Important: Authentication Required

All client operations now require authentication with the HF-Inferoxy server. This is part of the Role-Based Access Control (RBAC) system that provides secure access to the proxy services.

Getting Your API Key

  1. Default Admin User: The system creates a default admin user on first run. Check your server logs or the users.json file for the default admin credentials.

  2. Create a User Account: Use the admin account to create a regular user account:
    curl -X POST "http://localhost:8000/admin/users" \
      -H "Authorization: Bearer ADMIN_API_KEY" \
      -H "Content-Type: application/json" \
      -d '{"username": "youruser", "email": "user@example.com", "full_name": "Your Name", "role": "user"}'
    
  3. Use the Generated API Key: The response will include an API key that you’ll use in all client operations.

For detailed RBAC setup and user management, see RBAC_README.md.
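
Rather than hard-coding the key in scripts, you can keep it in an environment variable and read it at startup. A minimal sketch, assuming the hf_token_utils helper shown later in this guide (the variable name HF_INFEROXY_API_KEY is our own suggestion, not something the server requires):

import os
from hf_token_utils import get_proxy_token

# Read the HF-Inferoxy API key from the environment (hypothetical variable name)
proxy_api_key = os.environ["HF_INFEROXY_API_KEY"]

# Authenticate against the proxy and receive a managed HF token
token, token_id = get_proxy_token(api_key=proxy_api_key)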


Chat Completion (LLM)

Basic Usage

import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="nebius",
    api_key=os.environ["HF_TOKEN"],
)

completion = client.chat.completions.create(
    model="Qwen/Qwen3-235B-A22B-Instruct-2507",
    messages=[
        {
            "role": "user",
            "content": "What is the capital of France?"
        }
    ],
)

print(completion.choices[0].message)

cURL Example

curl https://router.huggingface.co/v1/chat/completions \
    -H "Authorization: Bearer $HF_TOKEN" \
    -H 'Content-Type: application/json' \
    -d '{
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ],
        "model": "Qwen/Qwen3-235B-A22B-Instruct-2507:nebius",
        "stream": false
    }'
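
The payload above sets "stream": false. The Python client also supports streaming: passing stream=True makes chat.completions.create return an iterator of chunks instead of a single completion. A short sketch:

import os
from huggingface_hub import InferenceClient

client = InferenceClient(provider="nebius", api_key=os.environ["HF_TOKEN"])

stream = client.chat.completions.create(
    model="Qwen/Qwen3-235B-A22B-Instruct-2507",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    stream=True,
)

# Each chunk carries an incremental delta of the assistant message
for chunk in stream:
    print(chunk.choices[0].delta.content or "", end="")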

With HF-Inferoxy Token Management

from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def nebius_chat_llm(proxy_api_key: str | None = None):
    # Get token from proxy server (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)
    
    # Create client with managed token
    client = InferenceClient(
        provider="nebius",
        api_key=token
    )
    
    try:
        # Make chat completion request
        completion = client.chat.completions.create(
            model="Qwen/Qwen3-235B-A22B-Instruct-2507",
            messages=[
                {
                    "role": "user",
                    "content": "What is the capital of France?"
                }
            ],
        )
        
        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        
        print(completion.choices[0].message)
        return completion
        
    except HfHubHTTPError as e:
        # Report the error
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        # Report generic error
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
if __name__ == "__main__":
    # You need to get your API key from the admin or create a user account
    # See RBAC_README.md for details on user management
    proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
    nebius_chat_llm(proxy_api_key)
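
The provision-report pattern above is repeated verbatim in every example below. If you would rather not copy it each time, it can be centralized in a small context manager. This is a minimal sketch built only on the two hf_token_utils functions already shown; the managed_token name is our own:

from contextlib import contextmanager

from huggingface_hub import InferenceClient
from hf_token_utils import get_proxy_token, report_token_status

@contextmanager
def managed_token(proxy_api_key: str | None = None):
    # Provision a token, hand it to the caller, then report the outcome
    token, token_id = get_proxy_token(api_key=proxy_api_key)
    try:
        yield token
        report_token_status(token_id, "success", api_key=proxy_api_key)
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage: the try/except boilerplate collapses to a with-block
with managed_token(proxy_api_key="your_proxy_api_key_here") as token:
    client = InferenceClient(provider="nebius", api_key=token)
    completion = client.chat.completions.create(
        model="Qwen/Qwen3-235B-A22B-Instruct-2507",
        messages=[{"role": "user", "content": "What is the capital of France?"}],
    )
    print(completion.choices[0].message)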

Chat Completion (VLM)

Basic Usage

import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="nebius",
    api_key=os.environ["HF_TOKEN"],
)

completion = client.chat.completions.create(
    model="google/gemma-3-27b-it",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Describe this image in one sentence."
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                    }
                }
            ]
        }
    ],
)

print(completion.choices[0].message)

cURL Example

curl https://router.huggingface.co/v1/chat/completions \
    -H "Authorization: Bearer $HF_TOKEN" \
    -H 'Content-Type: application/json' \
    -d '{
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ],
        "model": "google/gemma-3-27b-it:nebius",
        "stream": false
    }'
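
The examples above reference a publicly hosted image. Providers that follow the OpenAI-compatible message format generally also accept inline base64 data URLs, which lets you send a local file; a sketch under that assumption (cat.png is a placeholder path):

import base64
import os
from huggingface_hub import InferenceClient

client = InferenceClient(provider="nebius", api_key=os.environ["HF_TOKEN"])

# Encode a local image file as a data URL
with open("cat.png", "rb") as f:
    encoded = base64.b64encode(f.read()).decode("utf-8")
data_url = f"data:image/png;base64,{encoded}"

completion = client.chat.completions.create(
    model="google/gemma-3-27b-it",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image in one sentence."},
                {"type": "image_url", "image_url": {"url": data_url}},
            ],
        }
    ],
)

print(completion.choices[0].message)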

With HF-Inferoxy Token Management

from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def nebius_chat_vlm(proxy_api_key: str | None = None):
    # Get token from proxy server (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)
    
    # Create client with managed token
    client = InferenceClient(
        provider="nebius",
        api_key=token
    )
    
    try:
        # Make vision-language chat completion request
        completion = client.chat.completions.create(
            model="google/gemma-3-27b-it",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Describe this image in one sentence."
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                            }
                        }
                    ]
                }
            ],
        )
        
        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        
        print(completion.choices[0].message)
        return completion
        
    except HfHubHTTPError as e:
        # Report the error
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        # Report generic error
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
if __name__ == "__main__":
    # You need to get your API key from the admin or create a user account
    # See RBAC_README.md for details on user management
    proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
    nebius_chat_vlm(proxy_api_key)

Feature Extraction

Basic Usage

import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="nebius",
    api_key=os.environ["HF_TOKEN"],
)

result = client.feature_extraction(
    "Today is a sunny day and I will get some ice cream.",
    model="Qwen/Qwen3-Embedding-8B",
)
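
feature_extraction returns a numpy array. A common next step is comparing two texts by cosine similarity; a short sketch, reusing the client from above (we flatten first because, depending on the model, the result may be shaped (dim,) or (1, dim)):

import numpy as np

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    # Flatten so both (dim,) and (1, dim) outputs are handled
    a, b = np.ravel(a), np.ravel(b)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

emb_a = client.feature_extraction(
    "Today is a sunny day and I will get some ice cream.",
    model="Qwen/Qwen3-Embedding-8B",
)
emb_b = client.feature_extraction(
    "It is bright outside, so I am going to buy ice cream.",
    model="Qwen/Qwen3-Embedding-8B",
)

print(f"Cosine similarity: {cosine_similarity(emb_a, emb_b):.3f}")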

cURL Example

curl https://router.huggingface.co/nebius/v1/embeddings \
    -X POST \
    -H "Authorization: Bearer $HF_TOKEN" \
    -H 'Content-Type: application/json' \
    -d '{
        "input": "\"Today is a sunny day and I will get some ice cream.\"",
        "model": "Qwen/Qwen3-Embedding-8B"
    }'

With HF-Inferoxy Token Management

from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def nebius_feature_extraction(text: str, model: str = "Qwen/Qwen3-Embedding-8B", proxy_api_key: str | None = None):
    # Get token from proxy server (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)
    
    # Create client with managed token
    client = InferenceClient(
        provider="nebius",
        api_key=token
    )
    
    try:
        # Extract features
        result = client.feature_extraction(text, model=model)
        
        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        
        return result
        
    except HfHubHTTPError as e:
        # Report the error
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        # Report generic error
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
if __name__ == "__main__":
    # You need to get your API key from the admin or create a user account
    # See RBAC_README.md for details on user management
    proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
    embeddings = nebius_feature_extraction("Today is a sunny day and I will get some ice cream.", proxy_api_key=proxy_api_key)
    print(f"Embedding shape: {len(embeddings)}")

Text Generation

Basic Usage

import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="nebius",
    api_key=os.environ["HF_TOKEN"],
)

completion = client.chat.completions.create(
    model="Qwen/Qwen3-235B-A22B-Instruct-2507",
    messages=[
        {
            "role": "user",
            "content": "Can you please let us know more details about your request?"
        }
    ],
)

print(completion.choices[0].message)

cURL Example

curl https://router.huggingface.co/nebius/v1/completions \
    -H "Authorization: Bearer $HF_TOKEN" \
    -H 'Content-Type: application/json' \
    -d '{
        "messages": "\"Can you please let us know more details about your \"",
        "model": "Qwen/Qwen3-235B-A22B-Instruct-2507:nebius",
        "stream": false
    }'
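
Both the Python client and the HTTP endpoints accept the usual OpenAI-style sampling parameters. A brief sketch with a few of them, reusing the client from Basic Usage (the values are illustrative, not recommendations):

completion = client.chat.completions.create(
    model="Qwen/Qwen3-235B-A22B-Instruct-2507",
    messages=[{"role": "user", "content": "Write a haiku about proxy servers."}],
    max_tokens=128,   # upper bound on generated tokens
    temperature=0.7,  # sampling temperature; lower values are more deterministic
    top_p=0.9,        # nucleus sampling threshold
)

print(completion.choices[0].message)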

With HF-Inferoxy Token Management

from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def nebius_text_generation(prompt: str, model: str = "Qwen/Qwen3-235B-A22B-Instruct-2507", proxy_api_key: str | None = None):
    # Get token from proxy server (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)
    
    # Create client with managed token
    client = InferenceClient(
        provider="nebius",
        api_key=token
    )
    
    try:
        # Generate text
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": prompt
                }
            ],
        )
        
        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        
        print(completion.choices[0].message)
        return completion
        
    except HfHubHTTPError as e:
        # Report the error
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        # Report generic error
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
if __name__ == "__main__":
    # You need to get your API key from the admin or create a user account
    # See RBAC_README.md for details on user management
    proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
    nebius_text_generation("Can you please let us know more details about your request?", proxy_api_key=proxy_api_key)

Text To Image

Basic Usage

import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="nebius",
    api_key=os.environ["HF_TOKEN"],
)

# output is a PIL.Image object
image = client.text_to_image(
    "Astronaut riding a horse",
    model="black-forest-labs/FLUX.1-dev",
)
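
Because the return value is a PIL.Image, you can save or inspect it with standard Pillow calls:

# Persist the generated image to disk and report its dimensions
image.save("astronaut.png")
print(f"Saved {image.size[0]}x{image.size[1]} image to astronaut.png")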

cURL Example

curl -X POST "https://router.huggingface.co/nebius/v1/images/generations" \
  -H "Authorization: Bearer $HF_TOKEN" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "black-forest-labs/flux-dev",
    "prompt": "Astronaut riding a horse",
    "response_format": "b64_json"
  }' \
| jq -r '.data[0].b64_json' \
| base64 --decode > astronaut.png

With HF-Inferoxy Token Management

from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def nebius_text_to_image(prompt: str, model: str = "black-forest-labs/FLUX.1-dev", proxy_api_key: str | None = None):
    # Get token from proxy server (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)
    
    # Create client with managed token
    client = InferenceClient(
        provider="nebius",
        api_key=token
    )
    
    try:
        # Generate image from text
        image = client.text_to_image(prompt, model=model)
        
        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        
        return image
        
    except HfHubHTTPError as e:
        # Report the error
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        # Report generic error
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
if __name__ == "__main__":
    # You need to get your API key from the admin or create a user account
    # See RBAC_README.md for details on user management
    proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
    image = nebius_text_to_image("Astronaut riding a horse", proxy_api_key=proxy_api_key)
    # image is a PIL.Image object
    print(f"Generated image: {image.size}")