## ⚠️ Important: Authentication Required

All client operations now require authentication with the HF-Inferoxy server. This is part of the Role-Based Access Control (RBAC) system that provides secure access to the proxy services.

### Getting Your API Key

1. **Default Admin User**: The system creates a default admin user on first run. Check your server logs or the `users.json` file for the default admin credentials.
2. **Create a User Account**: Use the admin account to create a regular user account:

   ```bash
   curl -X POST "http://localhost:8000/admin/users" \
     -H "Authorization: Bearer ADMIN_API_KEY" \
     -H "Content-Type: application/json" \
     -d '{"username": "youruser", "email": "user@example.com", "full_name": "Your Name", "role": "user"}'
   ```

3. **Use the Generated API Key**: The response will include an API key that you’ll use in all client operations.

For detailed RBAC setup and user management, see `RBAC_README.md`.
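For reference, the helper interface used throughout this guide looks roughly like the sketch below. The endpoint paths (`/keys/provision`, `/keys/report`) and JSON field names are assumptions for illustration only; use the `hf_token_utils` module that ships with HF-Inferoxy rather than this sketch.

```python
# Illustrative sketch of the hf_token_utils interface used in this guide.
# NOTE: the endpoint paths and JSON field names below are assumptions, not
# the actual HF-Inferoxy API; see the real hf_token_utils module.
import requests

PROXY_URL = "http://localhost:8000"  # adjust to your deployment

def get_proxy_token(api_key: str, proxy_url: str = PROXY_URL) -> tuple:
    """Provision a managed HF token from the proxy; returns (token, token_id)."""
    resp = requests.get(
        f"{proxy_url}/keys/provision",  # assumed endpoint
        headers={"Authorization": f"Bearer {api_key}"},
        timeout=10,
    )
    resp.raise_for_status()
    data = resp.json()
    return data["token"], data["token_id"]  # assumed field names

def report_token_status(token_id: str, status: str, error: str = None,
                        api_key: str = None, proxy_url: str = PROXY_URL) -> None:
    """Report back whether a token worked so the proxy can rotate bad ones."""
    requests.post(
        f"{proxy_url}/keys/report",  # assumed endpoint
        headers={"Authorization": f"Bearer {api_key}"},
        json={"token_id": token_id, "status": status, "error": error},
        timeout=10,
    )
```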
## Automatic Speech Recognition

### Basic Usage

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key=os.environ["HF_TOKEN"],
)

output = client.automatic_speech_recognition("sample1.flac", model="openai/whisper-large-v3")
```

### cURL Example

```bash
curl https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3 \
  -X POST \
  -H "Authorization: Bearer $HF_TOKEN" \
  -H 'Content-Type: audio/flac' \
  --data-binary @"sample1.flac"
```
### With HF-Inferoxy Token Management

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def transcribe_audio(audio_file: str, model: str = "openai/whisper-large-v3", proxy_api_key: str = None):
    # Get token from proxy server (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create client with managed token
    client = InferenceClient(
        provider="hf-inference",
        api_key=token
    )

    try:
        # Transcribe audio
        result = client.automatic_speech_recognition(audio_file, model=model)

        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        return result
    except HfHubHTTPError as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
# You need to get your API key from the admin or create a user account
# See RBAC_README.md for details on user management
proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
output = transcribe_audio("sample1.flac", proxy_api_key=proxy_api_key)
```
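`automatic_speech_recognition` returns a dataclass-style output; the transcription itself is on the `.text` attribute:

```python
output = transcribe_audio("sample1.flac", proxy_api_key=proxy_api_key)
print(output.text)  # the transcribed speech
```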
## Chat Completion (LLM)

### Basic Usage

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key=os.environ["HF_TOKEN"],
)

completion = client.chat.completions.create(
    model="HuggingFaceTB/SmolLM3-3B",
    messages=[
        {
            "role": "user",
            "content": "What is the capital of France?"
        }
    ],
)

print(completion.choices[0].message)
```

### cURL Example

```bash
curl https://router.huggingface.co/v1/chat/completions \
  -H "Authorization: Bearer $HF_TOKEN" \
  -H 'Content-Type: application/json' \
  -d '{
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ],
    "model": "HuggingFaceTB/SmolLM3-3B:hf-inference",
    "stream": false
  }'
```
### With HF-Inferoxy Token Management

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def chat_completion(messages: list, model: str = "HuggingFaceTB/SmolLM3-3B", proxy_api_key: str = None):
    # Get token from proxy server (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create client with managed token
    client = InferenceClient(
        provider="hf-inference",
        api_key=token
    )

    try:
        # Make chat completion request
        completion = client.chat.completions.create(
            model=model,
            messages=messages
        )

        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        print(completion.choices[0].message)
        return completion
    except HfHubHTTPError as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
# You need to get your API key from the admin or create a user account
# See RBAC_README.md for details on user management
proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
messages = [{"role": "user", "content": "What is the capital of France?"}]
completion = chat_completion(messages, proxy_api_key=proxy_api_key)
```
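The managed-token pattern also works with streaming. A minimal sketch, assuming the same `hf_token_utils` helpers: `InferenceClient` accepts `stream=True` for chat completions and yields chunks whose `choices[0].delta.content` holds the incremental text.

```python
def chat_completion_stream(messages: list, model: str = "HuggingFaceTB/SmolLM3-3B", proxy_api_key: str = None):
    token, token_id = get_proxy_token(api_key=proxy_api_key)
    client = InferenceClient(provider="hf-inference", api_key=token)
    try:
        # stream=True yields partial chunks as the model generates
        for chunk in client.chat.completions.create(model=model, messages=messages, stream=True):
            delta = chunk.choices[0].delta.content
            if delta:
                print(delta, end="", flush=True)
        print()
        report_token_status(token_id, "success", api_key=proxy_api_key)
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
```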
## Feature Extraction

### Basic Usage

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key=os.environ["HF_TOKEN"],
)

result = client.feature_extraction(
    "Today is a sunny day and I will get some ice cream.",
    model="intfloat/multilingual-e5-large",
)
```

### cURL Example

```bash
curl https://router.huggingface.co/hf-inference/models/intfloat/multilingual-e5-large/pipeline/feature-extraction \
  -X POST \
  -H "Authorization: Bearer $HF_TOKEN" \
  -H 'Content-Type: application/json' \
  -d '{
    "inputs": "Today is a sunny day and I will get some ice cream."
  }'
```
### With HF-Inferoxy Token Management

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def extract_features(text: str, model: str = "intfloat/multilingual-e5-large", proxy_api_key: str = None):
    # Get managed token (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create client
    client = InferenceClient(provider="hf-inference", api_key=token)

    try:
        # Extract features
        result = client.feature_extraction(text, model=model)

        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        return result
    except HfHubHTTPError as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
# You need to get your API key from the admin or create a user account
# See RBAC_README.md for details on user management
proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
embeddings = extract_features("Today is a sunny day and I will get some ice cream.", proxy_api_key=proxy_api_key)
```
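`feature_extraction` returns a numpy array, so a common follow-up such as cosine similarity needs no extra services. A small sketch (the shape handling is illustrative, since some models return per-token vectors that must be pooled first):

```python
import numpy as np

def _sentence_vector(emb):
    # Collapse possible (tokens, hidden) shapes to a single vector;
    # illustrative only, not guaranteed for every model.
    v = np.asarray(emb).squeeze()
    return v.mean(axis=0) if v.ndim > 1 else v

a = _sentence_vector(extract_features("Today is a sunny day and I will get some ice cream.", proxy_api_key=proxy_api_key))
b = _sentence_vector(extract_features("Great weather for ice cream today.", proxy_api_key=proxy_api_key))
cosine = float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))
print(f"cosine similarity: {cosine:.3f}")
```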
## Fill Mask

### Basic Usage

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key=os.environ["HF_TOKEN"],
)

result = client.fill_mask(
    "The answer to the universe is [MASK].",
    model="google-bert/bert-base-uncased",
)
```

### cURL Example

```bash
curl https://router.huggingface.co/hf-inference/models/google-bert/bert-base-uncased \
  -X POST \
  -H "Authorization: Bearer $HF_TOKEN" \
  -H 'Content-Type: application/json' \
  -d '{
    "inputs": "The answer to the universe is [MASK]."
  }'
```
### With HF-Inferoxy Token Management

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def fill_mask_task(text: str, model: str = "google-bert/bert-base-uncased", proxy_api_key: str = None):
    # Get managed token (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create client
    client = InferenceClient(provider="hf-inference", api_key=token)

    try:
        # Fill mask
        result = client.fill_mask(text, model=model)

        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        return result
    except HfHubHTTPError as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
# You need to get your API key from the admin or create a user account
# See RBAC_README.md for details on user management
proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
result = fill_mask_task("The answer to the universe is [MASK].", proxy_api_key=proxy_api_key)
```
## Image Classification

### Basic Usage

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key=os.environ["HF_TOKEN"],
)

output = client.image_classification("cats.jpg", model="Falconsai/nsfw_image_detection")
```

### cURL Example

```bash
curl https://router.huggingface.co/hf-inference/models/Falconsai/nsfw_image_detection \
  -X POST \
  -H "Authorization: Bearer $HF_TOKEN" \
  -H 'Content-Type: image/jpeg' \
  --data-binary @"cats.jpg"
```
### With HF-Inferoxy Token Management

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def classify_image(image_file: str, model: str = "Falconsai/nsfw_image_detection", proxy_api_key: str = None):
    # Get managed token (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create client
    client = InferenceClient(provider="hf-inference", api_key=token)

    try:
        # Classify image
        result = client.image_classification(image_file, model=model)

        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        return result
    except HfHubHTTPError as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
# You need to get your API key from the admin or create a user account
# See RBAC_README.md for details on user management
proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
output = classify_image("cats.jpg", proxy_api_key=proxy_api_key)
```
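The result is a list of label/score elements (typically sorted by score), so reading the top prediction looks like:

```python
top = output[0]
print(f"{top.label}: {top.score:.3f}")

# Or inspect every label above a threshold
for item in output:
    if item.score >= 0.10:
        print(item.label, round(item.score, 3))
```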
## Image Segmentation

### Basic Usage

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key=os.environ["HF_TOKEN"],
)

output = client.image_segmentation("cats.jpg", model="jonathandinu/face-parsing")
```

### cURL Example

```bash
curl https://router.huggingface.co/hf-inference/models/jonathandinu/face-parsing \
  -X POST \
  -H "Authorization: Bearer $HF_TOKEN" \
  -H 'Content-Type: image/jpeg' \
  --data-binary @"cats.jpg"
```
### With HF-Inferoxy Token Management

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def segment_image(image_file: str, model: str = "jonathandinu/face-parsing", proxy_api_key: str = None):
    # Get managed token (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create client
    client = InferenceClient(provider="hf-inference", api_key=token)

    try:
        # Segment image
        result = client.image_segmentation(image_file, model=model)

        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        return result
    except HfHubHTTPError as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
# You need to get your API key from the admin or create a user account
# See RBAC_README.md for details on user management
proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
output = segment_image("cats.jpg", proxy_api_key=proxy_api_key)
```
## Object Detection

### Basic Usage

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key=os.environ["HF_TOKEN"],
)

output = client.object_detection("cats.jpg", model="facebook/detr-resnet-50")
```

### cURL Example

```bash
curl https://router.huggingface.co/hf-inference/models/facebook/detr-resnet-50 \
  -X POST \
  -H "Authorization: Bearer $HF_TOKEN" \
  -H 'Content-Type: image/jpeg' \
  --data-binary @"cats.jpg"
```
### With HF-Inferoxy Token Management

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def detect_objects(image_file: str, model: str = "facebook/detr-resnet-50", proxy_api_key: str = None):
    # Get managed token (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create client
    client = InferenceClient(provider="hf-inference", api_key=token)

    try:
        # Detect objects
        result = client.object_detection(image_file, model=model)

        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        return result
    except HfHubHTTPError as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
# You need to get your API key from the admin or create a user account
# See RBAC_README.md for details on user management
proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
output = detect_objects("cats.jpg", proxy_api_key=proxy_api_key)
```
## Question Answering

### Basic Usage

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key=os.environ["HF_TOKEN"],
)

answer = client.question_answering(
    question="What is my name?",
    context="My name is Clara and I live in Berkeley.",
    model="deepset/roberta-base-squad2",
)
```
### With HF-Inferoxy Token Management

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def answer_question(question: str, context: str, model: str = "deepset/roberta-base-squad2", proxy_api_key: str = None):
    # Get managed token (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create client
    client = InferenceClient(provider="hf-inference", api_key=token)

    try:
        # Answer question
        result = client.question_answering(
            question=question,
            context=context,
            model=model
        )

        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        return result
    except HfHubHTTPError as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
# You need to get your API key from the admin or create a user account
# See RBAC_README.md for details on user management
proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
answer = answer_question("What is my name?", "My name is Clara and I live in Berkeley.", proxy_api_key=proxy_api_key)
```
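The returned object exposes the extracted span, its confidence, and the character offsets into the context:

```python
answer = answer_question("What is my name?", "My name is Clara and I live in Berkeley.", proxy_api_key=proxy_api_key)
print(answer.answer)             # extracted span, e.g. "Clara"
print(round(answer.score, 3))    # model confidence
print(answer.start, answer.end)  # character offsets into the context
```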
## Summarization

### Basic Usage

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key=os.environ["HF_TOKEN"],
)

result = client.summarization(
    "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.",
    model="facebook/bart-large-cnn",
)
```

### cURL Example

```bash
curl https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn \
  -X POST \
  -H "Authorization: Bearer $HF_TOKEN" \
  -H 'Content-Type: application/json' \
  -d '{
    "inputs": "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."
  }'
```
### With HF-Inferoxy Token Management

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def summarize_text(text: str, model: str = "facebook/bart-large-cnn", proxy_api_key: str = None):
    # Get managed token (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create client
    client = InferenceClient(provider="hf-inference", api_key=token)

    try:
        # Summarize text
        result = client.summarization(text, model=model)

        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        return result
    except HfHubHTTPError as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
# You need to get your API key from the admin or create a user account
# See RBAC_README.md for details on user management
proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
long_text = "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."
result = summarize_text(long_text, proxy_api_key=proxy_api_key)
```
## Table Question Answering

### Basic Usage

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key=os.environ["HF_TOKEN"],
)

answer = client.table_question_answering(
    query="How many stars does the transformers repository have?",
    table={
        "Repository": ["Transformers", "Datasets", "Tokenizers"],
        "Stars": ["36542", "4512", "3934"],
        "Contributors": ["651", "77", "34"],
        "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
    },
    model="google/tapas-base-finetuned-wtq",
)
```
### With HF-Inferoxy Token Management

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def answer_table_question(query: str, table: dict, model: str = "google/tapas-base-finetuned-wtq", proxy_api_key: str = None):
    # Get managed token (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create client
    client = InferenceClient(provider="hf-inference", api_key=token)

    try:
        # Answer table question
        result = client.table_question_answering(
            query=query,
            table=table,
            model=model
        )

        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        return result
    except HfHubHTTPError as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
# You need to get your API key from the admin or create a user account
# See RBAC_README.md for details on user management
proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
table_data = {
    "Repository": ["Transformers", "Datasets", "Tokenizers"],
    "Stars": ["36542", "4512", "3934"],
    "Contributors": ["651", "77", "34"],
    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"]
}
answer = answer_table_question("How many stars does the transformers repository have?", table_data, proxy_api_key=proxy_api_key)
```
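To inspect the result, the output element carries the final answer plus the cells and aggregation operator TAPAS used (field names per huggingface_hub's table-question-answering output):

```python
print(answer.answer)      # e.g. "36542", taken from the Stars column
print(answer.cells)       # the table cells that produced the answer
print(answer.aggregator)  # aggregation applied, e.g. "NONE" or "SUM"
```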
## Text Classification

### Basic Usage

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key=os.environ["HF_TOKEN"],
)

result = client.text_classification(
    "I like you. I love you",
    model="tabularisai/multilingual-sentiment-analysis",
)
```

### cURL Example

```bash
curl https://router.huggingface.co/hf-inference/models/tabularisai/multilingual-sentiment-analysis \
  -X POST \
  -H "Authorization: Bearer $HF_TOKEN" \
  -H 'Content-Type: application/json' \
  -d '{
    "inputs": "I like you. I love you"
  }'
```
### With HF-Inferoxy Token Management

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def classify_text(text: str, model: str = "tabularisai/multilingual-sentiment-analysis", proxy_api_key: str = None):
    # Get managed token (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create client
    client = InferenceClient(provider="hf-inference", api_key=token)

    try:
        # Classify text
        result = client.text_classification(text, model=model)

        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        return result
    except HfHubHTTPError as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
# You need to get your API key from the admin or create a user account
# See RBAC_README.md for details on user management
proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
result = classify_text("I like you. I love you", proxy_api_key=proxy_api_key)
```
## Text Generation

### Basic Usage

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key=os.environ["HF_TOKEN"],
)

completion = client.chat.completions.create(
    model="HuggingFaceTB/SmolLM3-3B",
    messages=[
        {
            "role": "user",
            "content": "Can you please let us know more details about your project?"
        }
    ],
)

print(completion.choices[0].message)
```

### cURL Example

```bash
curl https://router.huggingface.co/v1/chat/completions \
  -H "Authorization: Bearer $HF_TOKEN" \
  -H 'Content-Type: application/json' \
  -d '{
    "messages": [
      {
        "role": "user",
        "content": "Can you please let us know more details about your project?"
      }
    ],
    "model": "HuggingFaceTB/SmolLM3-3B:hf-inference",
    "stream": false
  }'
```
### With HF-Inferoxy Token Management

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def generate_text(prompt: str, model: str = "HuggingFaceTB/SmolLM3-3B", proxy_api_key: str = None):
    # Get managed token (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create client
    client = InferenceClient(provider="hf-inference", api_key=token)

    try:
        # Generate text via the chat completion endpoint
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": prompt
                }
            ]
        )

        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        print(completion.choices[0].message)
        return completion
    except HfHubHTTPError as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
# You need to get your API key from the admin or create a user account
# See RBAC_README.md for details on user management
proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
completion = generate_text("Can you please let us know more details about your project?", proxy_api_key=proxy_api_key)
```
## Text To Image

### Basic Usage

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key=os.environ["HF_TOKEN"],
)

# output is a PIL.Image object
image = client.text_to_image(
    "Astronaut riding a horse",
    model="stabilityai/stable-diffusion-xl-base-1.0",
)
```

### cURL Example

```bash
curl -X POST "https://router.huggingface.co/hf-inference/models/stabilityai/stable-diffusion-xl-base-1.0" \
  -H "Authorization: Bearer $HF_TOKEN" \
  -H "Content-Type: application/json" \
  -d '{
    "inputs": "Astronaut riding a horse"
  }' -o astronaut.jpeg
```
### With HF-Inferoxy Token Management

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def generate_image(prompt: str, model: str = "stabilityai/stable-diffusion-xl-base-1.0", proxy_api_key: str = None):
    # Get managed token (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create client
    client = InferenceClient(provider="hf-inference", api_key=token)

    try:
        # Generate image
        image = client.text_to_image(prompt, model=model)

        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        return image
    except HfHubHTTPError as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
# You need to get your API key from the admin or create a user account
# See RBAC_README.md for details on user management
proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
image = generate_image("Astronaut riding a horse", proxy_api_key=proxy_api_key)
```
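Since the result is a `PIL.Image.Image`, saving or inspecting it needs no extra code:

```python
image = generate_image("Astronaut riding a horse", proxy_api_key=proxy_api_key)
image.save("astronaut.png")
print(image.size)  # (width, height) in pixels
```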
## Token Classification

### Basic Usage

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key=os.environ["HF_TOKEN"],
)

result = client.token_classification(
    "My name is Sarah Jessica Parker but you can call me Jessica",
    model="dslim/bert-base-NER",
)
```

### cURL Example

```bash
curl https://router.huggingface.co/hf-inference/models/dslim/bert-base-NER \
  -X POST \
  -H "Authorization: Bearer $HF_TOKEN" \
  -H 'Content-Type: application/json' \
  -d '{
    "inputs": "My name is Sarah Jessica Parker but you can call me Jessica"
  }'
```
### With HF-Inferoxy Token Management

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def classify_tokens(text: str, model: str = "dslim/bert-base-NER", proxy_api_key: str = None):
    # Get managed token (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create client
    client = InferenceClient(provider="hf-inference", api_key=token)

    try:
        # Classify tokens
        result = client.token_classification(text, model=model)

        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        return result
    except HfHubHTTPError as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
# You need to get your API key from the admin or create a user account
# See RBAC_README.md for details on user management
proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
result = classify_tokens("My name is Sarah Jessica Parker but you can call me Jessica", proxy_api_key=proxy_api_key)
```
## Translation

### Basic Usage

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key=os.environ["HF_TOKEN"],
)

result = client.translation(
    "Меня зовут Вольфганг и я живу в Берлине",
    model="google-t5/t5-small",
)
```

### cURL Example

```bash
curl https://router.huggingface.co/hf-inference/models/google-t5/t5-small \
  -X POST \
  -H "Authorization: Bearer $HF_TOKEN" \
  -H 'Content-Type: application/json' \
  -d '{
    "inputs": "Меня зовут Вольфганг и я живу в Берлине"
  }'
```
### With HF-Inferoxy Token Management

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def translate_text(text: str, model: str = "google-t5/t5-small", proxy_api_key: str = None):
    # Get managed token (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create client
    client = InferenceClient(provider="hf-inference", api_key=token)

    try:
        # Translate text
        result = client.translation(text, model=model)

        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        return result
    except HfHubHTTPError as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
# You need to get your API key from the admin or create a user account
# See RBAC_README.md for details on user management
proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
result = translate_text("Меня зовут Вольфганг и я живу в Берлине", proxy_api_key=proxy_api_key)
```
## Zero Shot Classification

### Basic Usage

```python
import os
import requests

API_URL = "https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli"
headers = {
    "Authorization": f"Bearer {os.environ['HF_TOKEN']}",
}

def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

output = query({
    "inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!",
    "parameters": {"candidate_labels": ["refund", "legal", "faq"]},
})
```

### cURL Example

```bash
curl https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli \
  -X POST \
  -d '{"inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}' \
  -H 'Content-Type: application/json' \
  -H "Authorization: Bearer $HF_TOKEN"
```
### With HF-Inferoxy Token Management

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from hf_token_utils import get_proxy_token, report_token_status

def zero_shot_classify(text: str, candidate_labels: list, model: str = "facebook/bart-large-mnli", proxy_api_key: str = None):
    # Get managed token (requires authentication)
    token, token_id = get_proxy_token(api_key=proxy_api_key)

    # Create client
    client = InferenceClient(provider="hf-inference", api_key=token)

    try:
        # Zero-shot classification
        result = client.zero_shot_classification(
            text,
            candidate_labels=candidate_labels,
            model=model
        )

        # Report success
        report_token_status(token_id, "success", api_key=proxy_api_key)
        return result
    except HfHubHTTPError as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage
# You need to get your API key from the admin or create a user account
# See RBAC_README.md for details on user management
proxy_api_key = "your_proxy_api_key_here"  # Get this from admin
text = "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!"
labels = ["refund", "legal", "faq"]
output = zero_shot_classify(text, labels, proxy_api_key=proxy_api_key)
```
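Every example above repeats the same provision/call/report cycle. If that gets noisy, one option is a small wrapper built from the same primitives; this is a convenience sketch, not part of HF-Inferoxy itself:

```python
from typing import Any, Callable
from huggingface_hub import InferenceClient
from hf_token_utils import get_proxy_token, report_token_status

def with_managed_token(proxy_api_key: str, fn: Callable[[InferenceClient], Any]) -> Any:
    """Provision a token, run fn(client), and report the outcome to the proxy."""
    token, token_id = get_proxy_token(api_key=proxy_api_key)
    client = InferenceClient(provider="hf-inference", api_key=token)
    try:
        result = fn(client)
        report_token_status(token_id, "success", api_key=proxy_api_key)
        return result
    except Exception as e:
        report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        raise

# Usage: any task from this guide becomes a one-liner
result = with_managed_token(
    proxy_api_key,
    lambda client: client.translation("Меня зовут Вольфганг и я живу в Берлине", model="google-t5/t5-small"),
)
```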