Skip to content

Python SDK Reference

The PaiTIENT Secure Model Python SDK provides a comprehensive interface to the Secure Model Service, allowing data scientists and ML engineers to deploy, manage, and use AI models in a HIPAA/SOC2 compliant environment.

Installation

bash
pip install paitient-secure-model

Client Initialization

python
from paitient_secure_model import SecureModelClient

# Initialize from environment variables
client = SecureModelClient()

# Or specify credentials directly
client = SecureModelClient(
    api_key="your-api-key",
    client_id="your-client-id",
    endpoint="https://api.paitient.ai"  # Optional, defaults to production
)

Core Classes

SecureModelClient

The main client for interacting with the PaiTIENT Secure Model Service.

Constructor Parameters

| Parameter | Type | Description | Required | Default |
| --- | --- | --- | --- | --- |
| api_key | str | Your PaiTIENT API key | No (if in env) | os.environ.get("PAITIENT_API_KEY") |
| client_id | str | Your PaiTIENT client ID | No (if in env) | os.environ.get("PAITIENT_CLIENT_ID") |
| endpoint | str | API endpoint | No | "https://api.paitient.ai" |
| timeout | float | Request timeout in seconds | No | 60.0 |
| max_retries | int | Max number of retries | No | 3 |

Deployment Management

Deploy a Model

python
def deploy_model():
    """Deploy the default clinical model on GPU and report its deployment ID."""
    new_deployment = client.deploy_model(
        model_name="ZimaBlueAI/HuatuoGPT-o1-8B",
        deployment_name="clinical-assistant",
        use_gpu=True,
        region="us-west-2",
        replicas=1,
        instance_type="ml.g4dn.xlarge",
    )
    print(f"Deployment ID: {new_deployment.id}")
    return new_deployment

Deploy Model Parameters

| Parameter | Type | Description | Required | Default |
| --- | --- | --- | --- | --- |
| model_name | str | HuggingFace model name | No | "ZimaBlueAI/HuatuoGPT-o1-8B" |
| deployment_name | str | Name for the deployment | Yes | - |
| use_gpu | bool | Whether to use GPU | No | False |
| region | str | AWS region | No | "us-west-2" |
| replicas | int | Number of replicas | No | 1 |
| instance_type | str | EC2 instance type | No | Depends on use_gpu |

Get Deployment Status

python
def check_status(deployment_id):
    """Fetch a deployment's current status, print it, and return it."""
    current = client.get_deployment_status(deployment_id)
    for line in (f"Status: {current.status}", f"Message: {current.message}"):
        print(line)
    return current

List All Deployments

python
def list_deployments():
    """Print a one-line summary for every deployment, then return the list."""
    all_deployments = client.list_deployments()
    for item in all_deployments:
        print(f"ID: {item.id}, Name: {item.name}, Status: {item.status}")
    return all_deployments

Delete a Deployment

python
def delete_deployment(deployment_id):
    """Tear down the given deployment and confirm on stdout."""
    # delete_deployment raises on failure, so reaching the print means success.
    client.delete_deployment(deployment_id)
    print("Deployment deleted successfully")

Text Generation

Generate Text

python
def generate_text(deployment_id, prompt):
    """Run one completion against a deployment, print text and token usage."""
    response = client.generate(
        deployment_id=deployment_id,
        prompt=prompt,
        max_tokens=500,
        temperature=0.7,
        top_p=0.9,
        presence_penalty=0,
        frequency_penalty=0,
    )
    print(response.text)
    print(f"Token usage: {response.usage.total_tokens}")
    return response

Generate Parameters

| Parameter | Type | Description | Required | Default |
| --- | --- | --- | --- | --- |
| deployment_id | str | ID of the deployment | Yes | - |
| prompt | str | Input text prompt | Yes | - |
| max_tokens | int | Max tokens to generate | No | 100 |
| temperature | float | Sampling temperature (0-2) | No | 0.7 |
| top_p | float | Nucleus sampling parameter (0-1) | No | 0.9 |
| presence_penalty | float | Presence penalty (0-2) | No | 0.0 |
| frequency_penalty | float | Frequency penalty (0-2) | No | 0.0 |

Stream Text Generation

python
def stream_text(deployment_id, prompt):
    """Stream a completion, writing each chunk to stdout as it arrives."""
    stream = client.generate_stream(
        deployment_id=deployment_id,
        prompt=prompt,
        max_tokens=1000,
    )
    for piece in stream:
        # Print fragments unbuffered so output appears incrementally.
        # or handle the chunk in your application
        print(piece.text, end="", flush=True)

Subscription Management

Get Subscription Status

python
def check_subscription():
    """Print the subscription tier, feature list, and quota usage, then return it."""
    sub = client.get_subscription()
    print(f"Tier: {sub.tier}")
    print(f"Features: {', '.join(sub.features)}")
    print(f"Usage: {sub.current_usage}/{sub.usage_limit}")
    return sub

LoRA Fine-tuning

Create a Fine-tuning Job

python
def fine_tune_model(deployment_id, training_file):
    """Kick off a LoRA fine-tuning job on a deployment and report the job ID."""
    job = client.create_fine_tuning_job(
        deployment_id=deployment_id,
        training_file=training_file,
        epochs=3,
        learning_rate=3e-5,
        batch_size=8,
    )
    print(f"Fine-tuning job ID: {job.id}")
    return job

Get Fine-tuning Job Status

python
def check_fine_tuning_status(job_id):
    """Print the state and percent progress of a fine-tuning job, then return it."""
    job_status = client.get_fine_tuning_job_status(job_id)
    for line in (f"Status: {job_status.status}", f"Progress: {job_status.progress}%"):
        print(line)
    return job_status

List Fine-tuning Jobs

python
def list_fine_tuning_jobs():
    """Print one summary line per fine-tuning job and return the full list."""
    all_jobs = client.list_fine_tuning_jobs()
    for entry in all_jobs:
        print(f"ID: {entry.id}, Model: {entry.base_model}, Status: {entry.status}")
    return all_jobs

Error Handling

The SDK provides specific exception classes for different types of errors:

python
from paitient_secure_model import (
    SecureModelClient,
    AuthenticationError,
    ValidationError,
    ResourceNotFoundError,
    RateLimitError,
    ServiceError
)

def handle_errors():
    """Demonstrate catching each SDK exception type, narrowest first.

    The SDK-specific exceptions are handled before the generic Exception
    fallback; RateLimitError additionally exposes a retry_after hint.
    """
    try:
        # Your SDK operations here
        client.deploy_model(deployment_name="my-deployment")
    except AuthenticationError as e:
        print(f"Authentication failed: {e}")
    except ValidationError as e:
        print(f"Validation error: {e}")
    except ResourceNotFoundError as e:
        print(f"Resource not found: {e}")
    except RateLimitError as e:
        print(f"Rate limit exceeded: {e}")
        print(f"Retry after {e.retry_after} seconds")
    except ServiceError as e:
        print(f"Service error: {e}")
    except Exception as e:
        print(f"Unexpected error: {e}")

Advanced Usage

Custom Headers

python
client = SecureModelClient(
    api_key="your-api-key",
    client_id="your-client-id",
    headers={
        'X-Custom-Header': 'custom-value'
    }
)

Set Request Timeout

python
client = SecureModelClient(
    api_key="your-api-key",
    client_id="your-client-id",
    timeout=120.0  # 2 minutes
)

Retry Configuration

python
client = SecureModelClient(
    api_key="your-api-key",
    client_id="your-client-id",
    max_retries=5,
    retry_delay=1.0  # seconds
)

Type Hints

The SDK includes type hints for better IDE integration and static type checking with tools like mypy:

python
from paitient_secure_model import SecureModelClient
from paitient_secure_model.types import (
    DeploymentOptions,
    GenerateOptions,
    DeploymentStatus,
    GenerationResult
)

# Example with type hints
def deploy(options: DeploymentOptions) -> None:
    """Deploy a model from typed options and print the resulting status."""
    sdk = SecureModelClient()
    created = sdk.deploy_model(**options)
    status: DeploymentStatus = sdk.get_deployment_status(created.id)
    print(status)

# Usage
deploy({
    "model_name": "ZimaBlueAI/HuatuoGPT-o1-8B",
    "deployment_name": "typed-deployment",
    "use_gpu": True
})

Environment Variables

The SDK recognizes the following environment variables:

| Variable | Description |
| --- | --- |
| PAITIENT_API_KEY | Your API key |
| PAITIENT_CLIENT_ID | Your client ID |
| PAITIENT_ENDPOINT | API endpoint |
| PAITIENT_TIMEOUT | Request timeout in seconds |
| PAITIENT_MAX_RETRIES | Maximum number of retries |
| PAITIENT_RETRY_DELAY | Delay between retries in seconds |
| PAITIENT_LOG_LEVEL | Log level (DEBUG, INFO, WARNING, ERROR) |

Asynchronous Support

The SDK also provides asynchronous versions of all methods, which are useful for integrating with asynchronous applications:

python
import asyncio
from paitient_secure_model.async_client import AsyncSecureModelClient

async def main():
    """End-to-end async example: deploy a model, poll once, generate text."""
    # Initialize client
    sdk = AsyncSecureModelClient(
        api_key="your-api-key",
        client_id="your-client-id",
    )

    # Deploy model
    deployment = await sdk.deploy_model(
        deployment_name="async-example",
        use_gpu=True,
    )

    # Check status
    status = await sdk.get_deployment_status(deployment.id)
    print(f"Status: {status.status}")

    # Generate text
    result = await sdk.generate(
        deployment_id=deployment.id,
        prompt="Explain HIPAA compliance in healthcare AI",
        max_tokens=500,
    )
    print(result.text)

# Run the async function
asyncio.run(main())

Complete Example

Here's a complete example that deploys a model, waits for it to be ready, and generates text:

python
from paitient_secure_model import SecureModelClient
import time
import os

def main():
    """Full synchronous walkthrough: deploy, wait until ready, generate, check usage."""
    try:
        # Credentials come from the environment rather than being hard-coded.
        sdk = SecureModelClient(
            api_key=os.environ.get("PAITIENT_API_KEY"),
            client_id=os.environ.get("PAITIENT_CLIENT_ID"),
        )

        print("Deploying model...")
        deployment = sdk.deploy_model(
            deployment_name="example-deployment",
            use_gpu=True,
        )
        print(f"Deployment ID: {deployment.id}")

        status = sdk.get_deployment_status(deployment.id)
        print(f"Initial status: {status.status}")

        # Poll every 30 seconds until the deployment reaches a terminal state.
        while status.status != "DEPLOYED":
            if status.status == "FAILED":
                print(f"Deployment failed: {status.message}")
                return
            print(f"Waiting for deployment... Current status: {status.status}")
            time.sleep(30)  # Wait 30 seconds
            status = sdk.get_deployment_status(deployment.id)

        print("Deployment successful!")

        print("Generating text...")
        result = sdk.generate(
            deployment_id=deployment.id,
            prompt="Explain the importance of HIPAA compliance in healthcare AI applications.",
            max_tokens=500,
        )
        print("\nGenerated Text:")
        print(result.text)
        print(f"\nToken usage: {result.usage.total_tokens}")

        # Show how the run affected subscription quota.
        subscription = sdk.get_subscription()
        print(f"\nSubscription tier: {subscription.tier}")
        print(f"Usage: {subscription.current_usage}/{subscription.usage_limit}")

    except Exception as e:
        print(f"Error: {e}")

if __name__ == "__main__":
    main()

Released under the MIT License.