Python SDK: Deployment Guide

This guide covers the deployment capabilities of the PaiTIENT Secure Model Service Python SDK, which let you deploy AI models securely in a HIPAA- and SOC 2-compliant environment.

Prerequisites

Before deploying models, ensure you have:

  1. Installed the PaiTIENT Python SDK
  2. Set up authentication credentials (see the setup sketch after this list)
  3. Selected a model for deployment
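
A minimal setup sketch is shown below. The PyPI package name is an assumption for illustration (the import name used throughout this guide is paitient_secure_model), and the bare Client() calls in this guide suggest that credentials are supplied through your environment or local SDK configuration:

python
# Install the SDK (the package name here is an assumption for illustration):
#   pip install paitient-secure-model

from paitient_secure_model import Client

# Client() is used without arguments throughout this guide, so credentials are
# expected to come from your environment or local SDK configuration.
client = Client()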

Basic Deployment

The simplest way to deploy a model is:

python
from paitient_secure_model import Client

# Initialize client
client = Client()

# Create a basic deployment
deployment = client.create_deployment(
    model_name="ZimaBlueAI/HuatuoGPT-o1-8B",
    deployment_name="clinical-assistant"
)

print(f"Deployment ID: {deployment.id}")
print(f"Status: {deployment.status}")

# Wait for deployment to complete
deployment.wait_until_ready()
print(f"Deployment is now {deployment.status}")
print(f"Endpoint: {deployment.endpoint}")

Deployment Options

Compute Resources

Configure compute resources for your deployment:

python
# Deployment with specific compute resources
deployment = client.create_deployment(
    model_name="ZimaBlueAI/HuatuoGPT-o1-8B",
    deployment_name="clinical-assistant",
    compute_type="gpu",           # "gpu" or "cpu"
    instance_type="g4dn.xlarge",  # AWS instance type
    min_replicas=1,               # Minimum number of replicas
    max_replicas=3,               # Maximum number of replicas
    auto_scaling=True             # Enable auto-scaling
)

Environment Configuration

Configure the deployment environment:

python
# Deployment with environment configuration
deployment = client.create_deployment(
    model_name="ZimaBlueAI/HuatuoGPT-o1-8B",
    deployment_name="clinical-assistant",
    environment="production",      # "production", "staging", or "development"
    region="us-east-1",            # AWS region
    vpc_config={
        "subnet_ids": ["subnet-abc123", "subnet-def456"],
        "security_group_ids": ["sg-123456"]
    },
    tags={
        "department": "clinical-research",
        "project": "diabetes-assistant",
        "environment": "production"
    }
)

Security Settings

Configure security settings for your deployment:

python
from paitient_secure_model import Client
from paitient_secure_model.security import SecuritySettings

# Initialize client
client = Client()

# Create deployment with security settings
deployment = client.create_deployment(
    model_name="ZimaBlueAI/HuatuoGPT-o1-8B",
    deployment_name="hipaa-clinical-assistant",
    security_settings=SecuritySettings(
        network_isolation=True,        # Enable network isolation
        private_endpoints=True,        # Use private endpoints
        encryption_level="maximum",    # Maximum encryption level
        audit_logging=True,            # Enable comprehensive audit logging
        compliance_mode="hipaa"        # Enable HIPAA compliance mode
    )
)

Custom Configuration

Apply custom configuration options:

python
# Deployment with advanced configuration
deployment = client.create_deployment(
    model_name="ZimaBlueAI/HuatuoGPT-o1-8B",
    deployment_name="custom-clinical-assistant",
    model_config={
        "context_length": 4096,
        "max_output_tokens": 1024,
        "default_temperature": 0.7,
        "default_top_p": 0.95
    },
    scaling_config={
        "target_cpu_utilization": 70,
        "target_memory_utilization": 80,
        "scale_down_delay_seconds": 300,
        "scale_up_delay_seconds": 60
    }
)

Deployment Management

Check Deployment Status

Monitor the status of your deployment:

python
# Get deployment status
deployment = client.get_deployment("dep_12345abcde")
print(f"Status: {deployment.status}")
print(f"Created: {deployment.created_at}")
print(f"Updated: {deployment.updated_at}")
print(f"Endpoint: {deployment.endpoint}")

# Get detailed deployment information
details = deployment.get_details()
print(f"Model: {details.model_name}")
print(f"Instance Type: {details.instance_type}")
print(f"Replicas: {details.current_replicas}/{details.max_replicas}")
print(f"Compute Type: {details.compute_type}")

List Deployments

Retrieve a list of all your deployments:

python
# List all deployments
deployments = client.list_deployments()
for dep in deployments:
    print(f"{dep.id}: {dep.name} - {dep.status}")

# Filter deployments
prod_deployments = client.list_deployments(
    filters={
        "status": "running",
        "tags": {"environment": "production"}
    }
)
for dep in prod_deployments:
    print(f"{dep.id}: {dep.name} - Running in production")

Update Deployment

Modify an existing deployment:

python
# Update deployment
client.update_deployment(
    deployment_id="dep_12345abcde",
    min_replicas=2,
    max_replicas=5,
    tags={"environment": "production", "version": "2.0"}
)

# Update security settings
client.update_deployment(
    deployment_id="dep_12345abcde",
    security_settings=SecuritySettings(
        network_isolation=True,
        private_endpoints=True
    )
)

Delete Deployment

Remove a deployment when it's no longer needed:

python
# Delete deployment
client.delete_deployment("dep_12345abcde")

# Delete with confirmation bypass
client.delete_deployment("dep_12345abcde", force=True)

Deployment Metrics

Monitor performance metrics for your deployment:

python
# Get deployment metrics
metrics = client.get_deployment_metrics(
    deployment_id="dep_12345abcde",
    start_time="2023-11-01T00:00:00Z",
    end_time="2023-11-30T23:59:59Z",
    metrics=["latency", "throughput", "error_rate", "token_usage"]
)

# Print metrics
print(f"Average latency: {metrics.average_latency} ms")
print(f"P95 latency: {metrics.p95_latency} ms")
print(f"Throughput: {metrics.throughput} requests/sec")
print(f"Error rate: {metrics.error_rate}%")
print(f"Token usage: {metrics.token_usage} tokens")

Deployment Logs

Access logs for your deployment:

python
# Get deployment logs
logs = client.get_deployment_logs(
    deployment_id="dep_12345abcde",
    start_time="2023-11-01T00:00:00Z",
    end_time="2023-11-01T01:00:00Z",
    limit=100,
    filter="level=error"
)

for log in logs:
    print(f"[{log.timestamp}] {log.level}: {log.message}")

Advanced Deployment Scenarios

Blue-Green Deployment

Implement blue-green deployments for zero-downtime updates:

python
# Deploy the "blue" version
blue_deployment = client.create_deployment(
    model_name="ZimaBlueAI/HuatuoGPT-o1-8B",
    deployment_name="clinical-assistant-blue",
    tags={"environment": "production", "color": "blue"}
)

# Create a production endpoint pointing to the blue deployment
production_endpoint = client.create_endpoint(
    name="clinical-assistant-production",
    deployment_id=blue_deployment.id
)

# Later, deploy the "green" version
green_deployment = client.create_deployment(
    model_name="ZimaBlueAI/HuatuoGPT-o1-8B-v2",  # Updated model version
    deployment_name="clinical-assistant-green",
    tags={"environment": "production", "color": "green"}
)

# Test the green deployment
# ...

# Switch traffic to the green deployment
client.update_endpoint(
    endpoint_id=production_endpoint.id,
    deployment_id=green_deployment.id
)

# If needed, rollback to the blue deployment
client.update_endpoint(
    endpoint_id=production_endpoint.id,
    deployment_id=blue_deployment.id
)

Canary Deployment

Gradually shift traffic to a new deployment:

python
import time

# Deploy the stable version
stable_deployment = client.create_deployment(
    model_name="ZimaBlueAI/HuatuoGPT-o1-8B",
    deployment_name="clinical-assistant-stable"
)

# Deploy the canary version
canary_deployment = client.create_deployment(
    model_name="ZimaBlueAI/HuatuoGPT-o1-8B-v2",  # Updated model version
    deployment_name="clinical-assistant-canary"
)

# Create a production endpoint with traffic splitting
canary_endpoint = client.create_canary_endpoint(
    name="clinical-assistant-canary",
    deployments=[
        {"id": stable_deployment.id, "traffic_percentage": 90},
        {"id": canary_deployment.id, "traffic_percentage": 10}
    ]
)

# Monitor canary performance
# ...

# Gradually increase traffic to the canary deployment
for percentage in [25, 50, 75, 100]:
    client.update_canary_endpoint(
        endpoint_id=canary_endpoint.id,
        deployments=[
            {"id": stable_deployment.id, "traffic_percentage": 100 - percentage},
            {"id": canary_deployment.id, "traffic_percentage": percentage}
        ]
    )
    # Wait and evaluate metrics before increasing traffic
    time.sleep(3600)  # 1 hour for evaluation
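
To make the evaluation step concrete, the following is a minimal sketch that gates each traffic increase on the canary's error rate. It reuses client, stable_deployment, canary_deployment, and canary_endpoint from the example above, together with get_deployment_metrics and update_canary_endpoint from earlier sections; the one-percent threshold and one-hour window are illustrative choices, not SDK defaults:

python
import time
from datetime import datetime, timedelta, timezone

# Illustrative threshold and evaluation window; not SDK defaults.
ERROR_RATE_THRESHOLD = 1.0  # percent
EVALUATION_WINDOW = timedelta(hours=1)

for percentage in [25, 50, 75, 100]:
    time.sleep(EVALUATION_WINDOW.total_seconds())  # let the current split serve traffic

    # Check the canary's error rate over the evaluation window (ISO 8601 timestamps)
    end = datetime.now(timezone.utc)
    metrics = client.get_deployment_metrics(
        deployment_id=canary_deployment.id,
        start_time=(end - EVALUATION_WINDOW).isoformat(),
        end_time=end.isoformat(),
        metrics=["error_rate", "latency"]
    )

    if metrics.error_rate > ERROR_RATE_THRESHOLD:
        # Roll all traffic back to the stable deployment and stop the rollout
        client.update_canary_endpoint(
            endpoint_id=canary_endpoint.id,
            deployments=[
                {"id": stable_deployment.id, "traffic_percentage": 100},
                {"id": canary_deployment.id, "traffic_percentage": 0}
            ]
        )
        break

    # Canary looks healthy; shift more traffic to it
    client.update_canary_endpoint(
        endpoint_id=canary_endpoint.id,
        deployments=[
            {"id": stable_deployment.id, "traffic_percentage": 100 - percentage},
            {"id": canary_deployment.id, "traffic_percentage": percentage}
        ]
    )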

Multi-region Deployment

Deploy models across multiple regions:

python
# Deploy across multiple regions
deployments = []
regions = ["us-east-1", "us-west-2", "eu-west-1"]

for region in regions:
    deployment = client.create_deployment(
        model_name="ZimaBlueAI/HuatuoGPT-o1-8B",
        deployment_name=f"clinical-assistant-{region}",
        region=region,
        tags={"region": region, "deployment-group": "global-clinical-assistant"}
    )
    deployments.append(deployment)
    
    # Wait for deployment to be ready
    deployment.wait_until_ready()
    print(f"Deployment in {region} is ready: {deployment.id}")

# Create a global endpoint that routes to the nearest regional deployment
global_endpoint = client.create_global_endpoint(
    name="global-clinical-assistant",
    deployment_ids=[d.id for d in deployments],
    routing_strategy="latency"  # Options: "latency", "geolocation", "weighted"
)

print(f"Global endpoint: {global_endpoint.url}")

Error Handling

Implement proper error handling for deployments:

python
from paitient_secure_model import Client
from paitient_secure_model.exceptions import (
    DeploymentError,
    ResourceNotFoundError,
    QuotaExceededError,
    InvalidParameterError
)

client = Client()

try:
    deployment = client.create_deployment(
        model_name="ZimaBlueAI/HuatuoGPT-o1-8B",
        deployment_name="clinical-assistant"
    )
except InvalidParameterError as e:
    print(f"Invalid parameter: {e}")
except QuotaExceededError as e:
    print(f"Quota exceeded: {e}")
except DeploymentError as e:
    print(f"Deployment failed: {e}")
    print(f"Deployment ID: {e.deployment_id}")
    print(f"Status: {e.status}")
    print(f"Reason: {e.reason}")
    
    # Get detailed error information
    if e.deployment_id:
        logs = client.get_deployment_logs(
            deployment_id=e.deployment_id,
            limit=10,
            filter="level=error"
        )
        print("Error logs:")
        for log in logs:
            print(f"  {log.message}")

Best Practices

Resource Optimization

Optimize resource usage to reduce costs (a configuration sketch follows the list):

  1. Right-size your deployment: Choose the appropriate instance type
  2. Enable auto-scaling: Scale based on demand
  3. Use scale-to-zero: For non-critical deployments
  4. Set resource limits: Prevent runaway usage
  5. Monitor usage: Regularly check metrics
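
A minimal sketch of how these practices map onto the deployment parameters used earlier in this guide; treating min_replicas=0 as scale-to-zero and max_output_tokens as a per-request limit are assumptions for illustration:

python
# A right-sized, auto-scaling deployment for a non-critical workload.
# Assumptions for illustration: min_replicas=0 is treated as scale-to-zero,
# and max_output_tokens acts as a per-request output limit.
deployment = client.create_deployment(
    model_name="ZimaBlueAI/HuatuoGPT-o1-8B",
    deployment_name="clinical-assistant-dev",
    compute_type="gpu",
    instance_type="g4dn.xlarge",   # right-size: smallest instance that meets latency needs
    min_replicas=0,                # scale to zero when idle (assumed behavior)
    max_replicas=2,                # cap replicas to prevent runaway usage
    auto_scaling=True,             # scale based on demand
    model_config={"max_output_tokens": 512}
)

# Monitor usage regularly (see "Deployment Metrics" above)
metrics = client.get_deployment_metrics(
    deployment_id=deployment.id,
    start_time="2023-11-01T00:00:00Z",
    end_time="2023-11-30T23:59:59Z",
    metrics=["throughput", "token_usage"]
)
print(f"Throughput: {metrics.throughput} requests/sec")
print(f"Token usage: {metrics.token_usage} tokens")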

Security

Follow these security best practices:

  1. Enable network isolation: For sensitive deployments
  2. Use private endpoints: When possible
  3. Implement least privilege: Restrict access appropriately
  4. Enable audit logging: For compliance
  5. Rotate credentials: Regularly update API keys

Reliability

Ensure reliable deployments:

  1. Multi-region deployments: For critical applications
  2. Regular backups: Save model state and configuration
  3. Monitoring and alerting: Detect issues early
  4. Gradual rollouts: Use canary deployments for updates
  5. Automated testing: Validate deployments before full release
