# Python SDK Reference
The PaiTIENT Secure Model Python SDK provides a comprehensive interface to the Secure Model Service, allowing data scientists and ML engineers to deploy, manage, and use AI models in a HIPAA/SOC2-compliant environment.
## Installation
```bash
pip install paitient-secure-model
```

## Client Initialization
```python
from paitient_secure_model import SecureModelClient

# Initialize from environment variables
client = SecureModelClient()

# Or specify credentials directly
client = SecureModelClient(
    api_key="your-api-key",
    client_id="your-client-id",
    endpoint="https://api.paitient.ai"  # Optional, defaults to production
)
```

## Core Classes
### SecureModelClient
The main client for interacting with the PaiTIENT Secure Model Service.
#### Constructor Parameters
| Parameter | Type | Description | Required | Default |
|---|---|---|---|---|
| `api_key` | `str` | Your PaiTIENT API key | No (if in env) | `os.environ.get("PAITIENT_API_KEY")` |
| `client_id` | `str` | Your PaiTIENT client ID | No (if in env) | `os.environ.get("PAITIENT_CLIENT_ID")` |
| `endpoint` | `str` | API endpoint | No | `"https://api.paitient.ai"` |
| `timeout` | `float` | Request timeout in seconds | No | `60.0` |
| `max_retries` | `int` | Max number of retries | No | `3` |
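Putting the table together, a fully explicit initialization (equivalent to the defaults) looks like the following; all values are placeholders.

```python
import os

from paitient_secure_model import SecureModelClient

# Spell out every constructor parameter from the table above.
client = SecureModelClient(
    api_key=os.environ.get("PAITIENT_API_KEY"),
    client_id=os.environ.get("PAITIENT_CLIENT_ID"),
    endpoint="https://api.paitient.ai",  # production endpoint (the default)
    timeout=60.0,       # per-request timeout in seconds
    max_retries=3       # retry failed requests up to three times
)
```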
## Deployment Management

### Deploy a Model

```python
def deploy_model():
    deployment = client.deploy_model(
        model_name="ZimaBlueAI/HuatuoGPT-o1-8B",
        deployment_name="clinical-assistant",
        use_gpu=True,
        region="us-west-2",
        replicas=1,
        instance_type="ml.g4dn.xlarge"
    )
    print(f"Deployment ID: {deployment.id}")
    return deployment
```

#### Deploy Model Parameters
| Parameter | Type | Description | Required | Default |
|---|---|---|---|---|
| `model_name` | `str` | HuggingFace model name | No | `"ZimaBlueAI/HuatuoGPT-o1-8B"` |
| `deployment_name` | `str` | Name for the deployment | Yes | - |
| `use_gpu` | `bool` | Whether to use GPU | No | `False` |
| `region` | `str` | AWS region | No | `"us-west-2"` |
| `replicas` | `int` | Number of replicas | No | `1` |
| `instance_type` | `str` | EC2 instance type | No | Depends on `use_gpu` |
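Since `deployment_name` is the only required parameter, a minimal CPU-backed deployment can rely entirely on the defaults above:

```python
# model_name, region, replicas, and instance_type all fall back to the
# documented defaults; use_gpu defaults to False, so this runs on CPU.
deployment = client.deploy_model(deployment_name="cpu-example")
print(f"Deployment ID: {deployment.id}")
```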
### Get Deployment Status

```python
def check_status(deployment_id):
    status = client.get_deployment_status(deployment_id)
    print(f"Status: {status.status}")
    print(f"Message: {status.message}")
    return status
```
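Deployments take time to come up, so callers typically poll until the status settles. A minimal polling helper, assuming the `"DEPLOYED"` and `"FAILED"` status values used in the complete example at the end of this page:

```python
import time

def wait_until_deployed(client, deployment_id, poll_interval=30):
    """Poll a deployment until it reaches a terminal status."""
    while True:
        status = client.get_deployment_status(deployment_id)
        if status.status == "DEPLOYED":
            return status
        if status.status == "FAILED":
            raise RuntimeError(f"Deployment failed: {status.message}")
        time.sleep(poll_interval)  # wait before checking again
```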
### List All Deployments

```python
def list_deployments():
    deployments = client.list_deployments()
    for deployment in deployments:
        print(f"ID: {deployment.id}, Name: {deployment.name}, Status: {deployment.status}")
    return deployments
```

### Delete a Deployment
```python
def delete_deployment(deployment_id):
    client.delete_deployment(deployment_id)
    print("Deployment deleted successfully")
```

## Text Generation

### Generate Text

```python
def generate_text(deployment_id, prompt):
    result = client.generate(
        deployment_id=deployment_id,
        prompt=prompt,
        max_tokens=500,
        temperature=0.7,
        top_p=0.9,
        presence_penalty=0,
        frequency_penalty=0
    )
    print(result.text)
    print(f"Token usage: {result.usage.total_tokens}")
    return result
```

#### Generate Parameters
| Parameter | Type | Description | Required | Default |
|---|---|---|---|---|
| `deployment_id` | `str` | ID of the deployment | Yes | - |
| `prompt` | `str` | Input text prompt | Yes | - |
| `max_tokens` | `int` | Max tokens to generate | No | `100` |
| `temperature` | `float` | Sampling temperature (0-2) | No | `0.7` |
| `top_p` | `float` | Nucleus sampling parameter (0-1) | No | `0.9` |
| `presence_penalty` | `float` | Presence penalty (0-2) | No | `0.0` |
| `frequency_penalty` | `float` | Frequency penalty (0-2) | No | `0.0` |
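Lowering `temperature` makes output more reproducible, which often matters for clinical workloads; a sketch with the other sampling parameters left at their defaults (the deployment ID is a placeholder):

```python
result = client.generate(
    deployment_id="your-deployment-id",  # placeholder
    prompt="Summarize the HIPAA Security Rule in two sentences.",
    max_tokens=200,
    temperature=0.0  # near-deterministic: strongly prefer top tokens
)
print(result.text)
```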
### Stream Text Generation

```python
def stream_text(deployment_id, prompt):
    for chunk in client.generate_stream(
        deployment_id=deployment_id,
        prompt=prompt,
        max_tokens=1000
    ):
        print(chunk.text, end="", flush=True)
        # or handle the chunk in your application
```
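If you need the full response in addition to incremental display, you can accumulate the chunks as they arrive:

```python
def stream_and_collect(deployment_id, prompt):
    """Stream tokens to the console while also collecting the full text."""
    parts = []
    for chunk in client.generate_stream(
        deployment_id=deployment_id,
        prompt=prompt,
        max_tokens=1000
    ):
        print(chunk.text, end="", flush=True)
        parts.append(chunk.text)
    return "".join(parts)
```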
## Subscription Management

### Get Subscription Status

```python
def check_subscription():
    subscription = client.get_subscription()
    print(f"Tier: {subscription.tier}")
    print(f"Features: {', '.join(subscription.features)}")
    print(f"Usage: {subscription.current_usage}/{subscription.usage_limit}")
    return subscription
```
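A common pattern is to check remaining quota before starting a large batch of requests; a sketch using the fields shown above (the unit of `current_usage` and `usage_limit` depends on your plan):

```python
subscription = client.get_subscription()

# Guard against exhausting the plan's quota mid-batch.
remaining = subscription.usage_limit - subscription.current_usage
if remaining < 1000:
    print(f"Only {remaining} units left on the {subscription.tier} tier; "
          "consider pausing or upgrading before a large job.")
```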
## LoRA Fine-tuning

### Create a Fine-tuning Job

```python
def fine_tune_model(deployment_id, training_file):
    fine_tuning_job = client.create_fine_tuning_job(
        deployment_id=deployment_id,
        training_file=training_file,
        epochs=3,
        learning_rate=3e-5,
        batch_size=8
    )
    print(f"Fine-tuning job ID: {fine_tuning_job.id}")
    return fine_tuning_job
```

### Get Fine-tuning Job Status
```python
def check_fine_tuning_status(job_id):
    status = client.get_fine_tuning_job_status(job_id)
    print(f"Status: {status.status}")
    print(f"Progress: {status.progress}%")
    return status
```
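Like deployments, fine-tuning jobs run asynchronously, so you will usually poll until they finish. A minimal sketch; the `"COMPLETED"` and `"FAILED"` status strings are assumptions, so check your service's actual values:

```python
import time

def wait_for_fine_tuning(client, job_id, poll_interval=60):
    """Poll a fine-tuning job until it reaches a terminal status.

    The "COMPLETED"/"FAILED" terminal statuses below are assumed,
    not taken from this reference.
    """
    while True:
        status = client.get_fine_tuning_job_status(job_id)
        print(f"Progress: {status.progress}%")
        if status.status in ("COMPLETED", "FAILED"):
            return status
        time.sleep(poll_interval)
```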
### List Fine-tuning Jobs

```python
def list_fine_tuning_jobs():
    jobs = client.list_fine_tuning_jobs()
    for job in jobs:
        print(f"ID: {job.id}, Model: {job.base_model}, Status: {job.status}")
    return jobs
```

## Error Handling
The SDK provides specific exception classes for different types of errors:
```python
from paitient_secure_model import (
    SecureModelClient,
    AuthenticationError,
    ValidationError,
    ResourceNotFoundError,
    RateLimitError,
    ServiceError
)

def handle_errors():
    try:
        # Your SDK operations here
        client.deploy_model(
            deployment_name="my-deployment"
        )
    except AuthenticationError as e:
        print(f"Authentication failed: {e}")
    except ValidationError as e:
        print(f"Validation error: {e}")
    except ResourceNotFoundError as e:
        print(f"Resource not found: {e}")
    except RateLimitError as e:
        print(f"Rate limit exceeded: {e}")
        print(f"Retry after {e.retry_after} seconds")
    except ServiceError as e:
        print(f"Service error: {e}")
    except Exception as e:
        print(f"Unexpected error: {e}")
```
## Advanced Usage

### Custom Headers

```python
client = SecureModelClient(
    api_key="your-api-key",
    client_id="your-client-id",
    headers={
        'X-Custom-Header': 'custom-value'
    }
)
```

### Set Request Timeout
```python
client = SecureModelClient(
    api_key="your-api-key",
    client_id="your-client-id",
    timeout=120.0  # 2 minutes
)
```

### Retry Configuration
```python
client = SecureModelClient(
    api_key="your-api-key",
    client_id="your-client-id",
    max_retries=5,
    retry_delay=1.0  # seconds
)
```

## Type Hints
The SDK includes type hints for better IDE integration and static type checking with tools like mypy:
```python
from paitient_secure_model import SecureModelClient
from paitient_secure_model.types import (
    DeploymentOptions,
    GenerateOptions,
    DeploymentStatus,
    GenerationResult
)

# Example with type hints
def deploy(options: DeploymentOptions) -> None:
    client = SecureModelClient()
    deployment = client.deploy_model(**options)
    status: DeploymentStatus = client.get_deployment_status(deployment.id)
    print(status)

# Usage
deploy({
    "model_name": "ZimaBlueAI/HuatuoGPT-o1-8B",
    "deployment_name": "typed-deployment",
    "use_gpu": True
})
```

## Environment Variables
The SDK recognizes the following environment variables:
| Variable | Description |
|---|---|
| `PAITIENT_API_KEY` | Your API key |
| `PAITIENT_CLIENT_ID` | Your client ID |
| `PAITIENT_ENDPOINT` | API endpoint |
| `PAITIENT_TIMEOUT` | Request timeout in seconds |
| `PAITIENT_MAX_RETRIES` | Maximum number of retries |
| `PAITIENT_RETRY_DELAY` | Delay between retries in seconds |
| `PAITIENT_LOG_LEVEL` | Log level (`DEBUG`, `INFO`, `WARNING`, `ERROR`) |
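With these set, the zero-argument constructor shown earlier picks up the configuration automatically; for example:

```python
import os

from paitient_secure_model import SecureModelClient

# Normally these would be exported in your shell or deployment
# environment rather than set in code; shown inline for clarity.
os.environ["PAITIENT_API_KEY"] = "your-api-key"
os.environ["PAITIENT_CLIENT_ID"] = "your-client-id"
os.environ["PAITIENT_LOG_LEVEL"] = "DEBUG"

client = SecureModelClient()  # reads configuration from the environment
```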
## Asynchronous Support
The SDK also provides asynchronous versions of all methods via `AsyncSecureModelClient`, for use in asyncio-based applications:
```python
import asyncio
from paitient_secure_model.async_client import AsyncSecureModelClient

async def main():
    # Initialize client
    client = AsyncSecureModelClient(
        api_key="your-api-key",
        client_id="your-client-id"
    )

    # Deploy model
    deployment = await client.deploy_model(
        deployment_name="async-example",
        use_gpu=True
    )

    # Check status
    status = await client.get_deployment_status(deployment.id)
    print(f"Status: {status.status}")

    # Generate text
    result = await client.generate(
        deployment_id=deployment.id,
        prompt="Explain HIPAA compliance in healthcare AI",
        max_tokens=500
    )
    print(result.text)

# Run the async function
asyncio.run(main())
```
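The main benefit of the async client is concurrency: several prompts can be generated against the same deployment in parallel with `asyncio.gather`. A sketch, assuming the deployment is already live and that `AsyncSecureModelClient`, like its synchronous counterpart, reads credentials from the environment:

```python
import asyncio
from paitient_secure_model.async_client import AsyncSecureModelClient

async def generate_many(deployment_id, prompts):
    client = AsyncSecureModelClient()  # credentials from environment (assumed)
    # Fire all requests concurrently and wait for every result.
    results = await asyncio.gather(*[
        client.generate(deployment_id=deployment_id, prompt=p, max_tokens=200)
        for p in prompts
    ])
    return [r.text for r in results]
```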
## Complete Example

Here's a complete example that deploys a model, waits for it to be ready, and generates text:
```python
from paitient_secure_model import SecureModelClient
import time
import os

def main():
    try:
        # Initialize client
        client = SecureModelClient(
            api_key=os.environ.get("PAITIENT_API_KEY"),
            client_id=os.environ.get("PAITIENT_CLIENT_ID")
        )

        # Deploy model
        print("Deploying model...")
        deployment = client.deploy_model(
            deployment_name="example-deployment",
            use_gpu=True
        )
        print(f"Deployment ID: {deployment.id}")

        # Check deployment status
        status = client.get_deployment_status(deployment.id)
        print(f"Initial status: {status.status}")

        # Wait for deployment to be ready
        while status.status != "DEPLOYED":
            if status.status == "FAILED":
                print(f"Deployment failed: {status.message}")
                return
            print(f"Waiting for deployment... Current status: {status.status}")
            time.sleep(30)  # Wait 30 seconds
            status = client.get_deployment_status(deployment.id)
        print("Deployment successful!")

        # Generate text
        print("Generating text...")
        result = client.generate(
            deployment_id=deployment.id,
            prompt="Explain the importance of HIPAA compliance in healthcare AI applications.",
            max_tokens=500
        )
        print("\nGenerated Text:")
        print(result.text)
        print(f"\nToken usage: {result.usage.total_tokens}")

        # Check subscription
        subscription = client.get_subscription()
        print(f"\nSubscription tier: {subscription.tier}")
        print(f"Usage: {subscription.current_usage}/{subscription.usage_limit}")

    except Exception as e:
        print(f"Error: {e}")

if __name__ == "__main__":
    main()
```