Node.js SDK: Deployment Guide
This guide covers the deployment capabilities of the PaiTIENT Secure Model Service Node.js SDK, which lets you securely deploy AI models in a HIPAA- and SOC 2-compliant environment.
Prerequisites
Before deploying models, ensure you have:
- Installed the PaiTIENT Node.js SDK
- Set up authentication credentials
- Selected a model for deployment
Installation
# Install the PaiTIENT Node.js SDK
npm install paitient-secure-model
# or with yarn
yarn add paitient-secure-model
Basic Deployment
The simplest way to deploy a model:
const { PaiTIENTClient } = require('paitient-secure-model');
// Initialize client
const client = new PaiTIENTClient({
apiKey: process.env.PAITIENT_API_KEY,
clientId: process.env.PAITIENT_CLIENT_ID
});
// Create a basic deployment
/**
 * Creates the "clinical-assistant" deployment of ZimaBlueAI/HuatuoGPT-o1-8B
 * through the shared `client`, calls `waitUntilReady()` on it, and prints
 * the deployment's id, status and endpoint along the way.
 *
 * Any error raised during these steps is caught and written to
 * console.error instead of being rethrown.
 *
 * @returns {Promise<void>}
 */
async function deployModel() {
  const request = {
    modelName: "ZimaBlueAI/HuatuoGPT-o1-8B",
    deploymentName: "clinical-assistant"
  };

  try {
    const created = await client.createDeployment(request);
    console.log(`Deployment ID: ${created.id}`);
    console.log(`Status: ${created.status}`);

    // Status and endpoint are read again only after waitUntilReady() has
    // settled, so the lines below report the post-wait values.
    await created.waitUntilReady();
    console.log(`Deployment is now ${created.status}`);
    console.log(`Endpoint: ${created.endpoint}`);
  } catch (err) {
    console.error('Deployment failed:', err);
  }
}
deployModel();
Deployment Options
Compute Resources
Configure compute resources for your deployment:
// Deployment with specific compute resources
const deployment = await client.createDeployment({
modelName: "ZimaBlueAI/HuatuoGPT-o1-8B",
deploymentName: "clinical-assistant",
computeType: "gpu", // "gpu" or "cpu"
instanceType: "g4dn.xlarge", // AWS instance type
minReplicas: 1, // Minimum number of replicas
maxReplicas: 3, // Maximum number of replicas
autoScaling: true // Enable auto-scaling
});
Environment Configuration
Configure the deployment environment:
// Deployment with environment configuration
const deployment = await client.createDeployment({
modelName: "ZimaBlueAI/HuatuoGPT-o1-8B",
deploymentName: "clinical-assistant",
environment: "production", // "production", "staging", or "development"
region: "us-east-1", // AWS region
vpcConfig: {
subnetIds: ["subnet-abc123", "subnet-def456"],
securityGroupIds: ["sg-123456"]
},
tags: {
department: "clinical-research",
project: "diabetes-assistant",
environment: "production"
}
});
Security Settings
Configure security settings for your deployment:
const { PaiTIENTClient, Security } = require('paitient-secure-model');
// Initialize client
const client = new PaiTIENTClient({
apiKey: process.env.PAITIENT_API_KEY,
clientId: process.env.PAITIENT_CLIENT_ID
});
// Create deployment with security settings
const deployment = await client.createDeployment({
modelName: "ZimaBlueAI/HuatuoGPT-o1-8B",
deploymentName: "hipaa-clinical-assistant",
securitySettings: new Security.SecuritySettings({
networkIsolation: true, // Enable network isolation
privateEndpoints: true, // Use private endpoints
encryptionLevel: "maximum", // Maximum encryption level
auditLogging: true, // Enable comprehensive audit logging
complianceMode: "hipaa" // Enable HIPAA compliance mode
})
});
Custom Configuration
Apply custom configuration options:
// Deployment with advanced configuration
const deployment = await client.createDeployment({
modelName: "ZimaBlueAI/HuatuoGPT-o1-8B",
deploymentName: "custom-clinical-assistant",
modelConfig: {
contextLength: 4096,
maxOutputTokens: 1024,
defaultTemperature: 0.7,
defaultTopP: 0.95
},
scalingConfig: {
targetCpuUtilization: 70,
targetMemoryUtilization: 80,
scaleDownDelaySeconds: 300,
scaleUpDelaySeconds: 60
}
});
Deployment Management
Check Deployment Status
Monitor the status of your deployment:
// Get deployment status
const deployment = await client.getDeployment("dep_12345abcde");
console.log(`Status: ${deployment.status}`);
console.log(`Created: ${deployment.createdAt}`);
console.log(`Updated: ${deployment.updatedAt}`);
console.log(`Endpoint: ${deployment.endpoint}`);
// Get detailed deployment information
const details = await deployment.getDetails();
console.log(`Model: ${details.modelName}`);
console.log(`Instance Type: ${details.instanceType}`);
console.log(`Replicas: ${details.currentReplicas}/${details.maxReplicas}`);
console.log(`Compute Type: ${details.computeType}`);
List Deployments
Retrieve a list of all your deployments:
// List all deployments
const deployments = await client.listDeployments();
for (const dep of deployments) {
console.log(`${dep.id}: ${dep.name} - ${dep.status}`);
}
// Filter deployments
const prodDeployments = await client.listDeployments({
filters: {
status: "running",
tags: { environment: "production" }
}
});
for (const dep of prodDeployments) {
console.log(`${dep.id}: ${dep.name} - Running in production`);
}
Update Deployment
Modify an existing deployment:
// Update deployment
await client.updateDeployment({
deploymentId: "dep_12345abcde",
minReplicas: 2,
maxReplicas: 5,
tags: { environment: "production", version: "2.0" }
});
// Update security settings
const { Security } = require('paitient-secure-model');
await client.updateDeployment({
deploymentId: "dep_12345abcde",
securitySettings: new Security.SecuritySettings({
networkIsolation: true,
privateEndpoints: true
})
});
Delete Deployment
Remove a deployment when it's no longer needed:
// Delete deployment
await client.deleteDeployment("dep_12345abcde");
// Delete with confirmation bypass
await client.deleteDeployment("dep_12345abcde", { force: true });
Deployment Metrics
Monitor performance metrics for your deployment:
// Get deployment metrics
const metrics = await client.getDeploymentMetrics({
deploymentId: "dep_12345abcde",
startTime: "2023-11-01T00:00:00Z",
endTime: "2023-11-30T23:59:59Z",
metrics: ["latency", "throughput", "errorRate", "tokenUsage"]
});
// Print metrics
console.log(`Average latency: ${metrics.averageLatency} ms`);
console.log(`P95 latency: ${metrics.p95Latency} ms`);
console.log(`Throughput: ${metrics.throughput} requests/sec`);
console.log(`Error rate: ${metrics.errorRate}%`);
console.log(`Token usage: ${metrics.tokenUsage} tokens`);
Deployment Logs
Access logs for your deployment:
// Get deployment logs
const logs = await client.getDeploymentLogs({
deploymentId: "dep_12345abcde",
startTime: "2023-11-01T00:00:00Z",
endTime: "2023-11-01T01:00:00Z",
limit: 100,
filter: "level=error"
});
for (const log of logs) {
console.log(`[${log.timestamp}] ${log.level}: ${log.message}`);
}
Advanced Deployment Scenarios
Blue-Green Deployment
Implement blue-green deployments for zero-downtime updates:
// Deploy the "blue" version
const blueDeployment = await client.createDeployment({
modelName: "ZimaBlueAI/HuatuoGPT-o1-8B",
deploymentName: "clinical-assistant-blue",
tags: { environment: "production", color: "blue" }
});
// Create a production endpoint pointing to the blue deployment
const productionEndpoint = await client.createEndpoint({
name: "clinical-assistant-production",
deploymentId: blueDeployment.id
});
// Later, deploy the "green" version
const greenDeployment = await client.createDeployment({
modelName: "ZimaBlueAI/HuatuoGPT-o1-8B-v2", // Updated model version
deploymentName: "clinical-assistant-green",
tags: { environment: "production", color: "green" }
});
// Test the green deployment
// ...
// Switch traffic to the green deployment
await client.updateEndpoint({
endpointId: productionEndpoint.id,
deploymentId: greenDeployment.id
});
// If needed, rollback to the blue deployment
await client.updateEndpoint({
endpointId: productionEndpoint.id,
deploymentId: blueDeployment.id
});
Canary Deployment
Gradually shift traffic to a new deployment:
// Deploy the stable version
const stableDeployment = await client.createDeployment({
modelName: "ZimaBlueAI/HuatuoGPT-o1-8B",
deploymentName: "clinical-assistant-stable"
});
// Deploy the canary version
const canaryDeployment = await client.createDeployment({
modelName: "ZimaBlueAI/HuatuoGPT-o1-8B-v2", // Updated model version
deploymentName: "clinical-assistant-canary"
});
// Create a production endpoint with traffic splitting
const canaryEndpoint = await client.createCanaryEndpoint({
name: "clinical-assistant-canary",
deployments: [
{ id: stableDeployment.id, trafficPercentage: 90 },
{ id: canaryDeployment.id, trafficPercentage: 10 }
]
});
// Monitor canary performance
// ...
// Gradually increase traffic to the canary deployment
/**
 * Returns a promise that resolves once a `setTimeout` of `ms` milliseconds
 * fires.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Shifts traffic from the stable deployment to the canary deployment in
 * steps. For each step it calls `client.updateCanaryEndpoint` on
 * `canaryEndpoint`, giving the canary `percentage` of the traffic and the
 * stable deployment the remaining `100 - percentage`.
 *
 * Between steps the function pauses for `evaluationMs` so metrics can be
 * reviewed before the next increase. There is no pause after the final
 * step, because no further increase follows it.
 *
 * Uses `client`, `canaryEndpoint`, `stableDeployment` and `canaryDeployment`
 * from the enclosing scope.
 *
 * @param {number[]} [steps=[25, 50, 75, 100]] - Canary traffic percentages,
 *   applied in the given order. Each value must be a finite number from 0
 *   to 100.
 * @param {number} [evaluationMs=3600000] - Pause between consecutive steps,
 *   in milliseconds (default: one hour).
 * @returns {Promise<void>}
 * @throws {RangeError} If any step is not a finite number within 0-100.
 *   The check runs before any endpoint update is sent.
 */
async function increaseCanaryTraffic(
  steps = [25, 50, 75, 100],
  evaluationMs = 60 * 60 * 1000
) {
  // Validate everything up front so a bad later step cannot leave the
  // rollout half-applied.
  const invalid = steps.find(
    (value) => !Number.isFinite(value) || value < 0 || value > 100
  );
  if (invalid !== undefined) {
    throw new RangeError(
      `Canary traffic percentage must be between 0 and 100, got ${invalid}`
    );
  }

  for (const [index, percentage] of steps.entries()) {
    await client.updateCanaryEndpoint({
      endpointId: canaryEndpoint.id,
      deployments: [
        { id: stableDeployment.id, trafficPercentage: 100 - percentage },
        { id: canaryDeployment.id, trafficPercentage: percentage }
      ]
    });

    console.log(`Canary at ${percentage}% traffic, evaluating performance...`);

    // Wait and evaluate metrics before increasing traffic; after the last
    // step there is nothing left to increase, so return immediately.
    const isLastStep = index === steps.length - 1;
    if (!isLastStep) {
      await sleep(evaluationMs);
    }
  }
}
increaseCanaryTraffic();
Multi-region Deployment
Deploy models across multiple regions:
// Deploy across multiple regions
/**
 * Deploys ZimaBlueAI/HuatuoGPT-o1-8B to every region in `regions` and then
 * creates one global endpoint over the resulting deployments.
 *
 * The regional deployments are independent, so they are created and awaited
 * concurrently rather than one after another. The "ready" log lines
 * therefore appear in completion order, while `deploymentIds` passed to the
 * global endpoint keeps the order of `regions`.
 *
 * If any regional deployment fails, the returned promise rejects and no
 * global endpoint is created. Deployments already requested in other
 * regions are not rolled back by this function.
 *
 * Uses `client` from the enclosing scope.
 *
 * @param {string[]} [regions=["us-east-1", "us-west-2", "eu-west-1"]] -
 *   Regions to deploy to; must contain at least one entry.
 * @returns {Promise<object>} The object returned by
 *   `client.createGlobalEndpoint`.
 * @throws {Error} If `regions` is empty.
 */
async function deployToMultipleRegions(
  regions = ["us-east-1", "us-west-2", "eu-west-1"]
) {
  // A global endpoint over zero deployments has nothing to route to.
  if (regions.length === 0) {
    throw new Error("deployToMultipleRegions requires at least one region");
  }

  // Promise.all preserves input order, so deployments[i] belongs to regions[i].
  const deployments = await Promise.all(
    regions.map(async (region) => {
      const deployment = await client.createDeployment({
        modelName: "ZimaBlueAI/HuatuoGPT-o1-8B",
        deploymentName: `clinical-assistant-${region}`,
        region,
        tags: { region, "deployment-group": "global-clinical-assistant" }
      });

      // Wait for deployment to be ready
      await deployment.waitUntilReady();
      console.log(`Deployment in ${region} is ready: ${deployment.id}`);
      return deployment;
    })
  );

  // Create a global endpoint that routes to the nearest regional deployment
  const globalEndpoint = await client.createGlobalEndpoint({
    name: "global-clinical-assistant",
    deploymentIds: deployments.map((d) => d.id),
    routingStrategy: "latency" // Options: "latency", "geolocation", "weighted"
  });
  console.log(`Global endpoint: ${globalEndpoint.url}`);

  return globalEndpoint;
}
deployToMultipleRegions();
Error Handling
Implement proper error handling for deployments:
const { PaiTIENTClient, errors } = require('paitient-secure-model');
const client = new PaiTIENTClient({
apiKey: process.env.PAITIENT_API_KEY,
clientId: process.env.PAITIENT_CLIENT_ID
});
/**
 * Creates the "clinical-assistant" deployment through the shared `client`
 * and reports the outcome on the console, distinguishing the SDK error
 * classes exposed on `errors`:
 *
 * - `InvalidParameterError` and `QuotaExceededError`: one summary line.
 * - `DeploymentError`: message, deployment id, status and reason, followed
 *   by up to 10 error-level log messages for that deployment when
 *   `deploymentId` is set.
 * - anything else: reported as an unexpected error.
 *
 * Nothing is rethrown; a failure while fetching logs is reported as well.
 *
 * @returns {Promise<void>}
 */
async function createDeploymentWithErrorHandling() {
  try {
    const created = await client.createDeployment({
      modelName: "ZimaBlueAI/HuatuoGPT-o1-8B",
      deploymentName: "clinical-assistant"
    });
    console.log(`Deployment created: ${created.id}`);
  } catch (failure) {
    if (failure instanceof errors.InvalidParameterError) {
      console.error(`Invalid parameter: ${failure.message}`);
      return;
    }

    if (failure instanceof errors.QuotaExceededError) {
      console.error(`Quota exceeded: ${failure.message}`);
      return;
    }

    if (!(failure instanceof errors.DeploymentError)) {
      console.error(`Unexpected error: ${failure.message}`);
      return;
    }

    // From here on the failure is a DeploymentError.
    console.error(`Deployment failed: ${failure.message}`);
    console.error(`Deployment ID: ${failure.deploymentId}`);
    console.error(`Status: ${failure.status}`);
    console.error(`Reason: ${failure.reason}`);

    // Without a deployment id there are no logs to look up.
    if (!failure.deploymentId) {
      return;
    }

    try {
      const recentErrors = await client.getDeploymentLogs({
        deploymentId: failure.deploymentId,
        limit: 10,
        filter: "level=error"
      });
      console.error("Error logs:");
      for (const entry of recentErrors) {
        console.error(` ${entry.message}`);
      }
    } catch (lookupFailure) {
      console.error(`Failed to fetch logs: ${lookupFailure.message}`);
    }
  }
}
createDeploymentWithErrorHandling();
Best Practices
Resource Optimization
Optimize resource usage to reduce costs:
- Right-size your deployment: Choose the appropriate instance type
- Enable auto-scaling: Scale based on demand
- Use scale-to-zero: For non-critical deployments
- Set resource limits: Prevent runaway usage
- Monitor usage: Regularly check metrics
Security
Follow these security best practices:
- Enable network isolation: For sensitive deployments
- Use private endpoints: When possible
- Implement least privilege: Restrict access appropriately
- Enable audit logging: For compliance
- Rotate credentials: Regularly update API keys
Reliability
Ensure reliable deployments:
- Multi-region deployments: For critical applications
- Regular backups: Save model state and configuration
- Monitoring and alerting: Detect issues early
- Gradual rollouts: Use canary deployments for updates
- Automated testing: Validate deployments before full release
Next Steps
- Learn about Text Generation
- Explore Fine-tuning
- Understand Security Best Practices
- Review Troubleshooting