Guardrails
Comprehensive safety and validation engine for AI applications. Content safety, prompt injection detection, output validation, and intelligent rate limiting.
Installation
pnpm add @lov3kaizen/agentsea-guardrailsGuard Categories
Content Safety
Protect against harmful content
Security
Prevent attacks and data leakage
Validation
Ensure output quality and format
Operational
Manage resources and costs
Quick Start
import {
createGuardrailsEngine,
ToxicityGuard,
PIIGuard,
PromptInjectionGuard,
} from '@lov3kaizen/agentsea-guardrails';
// Create the engine
const engine = createGuardrailsEngine({
guards: [
{ name: 'toxicity', enabled: true, type: 'input', action: 'block' },
{ name: 'pii', enabled: true, type: 'both', action: 'transform' },
{ name: 'prompt-injection', enabled: true, type: 'input', action: 'block' },
],
failureMode: 'fail-fast',
defaultAction: 'allow',
});
// Register guards
engine.registerGuard(new ToxicityGuard({ sensitivity: 'medium' }));
engine.registerGuard(new PIIGuard({ types: ['email', 'phone'], maskingStrategy: 'redact' }));
engine.registerGuard(new PromptInjectionGuard({ sensitivity: 'high' }));
// Check input
const result = await engine.checkInput('What is the weather today?', {
sessionId: 'session-1',
userId: 'user-1',
});
if (result.passed) {
console.log('Input is safe');
} else {
console.log(`Blocked: ${result.message}`);
}Content Guards
ToxicityGuard
Detects toxic, harmful, or inappropriate content:
import { ToxicityGuard } from '@lov3kaizen/agentsea-guardrails';
const guard = new ToxicityGuard({
sensitivity: 'medium', // 'low' | 'medium' | 'high'
categories: ['hate', 'violence', 'harassment', 'sexual'],
});PIIGuard
Detects and optionally masks personally identifiable information:
import { PIIGuard } from '@lov3kaizen/agentsea-guardrails';
const guard = new PIIGuard({
types: ['email', 'phone', 'ssn', 'creditCard', 'address', 'name'],
maskingStrategy: 'redact', // 'redact' | 'mask' | 'hash'
customPatterns: [
{ name: 'employeeId', pattern: /EMP-\d{6}/ },
],
});TopicGuard
Filters content based on allowed/blocked topics:
import { TopicGuard } from '@lov3kaizen/agentsea-guardrails';
const guard = new TopicGuard({
allowedTopics: ['technology', 'science', 'general'],
blockedTopics: ['politics', 'religion'],
confidenceThreshold: 0.7,
});BiasGuard
Detects biased language:
import { BiasGuard } from '@lov3kaizen/agentsea-guardrails';
const guard = new BiasGuard({
categories: ['gender', 'race', 'religion', 'political'],
sensitivity: 'medium',
});Security Guards
PromptInjectionGuard
Detects prompt injection attempts:
import { PromptInjectionGuard } from '@lov3kaizen/agentsea-guardrails';
const guard = new PromptInjectionGuard({
sensitivity: 'high',
customPatterns: [
/reveal.*system.*prompt/i,
/ignore.*previous.*instructions/i,
],
});JailbreakGuard
Detects jailbreak attempts (DAN, roleplay attacks):
import { JailbreakGuard } from '@lov3kaizen/agentsea-guardrails';
const guard = new JailbreakGuard({
sensitivity: 'high',
});DataLeakageGuard
Prevents sensitive data from being exposed in outputs:
import { DataLeakageGuard } from '@lov3kaizen/agentsea-guardrails';
const guard = new DataLeakageGuard({
patterns: ['apiKey', 'password', 'secret', 'token'],
customPatterns: [
{ name: 'internalUrl', pattern: /internal\.company\.com/ },
],
});Validation Guards
SchemaGuard
Validates output against a Zod schema:
import { SchemaGuard } from '@lov3kaizen/agentsea-guardrails';
import { z } from 'zod';
const ResponseSchema = z.object({
answer: z.string(),
confidence: z.number().min(0).max(1),
});
const guard = new SchemaGuard({
schema: ResponseSchema,
});FormatGuard
Ensures output matches expected format:
import { FormatGuard } from '@lov3kaizen/agentsea-guardrails';
const guard = new FormatGuard({
format: 'json', // 'json' | 'xml' | 'markdown' | 'custom'
customValidator: (content) => content.startsWith('{'),
});Operational Guards
TokenBudgetGuard
Enforces token limits:
import { TokenBudgetGuard } from '@lov3kaizen/agentsea-guardrails';
const guard = new TokenBudgetGuard({
maxTokensPerRequest: 4096,
maxTokensPerSession: 50000,
maxTokensPerDay: 1000000,
warningThreshold: 0.8,
});RateLimitGuard
Limits request rates:
import { RateLimitGuard } from '@lov3kaizen/agentsea-guardrails';
const guard = new RateLimitGuard({
requestsPerMinute: 60,
requestsPerHour: 1000,
requestsPerDay: 10000,
});CostGuard
Tracks and limits costs:
import { CostGuard } from '@lov3kaizen/agentsea-guardrails';
const guard = new CostGuard({
maxCostPerRequest: 0.10,
maxCostPerSession: 5.00,
maxCostPerDay: 100.00,
currency: 'USD',
});Rules Engine
Define policies with JSON rules:
import { createRulesEngine, type RuleSet } from '@lov3kaizen/agentsea-guardrails';
const rules: RuleSet = {
id: 'content-policy',
name: 'Content Policy',
version: '1.0.0',
rules: [
{
id: 'block-profanity',
name: 'Block Profanity',
conditions: [
{ field: 'input', operator: 'matches', value: '\\b(bad|word)\\b' },
],
actions: [
{ type: 'block', params: { reason: 'Profanity detected' } },
],
priority: 100,
enabled: true,
},
{
id: 'redact-emails',
name: 'Redact Emails',
conditions: [
{ field: 'input', operator: 'matches', value: '[a-z]+@[a-z]+\\.[a-z]+' },
],
actions: [
{ type: 'transform', params: { pattern: '...', replacement: '[EMAIL]' } },
],
priority: 80,
enabled: true,
},
],
};
const engine = createRulesEngine({ defaultAction: 'allow' });
engine.loadRuleSet(rules);
const result = await engine.evaluate({
input: 'Contact me at user@example.com',
type: 'input',
metadata: {},
});NestJS Integration
import { Module, Controller, Post, Body } from '@nestjs/common';
import { GuardrailsModule, Guardrailed, BypassGuards } from '@lov3kaizen/agentsea-guardrails/nestjs';
import { z } from 'zod';
@Module({
imports: [
GuardrailsModule.forRoot({
guards: [
{ name: 'toxicity', enabled: true, type: 'input', action: 'block' },
{ name: 'pii', enabled: true, type: 'both', action: 'transform' },
{ name: 'prompt-injection', enabled: true, type: 'input', action: 'block' },
],
failureMode: 'fail-fast',
defaultAction: 'allow',
}),
],
})
export class AppModule {}
const ResponseSchema = z.object({
answer: z.string(),
confidence: z.number(),
});
@Controller('chat')
export class ChatController {
@Post()
@Guardrailed({
input: ['toxicity', 'prompt-injection', 'pii'],
output: ['pii', 'schema'],
schema: ResponseSchema,
})
async chat(@Body() body: { message: string }) {
return { answer: '...', confidence: 0.95 };
}
@Post('admin')
@BypassGuards()
async adminChat(@Body() body: { message: string }) {
// Bypasses all guardrails
return { answer: '...' };
}
}AgentSea Integration
import { Agent } from '@lov3kaizen/agentsea-core';
import { GuardrailsMiddleware, GuardedAgent } from '@lov3kaizen/agentsea-guardrails/agentsea';
// Middleware approach
const agent = new Agent({ /* config */ });
agent.use(new GuardrailsMiddleware(guardrailsConfig));
// Wrapper approach
const guardedAgent = new GuardedAgent(agent, guardrailsEngine);
const response = await guardedAgent.run('User message');Configuration
interface GuardrailsConfig {
// Array of guard configurations
guards: GuardConfig[];
// How to handle failures
// - 'fail-fast': Stop on first failure
// - 'fail-safe': Continue with warnings
// - 'collect-all': Run all guards, collect results
failureMode: 'fail-fast' | 'fail-safe' | 'collect-all';
// Default action when no guard blocks
defaultAction: 'allow' | 'block' | 'warn';
// Telemetry settings
telemetry?: {
logging?: { enabled: boolean; level: string };
metrics?: { enabled: boolean; prefix: string };
tracing?: { enabled: boolean; serviceName: string };
};
}