Massive v2 rewrite

2025-08-02 15:34:04 -05:00 · 2025-08-02 15:34:04 -05:00 · 5f1328f626
commit 5f1328f626
parent 1025f3b523
77 changed files with 28105 additions and 3542 deletions
--- a/src/utils/threat-scoring/database.ts
+++ b/src/utils/threat-scoring/database.ts
@ -0,0 +1,503 @@
+// =============================================================================
+// DATABASE OPERATIONS FOR THREAT SCORING (TypeScript)
+// =============================================================================
+
+import { Level } from 'level';
+// @ts-ignore - level-ttl doesn't have TypeScript definitions
+import ttl from 'level-ttl';
+import { rootDir } from '../../index.js';
+import { join } from 'path';
+import { Readable } from 'stream';
+import * as fs from 'fs';
+import { DB_TTL_CONFIG } from './constants.js';
+
+// Import types from the main threat scoring module
+// Local type definitions for database operations
+type ThreatFeatures = Record<string, any>;
+type AssessmentData = Record<string, any>;
+type SanitizedFeatures = Record<string, any>;
+
+// =============================================================================
+// TYPE DEFINITIONS
+// =============================================================================
+
+interface DatabaseOperation {
+  readonly type: 'put' | 'del';
+  readonly key: string;
+  readonly value?: unknown;
+}
+
+interface ThreatAssessment {
+  readonly score: number;
+  readonly action: 'allow' | 'challenge' | 'block';
+  readonly features: Record<string, unknown>;
+  readonly scoreComponents: Record<string, number>;
+  readonly confidence: number;
+  readonly timestamp: number;
+}
+
+interface BehaviorData {
+  readonly lastScore: number;
+  readonly lastSeen: number;
+  readonly features: Record<string, unknown>;
+  readonly requestCount: number;
+}
+
+interface ReputationData {
+  score: number;
+  incidents: number;
+  blacklisted: boolean;
+  tags: string[];
+  notes?: string;
+  firstSeen?: number;
+  lastUpdate: number;
+  source: 'static_migration' | 'dynamic' | 'manual';
+  migrated?: boolean;
+}
+
+interface RequestHistoryEntry {
+  readonly timestamp: number;
+  readonly method?: string;
+  readonly path?: string;
+  readonly userAgent?: string;
+  readonly score?: number;
+}
+
+interface MigrationRecord {
+  readonly completed: number;
+  readonly count: number;
+}
+
+interface StaticReputationEntry {
+  readonly score?: number;
+  readonly incidents?: number;
+  readonly blacklisted?: boolean;
+  readonly tags?: readonly string[];
+  readonly notes?: string;
+}
+
+interface LevelDatabase {
+  put(key: string, value: unknown): Promise<void>;
+  get(key: string): Promise<unknown>;
+  del(key: string): Promise<void>;
+  batch(operations: readonly DatabaseOperation[]): Promise<void>;
+  createReadStream(options?: DatabaseStreamOptions): AsyncIterable<DatabaseEntry>;
+  iterator(options?: DatabaseStreamOptions): AsyncIterable<[string, unknown]>;
+}
+
+interface DatabaseStreamOptions {
+  readonly gte?: string;
+  readonly lte?: string;
+  readonly limit?: number;
+  readonly reverse?: boolean;
+}
+
+interface DatabaseEntry {
+  readonly key: string;
+  readonly value: unknown;
+}
+
+type SanitizeFeaturesFunction = (features: Record<string, unknown> | ThreatFeatures) => SanitizedFeatures;
+
+// =============================================================================
+// DATABASE INITIALIZATION
+// =============================================================================
+
+// Database paths
+const threatDBPath = join(rootDir, 'db', 'threats');
+const behaviorDBPath = join(rootDir, 'db', 'behavior');
+
+// Ensure database directories exist
+fs.mkdirSync(threatDBPath, { recursive: true });
+fs.mkdirSync(behaviorDBPath, { recursive: true });
+
+// Add read stream support for LevelDB
+function addReadStreamSupport(dbInstance: any): LevelDatabase {
+  if (!dbInstance.createReadStream) {
+    dbInstance.createReadStream = (opts?: DatabaseStreamOptions): AsyncIterable<DatabaseEntry> => 
+      Readable.from((async function* () {
+        for await (const [key, value] of dbInstance.iterator(opts)) {
+          yield { key, value };
+        }
+      })());
+  }
+  return dbInstance as LevelDatabase;
+}
+
+// Initialize databases with proper TTL and stream support
+const rawThreatDB = addReadStreamSupport(new Level(threatDBPath, { valueEncoding: 'json' }));
+export const threatDB: LevelDatabase = addReadStreamSupport(
+  ttl(rawThreatDB, { defaultTTL: DB_TTL_CONFIG.THREAT_DB_TTL })
+);
+
+const rawBehaviorDB = addReadStreamSupport(new Level(behaviorDBPath, { valueEncoding: 'json' }));
+export const behaviorDB: LevelDatabase = addReadStreamSupport(
+  ttl(rawBehaviorDB, { defaultTTL: DB_TTL_CONFIG.BEHAVIOR_DB_TTL })
+);
+
+// =============================================================================
+// DATABASE OPERATIONS
+// =============================================================================
+
+/**
+ * Stores a threat assessment in the database with automatic TTL
+ * @param clientIP - The IP address being assessed
+ * @param assessment - The threat assessment data
+ */
+export async function storeAssessment(clientIP: string, assessment: ThreatAssessment | AssessmentData): Promise<void> {
+  try {
+    // Input validation
+    if (!clientIP || typeof clientIP !== 'string') {
+      throw new Error('Invalid client IP provided');
+    }
+    
+    if (!assessment || typeof assessment !== 'object') {
+      throw new Error('Invalid assessment data provided');
+    }
+
+    const key = `assessment:${clientIP}:${Date.now()}`;
+    
+    // Store assessment with TTL to prevent unbounded growth
+    await threatDB.put(key, assessment);
+  } catch (err) {
+    const error = err as Error;
+    // CRITICAL: Database errors should not crash the threat scorer
+    // Log the error but continue processing - the system can function without
+    // storing assessments, though learning capabilities will be reduced
+    console.error('Failed to store threat assessment:', error.message);
+  }
+}
+
+/**
+ * Updates behavioral models based on observed client behavior
+ * @param clientIP - The IP address to update
+ * @param features - Extracted threat features
+ * @param score - Calculated threat score
+ * @param sanitizeFeatures - Function to sanitize features for storage
+ */
+export async function updateBehavioralModels(
+  clientIP: string,
+  features: Record<string, unknown> | ThreatFeatures,
+  score: number,
+  sanitizeFeatures: SanitizeFeaturesFunction
+): Promise<void> {
+  try {
+    // Input validation
+    if (!clientIP || typeof clientIP !== 'string') {
+      throw new Error('Invalid client IP provided');
+    }
+    
+    if (typeof score !== 'number' || score < 0 || score > 100) {
+      throw new Error('Invalid threat score provided');
+    }
+
+    // Batch database operations for better performance
+    const operations: DatabaseOperation[] = [];
+
+    // Update IP behavior history
+    const behaviorKey = `behavior:${clientIP}`;
+    const existingBehavior = await getBehaviorData(clientIP);
+    
+    const behaviorData: BehaviorData = {
+      lastScore: score,
+      lastSeen: Date.now(),
+      features: sanitizeFeatures(features) as unknown as Record<string, unknown>,
+      requestCount: (existingBehavior?.requestCount || 0) + 1
+    };
+
+    operations.push({
+      type: 'put',
+      key: behaviorKey,
+      value: behaviorData
+    });
+
+    // Update reputation based on observed behavior (automatic reputation management)
+    await updateIPReputation(clientIP, score, features as ThreatFeatures, operations);
+
+    // Execute batch operation if we have operations to perform
+    if (operations.length > 0) {
+      await behaviorDB.batch(operations);
+    }
+  } catch (err) {
+    const error = err as Error;
+    // Log but don't throw - behavioral model updates shouldn't crash the system
+    console.error('Failed to update behavioral models:', error.message);
+  }
+}
+
+/**
+ * Automatic IP reputation management based on observed behavior
+ * @param clientIP - The IP address to update
+ * @param score - Current threat score
+ * @param features - Threat features detected
+ * @param operations - Array to append database operations to
+ */
+export async function updateIPReputation(
+  clientIP: string,
+  score: number,
+  features: ThreatFeatures,
+  operations: DatabaseOperation[]
+): Promise<void> {
+  try {
+    const currentRep: ReputationData = await getReputationData(clientIP) || {
+      score: 0,
+      incidents: 0,
+      blacklisted: false,
+      tags: [],
+      firstSeen: Date.now(),
+      lastUpdate: Date.now(),
+      source: 'dynamic'
+    };
+
+    let reputationChanged = false;
+    const now = Date.now();
+
+    // Automatic reputation scoring based on behavior
+    if (score >= 90) {
+      // Critical threat - significant reputation penalty
+      currentRep.score = Math.min(100, currentRep.score + 25);
+      currentRep.incidents += 1;
+      currentRep.tags = Array.from(new Set([...currentRep.tags, 'critical_threat']));
+      reputationChanged = true;
+    } else if (score >= 75) {
+      // High threat - moderate reputation penalty  
+      currentRep.score = Math.min(100, currentRep.score + 15);
+      currentRep.incidents += 1;
+      currentRep.tags = Array.from(new Set([...currentRep.tags, 'high_threat']));
+      reputationChanged = true;
+    } else if (score >= 50) {
+      // Medium threat - small reputation penalty
+      currentRep.score = Math.min(100, currentRep.score + 5);
+      currentRep.tags = Array.from(new Set([...currentRep.tags, 'medium_threat']));
+      reputationChanged = true;
+    } else if (score <= 10) {
+      // Very low threat - slowly improve reputation for good behavior
+      currentRep.score = Math.max(0, currentRep.score - 1);
+      if (currentRep.score === 0) {
+        currentRep.tags = currentRep.tags.filter(tag => !tag.includes('threat'));
+      }
+      reputationChanged = true;
+    }
+
+    // Add specific behavior tags for detailed tracking
+    if (features.userAgent?.isAttackTool) {
+      currentRep.tags = Array.from(new Set([...currentRep.tags, 'attack_tool']));
+      currentRep.score = Math.min(100, currentRep.score + 20);
+      reputationChanged = true;
+    }
+
+    if (features.pattern?.patternAnomalies?.includes('enumeration_detected')) {
+      currentRep.tags = Array.from(new Set([...currentRep.tags, 'enumeration']));
+      currentRep.score = Math.min(100, currentRep.score + 10);
+      reputationChanged = true;
+    }
+
+    if (features.pattern?.patternAnomalies?.includes('bruteforce_detected')) {
+      currentRep.tags = Array.from(new Set([...currentRep.tags, 'bruteforce']));
+      currentRep.score = Math.min(100, currentRep.score + 15);
+      reputationChanged = true;
+    }
+
+    if (features.velocity?.impossibleTravel) {
+      currentRep.tags = Array.from(new Set([...currentRep.tags, 'impossible_travel']));
+      currentRep.score = Math.min(100, currentRep.score + 12);
+      reputationChanged = true;
+    }
+
+    // Automatic blacklisting for consistently bad actors
+    if (currentRep.score >= 80 && currentRep.incidents >= 5) {
+      currentRep.blacklisted = true;
+      currentRep.tags = Array.from(new Set([...currentRep.tags, 'auto_blacklisted']));
+      reputationChanged = true;
+      console.log(`Threat scorer: Auto-blacklisted ${clientIP} (score: ${currentRep.score}, incidents: ${currentRep.incidents})`);
+    }
+
+    // Automatic reputation decay over time (good IPs recover slowly)
+    const daysSinceLastUpdate = (now - currentRep.lastUpdate) / (1000 * 60 * 60 * 24);
+    if (daysSinceLastUpdate > 7 && currentRep.score > 0) {
+      // Decay reputation by 1 point per week for inactive IPs
+      const decayAmount = Math.floor(daysSinceLastUpdate / 7);
+      currentRep.score = Math.max(0, currentRep.score - decayAmount);
+      if (currentRep.score < 50) {
+        currentRep.blacklisted = false; // Unblacklist if score drops
+      }
+      reputationChanged = true;
+    }
+
+    // Only update database if reputation actually changed
+    if (reputationChanged) {
+      currentRep.lastUpdate = now;
+      operations.push({
+        type: 'put',
+        key: `reputation:${clientIP}`,
+        value: currentRep
+      });
+      
+      console.log(`Threat scorer: Updated reputation for ${clientIP}: score=${currentRep.score}, incidents=${currentRep.incidents}, tags=[${currentRep.tags.join(', ')}]`);
+    }
+  } catch (err) {
+    const error = err as Error;
+    console.error('Failed to update IP reputation:', error.message);
+  }
+}
+
+// =============================================================================
+// HELPER METHODS
+// =============================================================================
+
+/**
+ * Retrieves behavioral data for a specific IP address
+ * @param clientIP - The IP address to look up
+ * @returns Behavioral data or null if not found
+ */
+export async function getBehaviorData(clientIP: string): Promise<BehaviorData | null> {
+  try {
+    if (!clientIP || typeof clientIP !== 'string') {
+      return null;
+    }
+    
+    const data = await behaviorDB.get(`behavior:${clientIP}`);
+    return data as BehaviorData;
+  } catch (err) {
+    return null; // Key doesn't exist or database error
+  }
+}
+
+/**
+ * Retrieves reputation data for a specific IP address
+ * @param clientIP - The IP address to look up
+ * @returns Reputation data or null if not found
+ */
+export async function getReputationData(clientIP: string): Promise<ReputationData | null> {
+  try {
+    if (!clientIP || typeof clientIP !== 'string') {
+      return null;
+    }
+    
+    const data = await threatDB.get(`reputation:${clientIP}`);
+    return data as ReputationData;
+  } catch (err) {
+    return null; // Key doesn't exist or database error
+  }
+}
+
+/**
+ * Gets request history from database within a specific time window
+ * @param ip - The IP address to get history for
+ * @param timeWindow - Time window in milliseconds
+ * @returns Array of request history entries
+ */
+export async function getRequestHistory(ip: string, timeWindow: number): Promise<RequestHistoryEntry[]> {
+  const history: RequestHistoryEntry[] = [];
+  
+  // Input validation
+  if (!ip || typeof ip !== 'string') {
+    return history;
+  }
+  
+  if (typeof timeWindow !== 'number' || timeWindow <= 0) {
+    return history;
+  }
+
+  const cutoff = Date.now() - timeWindow;
+
+  try {
+    // Get from database
+    const stream = threatDB.createReadStream({
+      gte: `request:${ip}:${cutoff}`,
+      lte: `request:${ip}:${Date.now()}`,
+      limit: 1000
+    });
+
+    for await (const { value } of stream) {
+      const entry = value as RequestHistoryEntry;
+      if (entry.timestamp && entry.timestamp > cutoff) {
+        history.push(entry);
+      }
+    }
+  } catch (err) {
+    const error = err as Error;
+    console.warn('Failed to get request history:', error.message);
+  }
+
+  return history;
+}
+
+/**
+ * One-time migration of static IP reputation data to database
+ * Safely migrates existing JSON reputation data to the new database format
+ */
+export async function migrateStaticReputationData(): Promise<void> {
+  try {
+    const ipReputationPath = join(rootDir, 'data', 'ip-reputation.json');
+    
+    if (!fs.existsSync(ipReputationPath)) {
+      return;
+    }
+
+    // Check if we've already migrated
+    const migrationKey = 'reputation:migration:completed';
+    try {
+      await threatDB.get(migrationKey);
+      return; // Already migrated
+    } catch (err) {
+      // Not migrated yet, proceed
+    }
+
+    console.log('Threat scorer: Migrating static IP reputation data to database...');
+    
+    const staticDataRaw = fs.readFileSync(ipReputationPath, 'utf8');
+    const staticData = JSON.parse(staticDataRaw) as Record<string, StaticReputationEntry>;
+    const operations: DatabaseOperation[] = [];
+
+    for (const [ip, repData] of Object.entries(staticData)) {
+      // Validate IP format (basic validation)
+      if (!ip || typeof ip !== 'string') {
+        console.warn(`Skipping invalid IP during migration: ${ip}`);
+        continue;
+      }
+
+      const migratedData: ReputationData = {
+        score: repData.score || 0,
+        incidents: repData.incidents || 0,
+        blacklisted: repData.blacklisted || false,
+        tags: Array.isArray(repData.tags) ? [...repData.tags] : [],
+        notes: repData.notes || '',
+        lastUpdate: Date.now(),
+        source: 'static_migration',
+        migrated: true
+      };
+
+      operations.push({
+        type: 'put',
+        key: `reputation:${ip}`,
+        value: migratedData
+      });
+    }
+
+    // Mark migration as complete
+    const migrationRecord: MigrationRecord = {
+      completed: Date.now(),
+      count: operations.length
+    };
+
+    operations.push({
+      type: 'put',
+      key: migrationKey,
+      value: migrationRecord
+    });
+
+    if (operations.length > 1) {
+      await threatDB.batch(operations);
+      console.log(`Threat scorer: Migrated ${operations.length - 1} IP reputation records to database`);
+
+      // Optionally archive the static file
+      const archivePath = ipReputationPath + '.migrated';
+      fs.renameSync(ipReputationPath, archivePath);
+      console.log(`Threat scorer: Static IP reputation file archived to ${archivePath}`);
+    }
+  } catch (err) {
+    const error = err as Error;
+    console.error('Failed to migrate static IP reputation data:', error.message);
+  }
+}