johnzfitch · Copilot · Mar 15, 2026 · Mar 15, 2026 · Mar 15, 2026 · Mar 16, 2026
diff --git a/stubs/cowork/credential_classifier.js b/stubs/cowork/credential_classifier.js
@@ -1,5 +1,27 @@
 'use strict';
 
+// ============================================================================
+// CREDENTIAL CLASSIFIER
+// ============================================================================
+// This module protects user privacy by detecting and redacting credentials
+// from logs, traces, and IPC messages. It's a critical security component
+// that prevents accidental leakage of API keys, tokens, and passwords.
+//
+// PRIVACY PROTECTION:
+//   - Redacts OAuth tokens before logging (complies with Anthropic AUP)
+//   - Prevents API keys from appearing in debug output
+//   - Protects session cookies and authentication headers
+//
+// DETECTION METHODS:
+//   1. Known token prefixes (sk-ant-, eyJ, ghp_, etc.)
+//   2. Shannon entropy analysis for high-randomness strings
+//   3. Environment variable key patterns (TOKEN, SECRET, PASSWORD, etc.)
+//   4. HTTP header patterns (Authorization, Cookie)
+//
+// Used throughout the codebase wherever logging or tracing occurs.
+
+// Known token prefixes for various services
+// These are used to quickly identify credentials without entropy analysis
 const TOKEN_PREFIXES = [
   { prefix: 'sk-ant-sid',  label: 'anthropic-session-key' },
   { prefix: 'sk-ant-',     label: 'anthropic-api-key' },
@@ -14,13 +36,30 @@ const TOKEN_PREFIXES = [
   { prefix: 'sk-proj-',    label: 'openai-project-key' },
 ];
 
+// Entropy threshold for detecting high-randomness strings (likely secrets)
+// Shannon entropy measures the randomness of a string - high entropy
+// indicates a cryptographically random value (like an API key or token)
 const HIGH_ENTROPY_THRESHOLD = 3.5;
 const MIN_SECRET_LENGTH = 16;
 
 function shannonEntropy(str) {
+  // Calculate Shannon entropy to measure string randomness.
+  // A result >= HIGH_ENTROPY_THRESHOLD (3.5 bits/char) is treated as a likely random secret.
+  // 
+  // Formula: H(X) = -Σ p(x) * log₂(p(x))
+  // where p(x) is the probability of character x
+  //
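+  // Example entropies (bits per character; the maximum is log₂ of the distinct-character count):
+  //   "aaaaaaaa" = 0 (no randomness)
+  //   "abcdefgh" = 3.0 (8 distinct characters, the maximum for an 8-character string)
+  //   "xK9mP2qLw7Rt5vZ3" = 4.0 (16 distinct characters, high randomness)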
   if (typeof str !== 'string' || str.length === 0) return 0;
+
+  // Count character frequencies
   const freq = {};
   for (const ch of str) freq[ch] = (freq[ch] || 0) + 1;
+
+  // Calculate entropy
   const len = str.length;
   let entropy = 0;
   for (const count of Object.values(freq)) {
@@ -31,46 +70,75 @@ function shannonEntropy(str) {
 }
 
 function isLikelyCredentialValue(value) {
+  // Detect if a value looks like a credential (strings shorter than MIN_SECRET_LENGTH never match):
+  //   1. Known token prefixes (sk-ant-, eyJ, ghp_, etc.)
+  //   2. Length >= 20 and Shannon entropy >= HIGH_ENTROPY_THRESHOLD
   if (typeof value !== 'string' || value.length < MIN_SECRET_LENGTH) return false;
   if (TOKEN_PREFIXES.some(t => value.startsWith(t.prefix))) return true;
   if (value.length >= 20 && shannonEntropy(value) >= HIGH_ENTROPY_THRESHOLD) return true;
   return false;
 }
 
 function isLikelyCredentialKey(key) {
+  // Detect if an environment variable or object key name suggests credentials.
+  // Case-insensitive substring match on: token, secret, key, credential, auth, password, cookie.
+  // Names that are exactly PATH, HOME, USER, SHELL, TERM, LANG, or NODE_ENV are never flagged.
   if (typeof key !== 'string') return false;
   return /token|secret|key|credential|auth|password|cookie/i.test(key)
     && !/^(PATH|HOME|USER|SHELL|TERM|LANG|NODE_ENV)$/i.test(key);
 }
 
 function classifyEnvEntry(key, value) {
+  // Classify a key/value pair as 'credential', 'suspect', or 'safe'.
+  //
+  // Rules, checked in order:
+  //   - 'credential': the value itself looks like a secret (known prefix or high entropy)
+  //   - 'suspect': the key name looks sensitive and the value is a string longer than 8 chars
+  //   - 'safe': anything else; logged as-is
+  // redactCredentials() replaces the value with [REDACTED] for both non-'safe' results.
   if (isLikelyCredentialValue(value)) return 'credential';
   if (isLikelyCredentialKey(key) && typeof value === 'string' && value.length > 8) return 'suspect';
   return 'safe';
 }
 
 function redactCredentials(text) {
+  // PRIVACY PROTECTION: Redact credentials from any text before logging.
+  // This is the primary defense against credential leakage in logs and traces.
+  //
+  // Handles multiple formats:
+  //   1. Environment variables: ANTHROPIC_API_KEY=sk-ant-123... → ANTHROPIC_API_KEY=[REDACTED]
+  //   2. JSON objects: {"token": "eyJ..."} → {"token": "[REDACTED]"}
+  //   3. HTTP headers: Authorization: Bearer sk-ant-123 → Authorization: Bearer [REDACTED]
+  //   4. Bare tokens: sk-ant-sid123456... → [REDACTED]
+  //
+  // IMPORTANT: This function is called on ALL trace() and log() output to ensure
+  // OAuth tokens never appear in logs (required by Anthropic Acceptable Use Policy).
   let result = String(text);
-  // 1. Env-var-style: KEY=value
+
+  // Pattern 1: Environment variable style (KEY=value)
   result = result.replace(/([A-Z_][A-Z0-9_]*=)([^\s&"]+)/g, (match, prefix, value) => {
     const key = prefix.slice(0, -1);
     if (classifyEnvEntry(key, value) !== 'safe') return prefix + '[REDACTED]';
     return match;
   });
-  // 2. JSON-style: "key": "value"
+
+  // Pattern 2: JSON style ("key": "value")
   result = result.replace(/("[^"]*"\s*:\s*")([^"]+)(")/g, (match, pre, value, post) => {
     const key = pre.match(/"([^"]*)"/)?.[1] || '';
     if (classifyEnvEntry(key, value) !== 'safe') return pre + '[REDACTED]' + post;
     return match;
   });
-  // 3. HTTP headers
+
+  // Pattern 3: HTTP Authorization and Cookie headers
   result = result.replace(/(Authorization:\s*(?:Bearer\s+|Basic\s+))([^\s\r\n]+)/gi, '$1[REDACTED]');
   result = result.replace(/(Cookie:\s*)([^\r\n]+)/gi, '$1[REDACTED]');
-  // 4. Bare high-entropy tokens
+
+  // Pattern 4: Bare high-entropy tokens (last resort catch-all)
   result = result.replace(/\b([A-Za-z0-9_-]{32,})\b/g, (match) => {
     if (isLikelyCredentialValue(match)) return '[REDACTED]';
     return match;
   });
+
   return result;
 }
 

diff --git a/stubs/cowork/dirs.js b/stubs/cowork/dirs.js
@@ -2,14 +2,41 @@ const os = require('os');
 const path = require('path');
 const fs = require('fs');
 
+// ============================================================================
+// DIRECTORY STRUCTURE & PATH MANAGEMENT
+// ============================================================================
+// This module provides XDG-compliant directory paths for Linux and handles
+// VM-to-host path translation. It's the foundation for all file operations
+// in the cowork system.
+//
+// XDG BASE DIRECTORY SPECIFICATION:
+//   - XDG_CONFIG_HOME: ~/.config (application configuration)
+//   - XDG_DATA_HOME: ~/.local/share (application data)
+//   - XDG_CACHE_HOME: ~/.cache (disposable cache files)
+//   - XDG_STATE_HOME: ~/.local/state (persistent state/logs)
+//
+// PATH TRANSLATION:
+//   VM path:   /sessions/<name>/mnt/.claude
+//   Host path: ~/.config/Claude/local-agent-mode-sessions/sessions/<name>/mnt/.claude
+//
+// SECURITY:
+//   - Path traversal protection (blocks ../ patterns)
+//   - Validates that translated VM paths stay within the sessions base directory
+//   - Canonicalization to resolve symlinks safely
+
 function resolveAbsoluteDirectory(value, fallbackPath) {
+  // Resolve directory path with fallback if value is not a valid absolute path.
+  // Used for XDG environment variables that may not be set.
   if (typeof value === 'string' && value.trim() && path.isAbsolute(value)) {
     return path.resolve(value);
   }
   return path.resolve(fallbackPath);
 }
 
 function getCoworkSessionDataDir(dirs, localSessionId) {
+  // Get the data directory for a specific session.
+  // Path: ~/.local/share/claude-cowork/sessions/<sessionId>
+  // Used for persistent session data (file registry, etc.)
   if (!dirs || typeof dirs !== 'object') {
     return null;
   }
@@ -20,6 +47,9 @@ function getCoworkSessionDataDir(dirs, localSessionId) {
 }
 
 function getCoworkSessionStateDir(dirs, localSessionId) {
+  // Get the state directory for a specific session.
+  // Path: ~/.local/state/claude-cowork/sessions/<sessionId>
+  // Used for runtime state (watch state, ephemeral data)
   if (!dirs || typeof dirs !== 'object') {
     return null;
   }
@@ -30,34 +60,52 @@ function getCoworkSessionStateDir(dirs, localSessionId) {
 }
 
 function getSessionFileRegistryPath(dirs, localSessionId) {
+  // Get path to session's file registry (files.jsonl).
+  // This JSONL file tracks all files the session has accessed.
   const sessionDir = getCoworkSessionDataDir(dirs, localSessionId);
   return sessionDir ? path.join(sessionDir, 'files.jsonl') : null;
 }
 
 function getSessionWatchStatePath(dirs, localSessionId) {
+  // Get path to session's file watch state (watch-state.json).
+  // Tracks which files are being watched for changes.
   const sessionDir = getCoworkSessionStateDir(dirs, localSessionId);
   return sessionDir ? path.join(sessionDir, 'watch-state.json') : null;
 }
 
 function createDirs(options) {
+  // Create and return all directory paths used by claude-cowork-linux.
+  // Follows XDG Base Directory Specification for Linux compliance.
+  //
+  // Directory structure:
+  //   Config:  ~/.config/Claude (app settings, session metadata)
+  //   Data:    ~/.local/share/claude-cowork (persistent data)
+  //   Cache:   ~/.cache/claude-cowork (disposable cache)
+  //   State:   ~/.local/state/claude-cowork (logs, runtime state)
+  //
+  // Legacy compatibility: Also includes macOS paths for migration support.
   const env = options && options.env && typeof options.env === 'object' ? options.env : process.env;
   const homeDir = options && typeof options.homeDir === 'string' && options.homeDir.trim()
     ? path.resolve(options.homeDir)
     : os.homedir();
 
+  // Resolve XDG paths with fallbacks to Linux defaults
   const xdgConfigHome = resolveAbsoluteDirectory(env.XDG_CONFIG_HOME, path.join(homeDir, '.config'));
   const xdgDataHome = resolveAbsoluteDirectory(env.XDG_DATA_HOME, path.join(homeDir, '.local', 'share'));
   const xdgCacheHome = resolveAbsoluteDirectory(env.XDG_CACHE_HOME, path.join(homeDir, '.cache'));
   const xdgStateHome = resolveAbsoluteDirectory(env.XDG_STATE_HOME, path.join(homeDir, '.local', 'state'));
   const xdgRuntimeDir = resolveAbsoluteDirectory(env.XDG_RUNTIME_DIR, path.join(xdgStateHome, 'runtime'));
 
+  // Legacy macOS path for migration/compatibility
   const legacyClaudeAppSupportRoot = path.join(homeDir, 'Library', 'Application Support', 'Claude');
 
+  // Claude Desktop paths (shared between macOS and Linux builds)
   const claudeConfigRoot = path.join(xdgConfigHome, 'Claude');
   const claudeLogsDir = path.join(claudeConfigRoot, 'logs');
   const claudeLocalAgentRoot = path.join(claudeConfigRoot, 'local-agent-mode-sessions');
   const claudeVmBundlesDir = path.join(claudeConfigRoot, 'vm_bundles');
 
+  // Cowork-specific paths (Linux-specific extensions)
   const coworkConfigRoot = path.join(xdgConfigHome, 'claude-cowork');
   const coworkDataRoot = path.join(xdgDataHome, 'claude-cowork');
   const coworkCacheRoot = path.join(xdgCacheHome, 'claude-cowork');
@@ -97,11 +145,29 @@ function createDirs(options) {
 }
 
 function isPathSafe(basePath, targetPath) {
+  // SECURITY: Check if targetPath stays within basePath (path traversal protection).
+  // The check is purely lexical (path.resolve): ../ segments or an absolute targetPath
+  // that land outside basePath return false. Symlinks are NOT resolved here.
+  // Example:
+  //   isPathSafe('/sessions', 'user/project') → true
+  //   isPathSafe('/sessions', '../etc/passwd') → false
   const resolved = path.resolve(basePath, targetPath);
   return resolved.startsWith(path.resolve(basePath) + path.sep) || resolved === path.resolve(basePath);
 }
 
 function translateVmPathStrict(sessionsBase, vmPath) {
+  // SECURITY: Translate VM path to host path with strict validation.
+  // VM paths start with /sessions/ and must be translated to the real
+  // sessions directory on the Linux host.
+  //
+  // Translation example:
+  //   /sessions/demo/mnt/.claude
+  //     → ~/.config/Claude/local-agent-mode-sessions/sessions/demo/mnt/.claude
+  //
+  // SECURITY CHECKS:
+  //   - Validates path starts with /sessions/
+  //   - Blocks path traversal attempts (../)
+  //   - Ensures result stays within sessions base directory
   if (typeof vmPath !== 'string' || !vmPath.startsWith('/sessions/')) {
     throw new Error('Not a VM path: ' + vmPath);
   }
@@ -113,6 +179,11 @@ function translateVmPathStrict(sessionsBase, vmPath) {
 }
 
 function canonicalizeHostPath(hostPath) {
+  // Resolve symlinks in a host path to get the canonical absolute path.
+  // Falls back to partial resolution if intermediate directories don't exist.
+  //
+  // Example:
+  //   /home/user/project → /home/realuser/project (if /home/user is a symlink)
   if (typeof hostPath !== 'string') {
     return hostPath;
   }
@@ -122,9 +193,12 @@ function canonicalizeHostPath(hostPath) {
   if (!path.isAbsolute(hostPath)) {
     return hostPath;
   }
+
+  // Try to resolve the full path
   try {
     return fs.realpathSync(hostPath);
   } catch (_) {
+    // If full path doesn't exist, resolve as much as possible
     const segments = [];
     let current = path.dirname(hostPath);
     segments.push(path.basename(hostPath));
@@ -141,10 +215,17 @@ function canonicalizeHostPath(hostPath) {
 }
 
 function canonicalizeVmPathStrict(sessionsBase, vmPath) {
+  // Translate VM path to host path, then canonicalize it.
+  // Combines translateVmPathStrict() + canonicalizeHostPath().
   return canonicalizeHostPath(translateVmPathStrict(sessionsBase, vmPath));
 }
 
 function canonicalizePathForHostAccess(sessionsBase, inputPath) {
+  // Canonicalize any path (VM or host) for host filesystem access.
+  // - If path starts with /sessions/, translate it first
+  // - Otherwise, canonicalize it as a host path
+  //
+  // This is the main entry point for path resolution throughout the codebase.
   if (typeof inputPath === 'string' && inputPath.startsWith('/sessions/')) {
     return canonicalizeVmPathStrict(sessionsBase, inputPath);
   }