Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 72 additions & 4 deletions stubs/cowork/credential_classifier.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,27 @@
'use strict';

// ============================================================================
// CREDENTIAL CLASSIFIER
// ============================================================================
// This module protects user privacy by detecting and redacting credentials
// from logs, traces, and IPC messages. It's a critical security component
// that prevents accidental leakage of API keys, tokens, and passwords.
//
// PRIVACY PROTECTION:
// - Redacts OAuth tokens before logging (complies with Anthropic AUP)
// - Prevents API keys from appearing in debug output
// - Protects session cookies and authentication headers
//
// DETECTION METHODS:
// 1. Known token prefixes (sk-ant-, eyJ, ghp_, etc.)
// 2. Shannon entropy analysis for high-randomness strings
// 3. Environment variable key patterns (TOKEN, SECRET, PASSWORD, etc.)
// 4. HTTP header patterns (Authorization, Cookie)
//
// Used throughout the codebase wherever logging or tracing occurs.

// Known token prefixes for various services
// These are used to quickly identify credentials without entropy analysis
const TOKEN_PREFIXES = [
{ prefix: 'sk-ant-sid', label: 'anthropic-session-key' },
{ prefix: 'sk-ant-', label: 'anthropic-api-key' },
Expand All @@ -14,13 +36,30 @@ const TOKEN_PREFIXES = [
{ prefix: 'sk-proj-', label: 'openai-project-key' },
];

// Entropy threshold (bits per character) at or above which a string is
// treated as high-randomness and therefore a likely secret.
// Shannon entropy measures how evenly a string's characters are distributed;
// random values such as API keys or tokens tend to score high.
const HIGH_ENTROPY_THRESHOLD = 3.5;
// Strings shorter than this are never classified as credential values,
// regardless of prefix or entropy.
const MIN_SECRET_LENGTH = 16;

function shannonEntropy(str) {
// Calculate Shannon entropy to measure string randomness.
// High entropy (>3.5) indicates a cryptographically random value.
//
// Formula: H(X) = -Σ p(x) * log₂(p(x))
// where p(x) is the probability of character x
//
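// Example entropies (a string of N characters can reach at most log₂(N)):
// "aaaaaaaa" = 0 (no randomness)
// "abcdefgh" = 3.0 (8 distinct characters, the maximum for length 8)
// "xK9mP2qL" = 3.0 (also 8 distinct characters; only frequencies matter, not which characters)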
if (typeof str !== 'string' || str.length === 0) return 0;

// Count character frequencies
const freq = {};
for (const ch of str) freq[ch] = (freq[ch] || 0) + 1;

// Calculate entropy
const len = str.length;
let entropy = 0;
for (const count of Object.values(freq)) {
Expand All @@ -31,46 +70,75 @@ function shannonEntropy(str) {
}

function isLikelyCredentialValue(value) {
  // Heuristically decide whether `value` looks like a credential.
  //
  // Only strings of at least MIN_SECRET_LENGTH characters are considered.
  // Such a string qualifies when either:
  //   - it starts with one of the TOKEN_PREFIXES entries, or
  //   - it is at least 20 characters long and its Shannon entropy is at or
  //     above HIGH_ENTROPY_THRESHOLD.
  //
  // Returns true for a likely credential, false otherwise (including for
  // every non-string input).
  if (typeof value !== 'string') return false;
  if (value.length < MIN_SECRET_LENGTH) return false;

  // Cheap check first: a known service prefix is conclusive on its own.
  for (const { prefix } of TOKEN_PREFIXES) {
    if (value.startsWith(prefix)) return true;
  }

  // Otherwise fall back to the entropy heuristic, which is only applied to
  // longer strings.
  const longEnoughForEntropy = value.length >= 20;
  return longEnoughForEntropy && shannonEntropy(value) >= HIGH_ENTROPY_THRESHOLD;
}

function isLikelyCredentialKey(key) {
  // Decide whether an environment-variable or object key *name* hints at a
  // credential.
  //
  // The name matches when it contains, case-insensitively and anywhere in the
  // string, one of: token, secret, key, credential, auth, password, cookie.
  // A name that is exactly one of PATH, HOME, USER, SHELL, TERM, LANG or
  // NODE_ENV (case-insensitive) is always treated as non-sensitive.
  //
  // NOTE(review): none of the excluded names contains a sensitive substring,
  // so the exclusion currently acts only as a safeguard for future pattern
  // changes.
  //
  // Returns false for any non-string key.
  if (typeof key !== 'string') return false;

  const sensitiveName = /token|secret|key|credential|auth|password|cookie/i;
  const knownSafeName = /^(PATH|HOME|USER|SHELL|TERM|LANG|NODE_ENV)$/i;

  if (!sensitiveName.test(key)) return false;
  return !knownSafeName.test(key);
}

function classifyEnvEntry(key, value) {
  // Classify a key/value pair by how sensitive it appears.
  //
  // Returns one of:
  //   'credential' - the value itself looks like a secret
  //                  (see isLikelyCredentialValue)
  //   'suspect'    - the key name looks sensitive (see isLikelyCredentialKey)
  //                  and the value is a string longer than 8 characters
  //   'safe'       - neither of the above
  //
  // The value check takes precedence, so a secret-looking value is reported
  // as 'credential' whatever its key is called.
  if (isLikelyCredentialValue(value)) {
    return 'credential';
  }

  if (isLikelyCredentialKey(key) && typeof value === 'string' && value.length > 8) {
    return 'suspect';
  }

  return 'safe';
}

function redactCredentials(text) {
  // PRIVACY PROTECTION: Return a copy of `text` (coerced with String()) in
  // which anything that looks like a credential is replaced by "[REDACTED]".
  //
  // Five passes run in this fixed order, each on the output of the previous:
  //   1. KEY=value pairs whose key starts with an uppercase letter or "_" and
  //      continues with uppercase letters, digits or "_"; the value runs up
  //      to whitespace, "&" or a double quote.
  //   2. JSON-style "key": "value" pairs.
  //   3. "Authorization:" headers using the Bearer or Basic scheme
  //      (case-insensitive); only the credential after the scheme is masked.
  //   4. "Cookie:" headers (case-insensitive); the rest of the line is masked.
  //   5. Bare runs of 32 or more characters from [A-Za-z0-9_-] that
  //      isLikelyCredentialValue() accepts.
  //
  // Passes 1 and 2 mask the value whenever classifyEnvEntry() returns
  // anything other than 'safe'.
  const MASK = '[REDACTED]';

  // Pass 1 callback: `keyWithEquals` includes the trailing "=".
  const maskEnvPair = (whole, keyWithEquals, rawValue) => {
    const name = keyWithEquals.slice(0, -1);
    return classifyEnvEntry(name, rawValue) === 'safe' ? whole : keyWithEquals + MASK;
  };

  // Pass 2 callback: `opening` is `"key": "` and `closing` is the final quote.
  const maskJsonPair = (whole, opening, rawValue, closing) => {
    const name = opening.match(/"([^"]*)"/)?.[1] || '';
    return classifyEnvEntry(name, rawValue) === 'safe' ? whole : opening + MASK + closing;
  };

  // Pass 5 callback: last-resort check on long token-shaped runs.
  const maskBareToken = (candidate) => (isLikelyCredentialValue(candidate) ? MASK : candidate);

  return String(text)
    .replace(/([A-Z_][A-Z0-9_]*=)([^\s&"]+)/g, maskEnvPair)
    .replace(/("[^"]*"\s*:\s*")([^"]+)(")/g, maskJsonPair)
    .replace(/(Authorization:\s*(?:Bearer\s+|Basic\s+))([^\s\r\n]+)/gi, '$1[REDACTED]')
    .replace(/(Cookie:\s*)([^\r\n]+)/gi, '$1[REDACTED]')
    .replace(/\b([A-Za-z0-9_-]{32,})\b/g, maskBareToken);
}

Expand Down
81 changes: 81 additions & 0 deletions stubs/cowork/dirs.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,41 @@ const os = require('os');
const path = require('path');
const fs = require('fs');

// ============================================================================
// DIRECTORY STRUCTURE & PATH MANAGEMENT
// ============================================================================
// This module provides XDG-compliant directory paths for Linux and handles
// VM-to-host path translation. It's the foundation for all file operations
// in the cowork system.
//
// XDG BASE DIRECTORY SPECIFICATION:
// - XDG_CONFIG_HOME: ~/.config (application configuration)
// - XDG_DATA_HOME: ~/.local/share (application data)
// - XDG_CACHE_HOME: ~/.cache (disposable cache files)
// - XDG_STATE_HOME: ~/.local/state (persistent state/logs)
//
// PATH TRANSLATION:
// VM path: /sessions/<name>/mnt/.claude
// Host path: ~/.config/Claude/local-agent-mode-sessions/sessions/<name>/mnt/.claude
//
// SECURITY:
// - Path traversal protection (blocks ../ patterns)
// - Validates all paths stay within sessions base directory
// - Canonicalization to resolve symlinks safely

function resolveAbsoluteDirectory(value, fallbackPath) {
  // Pick a directory path, preferring `value` when it is usable.
  //
  // `value` is used only when it is a string that is non-empty after
  // trimming and that path.isAbsolute() accepts; in every other case
  // `fallbackPath` is used. The chosen path is normalised with
  // path.resolve() before being returned.
  const isUsable = typeof value === 'string'
    && value.trim()
    && path.isAbsolute(value);

  const chosen = isUsable ? value : fallbackPath;
  return path.resolve(chosen);
}

function getCoworkSessionDataDir(dirs, localSessionId) {
// Get the data directory for a specific session.
// Path: ~/.local/share/claude-cowork/sessions/<sessionId>
// Used for persistent session data (file registry, etc.)
if (!dirs || typeof dirs !== 'object') {
return null;
}
Expand All @@ -20,6 +47,9 @@ function getCoworkSessionDataDir(dirs, localSessionId) {
}

function getCoworkSessionStateDir(dirs, localSessionId) {
// Get the state directory for a specific session.
// Path: ~/.local/state/claude-cowork/sessions/<sessionId>
// Used for runtime state (watch state, ephemeral data)
if (!dirs || typeof dirs !== 'object') {
return null;
}
Expand All @@ -30,34 +60,52 @@ function getCoworkSessionStateDir(dirs, localSessionId) {
}

function getSessionFileRegistryPath(dirs, localSessionId) {
  // Build the path of the session's file registry, `files.jsonl`, inside the
  // directory returned by getCoworkSessionDataDir().
  // Returns null when no session data directory can be determined.
  const dataDir = getCoworkSessionDataDir(dirs, localSessionId);
  if (!dataDir) {
    return null;
  }
  return path.join(dataDir, 'files.jsonl');
}

function getSessionWatchStatePath(dirs, localSessionId) {
  // Build the path of the session's watch-state file, `watch-state.json`,
  // inside the directory returned by getCoworkSessionStateDir().
  // Returns null when no session state directory can be determined.
  const stateDir = getCoworkSessionStateDir(dirs, localSessionId);
  if (!stateDir) {
    return null;
  }
  return path.join(stateDir, 'watch-state.json');
}

function createDirs(options) {
// Create and return all directory paths used by claude-cowork-linux.
// Follows XDG Base Directory Specification for Linux compliance.
//
// Directory structure:
// Config: ~/.config/Claude (app settings, session metadata)
// Data: ~/.local/share/claude-cowork (persistent data)
// Cache: ~/.cache/claude-cowork (disposable cache)
// State: ~/.local/state/claude-cowork (logs, runtime state)
//
// Legacy compatibility: Also includes macOS paths for migration support.
const env = options && options.env && typeof options.env === 'object' ? options.env : process.env;
const homeDir = options && typeof options.homeDir === 'string' && options.homeDir.trim()
? path.resolve(options.homeDir)
: os.homedir();

// Resolve XDG paths with fallbacks to Linux defaults
const xdgConfigHome = resolveAbsoluteDirectory(env.XDG_CONFIG_HOME, path.join(homeDir, '.config'));
const xdgDataHome = resolveAbsoluteDirectory(env.XDG_DATA_HOME, path.join(homeDir, '.local', 'share'));
const xdgCacheHome = resolveAbsoluteDirectory(env.XDG_CACHE_HOME, path.join(homeDir, '.cache'));
const xdgStateHome = resolveAbsoluteDirectory(env.XDG_STATE_HOME, path.join(homeDir, '.local', 'state'));
const xdgRuntimeDir = resolveAbsoluteDirectory(env.XDG_RUNTIME_DIR, path.join(xdgStateHome, 'runtime'));

// Legacy macOS path for migration/compatibility
const legacyClaudeAppSupportRoot = path.join(homeDir, 'Library', 'Application Support', 'Claude');

// Claude Desktop paths (shared between macOS and Linux builds)
const claudeConfigRoot = path.join(xdgConfigHome, 'Claude');
const claudeLogsDir = path.join(claudeConfigRoot, 'logs');
const claudeLocalAgentRoot = path.join(claudeConfigRoot, 'local-agent-mode-sessions');
const claudeVmBundlesDir = path.join(claudeConfigRoot, 'vm_bundles');

// Cowork-specific paths (Linux-specific extensions)
const coworkConfigRoot = path.join(xdgConfigHome, 'claude-cowork');
const coworkDataRoot = path.join(xdgDataHome, 'claude-cowork');
const coworkCacheRoot = path.join(xdgCacheHome, 'claude-cowork');
Expand Down Expand Up @@ -97,11 +145,29 @@ function createDirs(options) {
}

function isPathSafe(basePath, targetPath) {
  // SECURITY: Check that targetPath, resolved against basePath, stays inside
  // basePath (path traversal protection).
  //
  // Returns true when the resolved target is basePath itself or a descendant
  // of it, and false when it escapes (for example via ../ segments or an
  // unrelated absolute path).
  //
  // The check is purely lexical: paths are normalised with path.resolve()
  // and symlinks are NOT followed.
  //
  // Example:
  // isPathSafe('/sessions', 'user/project') → true
  // isPathSafe('/sessions', '../etc/passwd') → false
  // isPathSafe('/sessions', '/sessions-evil/x') → false (sibling, not child)
  const base = path.resolve(basePath);
  const resolved = path.resolve(basePath, targetPath);
  if (resolved === base) return true;

  // A filesystem root (e.g. "/") already ends with a separator. Appending a
  // second one would produce "//", which no resolved path starts with, so
  // every descendant of the root would wrongly be reported as unsafe.
  const prefix = base.endsWith(path.sep) ? base : base + path.sep;
  return resolved.startsWith(prefix);
}

function translateVmPathStrict(sessionsBase, vmPath) {
// SECURITY: Translate VM path to host path with strict validation.
// VM paths start with /sessions/ and must be translated to the real
// sessions directory on the Linux host.
//
// Translation example:
// /sessions/demo/mnt/.claude
// → ~/.config/Claude/local-agent-mode-sessions/sessions/demo/mnt/.claude
//
// SECURITY CHECKS:
// - Validates path starts with /sessions/
// - Blocks path traversal attempts (../)
// - Ensures result stays within sessions base directory
if (typeof vmPath !== 'string' || !vmPath.startsWith('/sessions/')) {
throw new Error('Not a VM path: ' + vmPath);
}
Expand All @@ -113,6 +179,11 @@ function translateVmPathStrict(sessionsBase, vmPath) {
}

function canonicalizeHostPath(hostPath) {
// Resolve symlinks in a host path to get the canonical absolute path.
// Falls back to partial resolution if intermediate directories don't exist.
//
// Example:
// /home/user/project → /home/realuser/project (if /home/user is a symlink)
if (typeof hostPath !== 'string') {
return hostPath;
}
Expand All @@ -122,9 +193,12 @@ function canonicalizeHostPath(hostPath) {
if (!path.isAbsolute(hostPath)) {
return hostPath;
}

// Try to resolve the full path
try {
return fs.realpathSync(hostPath);
} catch (_) {
// If full path doesn't exist, resolve as much as possible
const segments = [];
let current = path.dirname(hostPath);
segments.push(path.basename(hostPath));
Expand All @@ -141,10 +215,17 @@ function canonicalizeHostPath(hostPath) {
}

function canonicalizeVmPathStrict(sessionsBase, vmPath) {
  // Two-step conversion of a VM path into a canonical host path:
  //   1. translateVmPathStrict() maps the VM path onto the host and throws
  //      if `vmPath` is not a string starting with "/sessions/".
  //   2. canonicalizeHostPath() canonicalizes the translated host path.
  const hostPath = translateVmPathStrict(sessionsBase, vmPath);
  return canonicalizeHostPath(hostPath);
}

function canonicalizePathForHostAccess(sessionsBase, inputPath) {
// Canonicalize any path (VM or host) for host filesystem access.
// - If path starts with /sessions/, translate it first
// - Otherwise, canonicalize it as a host path
//
// This is the main entry point for path resolution throughout the codebase.
if (typeof inputPath === 'string' && inputPath.startsWith('/sessions/')) {
return canonicalizeVmPathStrict(sessionsBase, inputPath);
}
Expand Down
Loading
Loading