@@ -9,6 +9,24 @@ use std::path::PathBuf;
99const LOCAL_CONFIG_FILE : & str = ".sofos/config.local.toml" ;
1010const GLOBAL_CONFIG_FILE : & str = ".sofos/config.toml" ;
1111
12+ /// POSIX shell control keywords that punctuate compound commands but
13+ /// don't introduce one of their own. Stripped from segment heads so the
14+ /// next non-keyword token is the base command we actually want to inspect.
15+ const COMPOUND_KEYWORDS : & [ & str ] = & [
16+ "do" , "done" , "then" , "else" , "elif" , "fi" , "case" , "esac" , "{" , "}" , "(" ,
17+ ] ;
18+
19+ /// Loop / conditional headers whose tail *is* a real command —
20+ /// `while CMD; do ...`, `until CMD; do ...`, `if CMD; then ...`. The
21+ /// keyword itself is skipped, and the rest of the segment is parsed
22+ /// like any other command.
23+ const COMPOUND_HEADERS_WITH_BODY : & [ & str ] = & [ "while" , "until" , "if" ] ;
24+
25+ /// Loop headers whose segment is a word list, *not* a command —
26+ /// `for VAR in WORDS`. Matching one means the segment carries no
27+ /// base command at all and should yield no entry.
28+ const COMPOUND_HEADERS_NO_BODY : & [ & str ] = & [ "for" ] ;
29+
1230/// Match POSIX-shell `KEY=value` assignment tokens: the key starts with
1331/// a letter or underscore, is alphanumeric+`_` throughout, and is
1432/// followed by `=`. Used by `extract_base_command` to skip leading
@@ -745,14 +763,37 @@ impl PermissionManager {
745763 return Ok ( CommandPermission :: Denied ) ;
746764 }
747765
748- if self . allowed_commands . contains ( base_command) {
749- return Ok ( CommandPermission :: Allowed ) ;
750- }
766+ // Walk every sub-command in a compound shell (`for ...; do ...; done`,
767+ // `cmd1 && cmd2`, `cmd1; cmd2 | cmd3`) so the verdict reflects the
768+ // whole pipeline, not just the first token. Two reasons:
769+ //
770+ // 1. `for f in *; do echo $f; sed -n '1,N'p $f; done` leads with the
771+ // structural keyword `for`, which isn't in `allowed_commands` —
772+ // the old single-token lookup forced a prompt even though every
773+ // real step is read-only. Bases pulled from the compound let the
774+ // same shell auto-allow.
775+ // 2. `cat foo && rm bar` used to slip past as Allowed because `cat`
776+ // is on the allow-list; the smuggled `rm` was never seen. Any
777+ // forbidden base anywhere in the pipeline now wins.
778+ //
779+ // The splitter does NOT descend into `$(...)` / backticks, so a
780+ // command smuggled there is still seen as part of the parent
781+ // command's args (same blind spot as before this change).
782+ let compound_bases = Self :: enumerate_compound_bases ( command) ;
783+ let bases: Vec < & str > = if compound_bases. is_empty ( ) {
784+ vec ! [ base_command]
785+ } else {
786+ compound_bases. iter ( ) . map ( String :: as_str) . collect ( )
787+ } ;
751788
752- if self . forbidden_commands . contains ( base_command ) {
789+ if bases . iter ( ) . any ( | & b| self . forbidden_commands . contains ( b ) ) {
753790 return Ok ( CommandPermission :: Denied ) ;
754791 }
755792
793+ if bases. iter ( ) . all ( |& b| self . allowed_commands . contains ( b) ) {
794+ return Ok ( CommandPermission :: Allowed ) ;
795+ }
796+
756797 Ok ( CommandPermission :: Ask )
757798 }
758799
@@ -788,9 +829,110 @@ impl PermissionManager {
788829 Ok ( ( confirmed, remember) )
789830 }
790831
832+ /// Quote-aware split of a compound command on shell separators
833+ /// (`;`, `\n`, `|`, `||`, `&&`). Lone `&` is preserved so that
834+ /// `2>&1` stays glued to its preceding token. Quoted regions are
835+ /// kept whole so `echo 'a; b'` doesn't split mid-string.
836+ ///
837+ /// Does NOT descend into `$(...)` command substitution or backtick
838+ /// substitution — a `rm` smuggled inside `echo $(rm bad)` is
839+ /// invisible to this splitter (and to the rest of the permission
840+ /// system, both before and after this change). Closing that hole
841+ /// would require yielding the inner sub-commands as separate
842+ /// segments and is intentionally out of scope here.
843+ fn split_compound_command ( command : & str ) -> Vec < String > {
844+ let mut segments: Vec < String > = Vec :: new ( ) ;
845+ let mut current = String :: new ( ) ;
846+ let mut chars = command. chars ( ) . peekable ( ) ;
847+ let mut quote: Option < char > = None ;
848+
849+ while let Some ( c) = chars. next ( ) {
850+ if let Some ( q) = quote {
851+ current. push ( c) ;
852+ if c == q {
853+ quote = None ;
854+ }
855+ continue ;
856+ }
857+ match c {
858+ '\'' | '"' => {
859+ quote = Some ( c) ;
860+ current. push ( c) ;
861+ }
862+ ';' | '\n' => Self :: push_segment ( & mut segments, & mut current) ,
863+ '|' => {
864+ if chars. peek ( ) == Some ( & '|' ) {
865+ chars. next ( ) ;
866+ }
867+ Self :: push_segment ( & mut segments, & mut current) ;
868+ }
869+ '&' if chars. peek ( ) == Some ( & '&' ) => {
870+ chars. next ( ) ;
871+ Self :: push_segment ( & mut segments, & mut current) ;
872+ }
873+ _ => current. push ( c) ,
874+ }
875+ }
876+ Self :: push_segment ( & mut segments, & mut current) ;
877+ segments
878+ }
879+
880+ fn push_segment ( segments : & mut Vec < String > , current : & mut String ) {
881+ let trimmed = current. trim ( ) ;
882+ if !trimmed. is_empty ( ) {
883+ segments. push ( trimmed. to_string ( ) ) ;
884+ }
885+ current. clear ( ) ;
886+ }
887+
888+ /// Strip leading shell-control prefixes from a segment and return
889+ /// the next "real" command (base + args). Returns `None` for
890+ /// segments that carry no command of their own — `for VAR in WORDS`
891+ /// loop headers, bare `done` / `fi` / `esac` closers, or shell
892+ /// comments (`# anything`).
893+ fn extract_segment_base_with_args ( segment : & str ) -> Option < ( & str , Vec < & str > ) > {
894+ let mut tokens = segment. split_whitespace ( ) . peekable ( ) ;
895+ while let Some ( & tok) = tokens. peek ( ) {
896+ if COMPOUND_HEADERS_NO_BODY . contains ( & tok) {
897+ return None ;
898+ }
899+ if is_env_assignment ( tok)
900+ || COMPOUND_KEYWORDS . contains ( & tok)
901+ || COMPOUND_HEADERS_WITH_BODY . contains ( & tok)
902+ {
903+ tokens. next ( ) ;
904+ continue ;
905+ }
906+ break ;
907+ }
908+ let base = tokens. next ( ) ?;
909+ // A bare `#` (or token starting with `#`) at the head of a
910+ // segment marks the rest as a comment — `ls; # tail msg`
911+ // splits to `["ls", "# tail msg"]`, and the second segment is
912+ // entirely commentary, not a command. Treating it as a base
913+ // would force a needless Ask prompt for what the shell ignores.
914+ if base. starts_with ( '#' ) {
915+ return None ;
916+ }
917+ let args: Vec < & str > = tokens. collect ( ) ;
918+ Some ( ( base, args) )
919+ }
920+
921+ /// Base-command names of every sub-command in a compound shell.
922+ /// Empty for a single command with no separators (callers fall back
923+ /// to `extract_base_command` in that case).
924+ fn enumerate_compound_bases ( command : & str ) -> Vec < String > {
925+ Self :: split_compound_command ( command)
926+ . iter ( )
927+ . filter_map ( |seg| {
928+ Self :: extract_segment_base_with_args ( seg) . map ( |( base, _) | base. to_string ( ) )
929+ } )
930+ . collect ( )
931+ }
932+
791933 /// Heuristic: does `command` carry line-number / line-range args that
792- /// make the exact string un-rememberable? Checks each pipe segment
793- /// for:
934+ /// make the exact string un-rememberable? Walks every sub-command of
935+ /// a compound shell looking for:
794936 ///
795937 /// - sed numeric addresses: `sed -n '10,20p'`, `sed '5d'`, `sed 1,5q`
796938 /// - head/tail numeric counts: `head -50`, `head -n 50`, `tail +20`
@@ -800,22 +942,15 @@ impl PermissionManager {
800942 /// False positives just downgrade the prompt to Yes/No, so the
801943 /// heuristic prefers simple matches over exhaustive parsing.
802944 fn command_has_volatile_line_args ( command : & str ) -> bool {
803- command
804- . split ( '|' )
805- . any ( |segment| Self :: segment_has_volatile_line_args ( segment. trim ( ) ) )
945+ Self :: split_compound_command ( command)
946+ . iter ( )
947+ . any ( |segment| Self :: segment_has_volatile_line_args ( segment) )
806948 }
807949
808950 fn segment_has_volatile_line_args ( segment : & str ) -> bool {
809- let mut tokens = segment. split_whitespace ( ) ;
810- // Skip leading env-var assignments the same way extract_base_command does.
811- let base = loop {
812- match tokens. next ( ) {
813- Some ( t) if is_env_assignment ( t) => continue ,
814- Some ( t) => break t,
815- None => return false ,
816- }
951+ let Some ( ( base, args) ) = Self :: extract_segment_base_with_args ( segment) else {
952+ return false ;
817953 } ;
818- let args: Vec < & str > = tokens. collect ( ) ;
819954 match base {
820955 "sed" => Self :: sed_has_numeric_address ( & args) ,
821956 "head" | "tail" => Self :: head_tail_has_numeric_count ( & args) ,
@@ -2052,4 +2187,145 @@ ask = []
20522187 "ls -la src/"
20532188 ) ) ;
20542189 }
2190+
2191+ /// Volatile sed/head/tail buried inside a `for ...; do ...; done`
2192+ /// loop or behind `;` / `&&` should still be detected — the old
2193+ /// `split('|')` pass missed everything that wasn't pipe-separated.
2194+ #[ test]
2195+ fn test_volatile_line_args_inside_compound_shell ( ) {
2196+ assert ! ( PermissionManager :: command_has_volatile_line_args(
2197+ "for f in src/*.rs; do sed -n '1,320p' \" $f\" | nl -ba; done"
2198+ ) ) ;
2199+ assert ! ( PermissionManager :: command_has_volatile_line_args(
2200+ "cat README.md && head -n 50 CHANGELOG.md"
2201+ ) ) ;
2202+ assert ! ( PermissionManager :: command_has_volatile_line_args(
2203+ "echo start; tail -n 20 build.log"
2204+ ) ) ;
2205+ assert ! ( !PermissionManager :: command_has_volatile_line_args(
2206+ "for f in *.rs; do echo \" $f\" ; cat \" $f\" ; done"
2207+ ) ) ;
2208+ }
2209+
2210+ /// `;` and `&&` inside quotes must NOT split a segment — `echo 'a; b'`
2211+ /// is still a single `echo` command.
2212+ #[ test]
2213+ fn test_split_compound_command_respects_quotes ( ) {
2214+ let segs = PermissionManager :: split_compound_command ( "echo 'a; b' && ls" ) ;
2215+ assert_eq ! ( segs, vec![ "echo 'a; b'" , "ls" ] ) ;
2216+
2217+ let segs = PermissionManager :: split_compound_command ( "echo \" x | y\" | wc -l" ) ;
2218+ assert_eq ! ( segs, vec![ "echo \" x | y\" " , "wc -l" ] ) ;
2219+ }
2220+
2221+ /// Lone `&` is part of `2>&1`, not a separator. The pre-existing
2222+ /// `2>&1` allowance in `is_safe_command_structure` would be undone
2223+ /// if our splitter chopped commands at every `&`.
2224+ #[ test]
2225+ fn test_split_compound_command_keeps_stderr_redirect ( ) {
2226+ let segs = PermissionManager :: split_compound_command ( "cargo test 2>&1 | tee out.log" ) ;
2227+ assert_eq ! ( segs, vec![ "cargo test 2>&1" , "tee out.log" ] ) ;
2228+ }
2229+
2230+ /// The user-reported regression: a `for ...; do echo; sed; nl; done`
2231+ /// pipeline of read-only tools should resolve as Allowed without a
2232+ /// prompt, not get stuck on the `for` keyword.
2233+ #[ test]
2234+ fn compound_for_loop_of_allowed_commands_is_allowed ( ) {
2235+ let temp_dir = TempDir :: new ( ) . unwrap ( ) ;
2236+ let mut manager = PermissionManager :: new ( temp_dir. path ( ) . to_path_buf ( ) ) . unwrap ( ) ;
2237+
2238+ assert_eq ! (
2239+ manager
2240+ . check_command_permission(
2241+ "for f in src/recipient/*.rs src/recipient/native/*.rs; \
2242+ do echo '===== '\" $f\" ' ====='; sed -n '1,320p' \" $f\" | nl -ba; done"
2243+ )
2244+ . unwrap( ) ,
2245+ CommandPermission :: Allowed
2246+ ) ;
2247+ }
2248+
2249+ /// A forbidden base anywhere inside a compound shell wins over an
2250+ /// allowed leader. Closes the `cat foo && rm bar` smuggling hole.
2251+ #[ test]
2252+ fn compound_with_forbidden_base_is_denied ( ) {
2253+ let temp_dir = TempDir :: new ( ) . unwrap ( ) ;
2254+ let mut manager = PermissionManager :: new ( temp_dir. path ( ) . to_path_buf ( ) ) . unwrap ( ) ;
2255+
2256+ assert_eq ! (
2257+ manager
2258+ . check_command_permission( "cat foo && rm bar" )
2259+ . unwrap( ) ,
2260+ CommandPermission :: Denied
2261+ ) ;
2262+ assert_eq ! (
2263+ manager
2264+ . check_command_permission( "for f in *.rs; do rm \" $f\" ; done" )
2265+ . unwrap( ) ,
2266+ CommandPermission :: Denied
2267+ ) ;
2268+ assert_eq ! (
2269+ manager. check_command_permission( "ls; sudo whoami" ) . unwrap( ) ,
2270+ CommandPermission :: Denied
2271+ ) ;
2272+ }
2273+
2274+ /// Compound shells that include an unknown tool stay at Ask — only
2275+ /// fully-allowed pipelines auto-pass.
2276+ #[ test]
2277+ fn compound_with_unknown_base_asks ( ) {
2278+ let temp_dir = TempDir :: new ( ) . unwrap ( ) ;
2279+ let mut manager = PermissionManager :: new ( temp_dir. path ( ) . to_path_buf ( ) ) . unwrap ( ) ;
2280+
2281+ assert_eq ! (
2282+ manager
2283+ . check_command_permission( "cat foo && some_custom_tool bar" )
2284+ . unwrap( ) ,
2285+ CommandPermission :: Ask
2286+ ) ;
2287+ assert_eq ! (
2288+ manager
2289+ . check_command_permission( "for f in *; do unknown_tool \" $f\" ; done" )
2290+ . unwrap( ) ,
2291+ CommandPermission :: Ask
2292+ ) ;
2293+ }
2294+
2295+ /// `ls; # trailing comment` is shell-equivalent to plain `ls` —
2296+ /// the `#` segment is commentary, not a command, and the verdict
2297+ /// must not regress to Ask just because we now look past the head.
2298+ #[ test]
2299+ fn trailing_shell_comment_does_not_force_ask ( ) {
2300+ let temp_dir = TempDir :: new ( ) . unwrap ( ) ;
2301+ let mut manager = PermissionManager :: new ( temp_dir. path ( ) . to_path_buf ( ) ) . unwrap ( ) ;
2302+
2303+ assert_eq ! (
2304+ manager
2305+ . check_command_permission( "ls -la; # quick listing" )
2306+ . unwrap( ) ,
2307+ CommandPermission :: Allowed
2308+ ) ;
2309+ }
2310+
2311+ /// `while CMD; do CMD; done` and `if CMD; then CMD; fi` should
2312+ /// likewise be inspected sub-command by sub-command.
2313+ #[ test]
2314+ fn while_and_if_compounds_are_inspected ( ) {
2315+ let temp_dir = TempDir :: new ( ) . unwrap ( ) ;
2316+ let mut manager = PermissionManager :: new ( temp_dir. path ( ) . to_path_buf ( ) ) . unwrap ( ) ;
2317+
2318+ assert_eq ! (
2319+ manager
2320+ . check_command_permission( "if grep -q foo file.txt; then echo found; fi" )
2321+ . unwrap( ) ,
2322+ CommandPermission :: Allowed
2323+ ) ;
2324+ assert_eq ! (
2325+ manager
2326+ . check_command_permission( "if true; then rm file; fi" )
2327+ . unwrap( ) ,
2328+ CommandPermission :: Denied
2329+ ) ;
2330+ }
20552331}
0 commit comments