Skip to content

Commit 56dbaa4

Browse files
authored
[source_ref] add support for named arguments in a Rust format string
A simple first project: adding support for named arguments in a Rust format string. The SourceRef now contains more detail about the placeholders in the format string, since that is where the name comes from. (The Java stuff works because tree-sitter understands the StringTemplate magic.)

Files:
- Cargo.toml: Try using tree-sitter-rust-orchard, which seems to be more up-to-date than the main one.
- lib.rs: Capture all strings in a macro, to make sure we don't miss anything. Add a body() method to LogRef to make it easy to get the string to run the regex over. lookup_source() needs to pull expressions from the placeholder or the arguments.
- log_format.rs: Make build_src_filter() return an Option that is None if there are no filters.
- source_query.rs: Extract the expressions used in the macro, since tree-sitter is not going to do it for us.
- source_ref.rs: Use a language-specific regex for processing format strings.
2 parents 3660dec + dc7e755 commit 56dbaa4

30 files changed

+345
-321
lines changed

Cargo.lock

Lines changed: 5 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ regex = "1.11.1"
1111
serde = { version = "1.0.219", features = ["derive"] }
1212
serde_json = "1.0.140"
1313
tree-sitter = "0.25.3"
14-
tree-sitter-rust = "0.24.0"
14+
tree-sitter-rust-orchard = "0.12.0"
1515
tree-sitter-java = "0.23.5"
1616
rayon = "1.10.0"
1717

@@ -21,7 +21,7 @@ cc="*"
2121
[dev-dependencies]
2222
assert_cmd = "2.0.16"
2323
env_logger = "0.11.8"
24-
insta = { version = "1.43.1", features = ["yaml"] }
24+
insta = { version = "1.43.1", features = ["yaml", "filters"] }
2525
insta-cmd = "0.6.0"
2626
log = "0.4.27"
2727
rand = "0.9.0"

examples/basic.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,14 @@ fn main() {
77
for i in 0..3 {
88
foo(i);
99
}
10+
bar(4);
11+
baz(5, 6);
1012
}
1113

1214
fn foo(i: u32) {
1315
debug!("Hello from foo i={}", i);
1416
}
17+
18+
fn bar(j: u32) { debug!("Hello from bar j={j}"); }
19+
20+
fn baz(i: u32, j: u32) { debug!("Hello from baz i={1} j={0}", j, i); }

src/call_graph.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ fn nope(i: u32) {
8585
name: String::from("main"),
8686
text: String::from("foo"),
8787
matcher: star_regex,
88+
args: vec![],
8889
vars: vec![],
8990
};
9091
let star_regex = Regex::new(".*").unwrap();
@@ -95,6 +96,7 @@ fn nope(i: u32) {
9596
name: String::from("foo"),
9697
text: String::from("nope"),
9798
matcher: star_regex,
99+
args: vec![],
98100
vars: vec![],
99101
};
100102
assert_eq!(

src/code_source.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ impl CodeSource {
3636

3737
pub fn ts_language(&self) -> Language {
3838
match self.language {
39-
SourceLanguage::Rust => tree_sitter_rust::LANGUAGE.into(),
39+
SourceLanguage::Rust => tree_sitter_rust_orchard::LANGUAGE.into(),
4040
SourceLanguage::Java => tree_sitter_java::LANGUAGE.into(),
4141
}
4242
}

src/lib.rs

Lines changed: 78 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#[cfg(test)]
22
use regex::Regex;
33
use serde::Serialize;
4-
use std::collections::HashMap;
4+
use std::collections::BTreeMap;
55
#[cfg(test)]
66
use std::path::PathBuf;
77
#[cfg(test)]
@@ -14,6 +14,7 @@ mod source_query;
1414
mod source_ref;
1515

1616
// TODO: doesn't need to be exposed if we can clean up the arguments to do_mapping
17+
use crate::source_ref::FormatArgument;
1718
pub use call_graph::CallGraph;
1819
use call_graph::Edge;
1920
pub use code_source::CodeSource;
@@ -36,7 +37,7 @@ impl Default for Filter {
3637
}
3738
}
3839

39-
#[derive(Debug, PartialEq)]
40+
#[derive(Debug, PartialEq, Copy, Clone)]
4041
enum SourceLanguage {
4142
Rust,
4243
Java,
@@ -51,10 +52,10 @@ impl SourceLanguage {
5152
SourceLanguage::Rust => {
5253
// XXX: assumes it's a debug macro
5354
r#"
54-
(macro_invocation macro: (identifier) @macro-name
55+
(macro_invocation macro: (identifier)
5556
(token_tree
56-
(string_literal) @log (identifier)* @arguments
57-
) (#eq? @macro-name "debug")
57+
(string_literal) @log
58+
)
5859
)
5960
"#
6061
}
@@ -91,7 +92,7 @@ pub struct LogMapping<'a> {
9192
pub log_ref: LogRef<'a>,
9293
#[serde(rename(serialize = "srcRef"))]
9394
pub src_ref: Option<SourceRef>,
94-
pub variables: HashMap<String, String>,
95+
pub variables: BTreeMap<String, String>,
9596
pub stack: Vec<Vec<SourceRef>>,
9697
}
9798

@@ -101,6 +102,16 @@ pub struct LogRef<'a> {
101102
details: Option<LogDetails<'a>>,
102103
}
103104

105+
impl<'a> LogRef<'a> {
106+
pub fn body(self) -> &'a str {
107+
if let Some(LogDetails { body: Some(s), .. }) = self.details {
108+
s
109+
} else {
110+
self.line
111+
}
112+
}
113+
}
114+
104115
#[derive(Copy, Clone, Debug, PartialEq)]
105116
struct LogDetails<'a> {
106117
file: Option<&'a str>,
@@ -133,15 +144,15 @@ impl<'a> LogRef<'a> {
133144
pub fn link_to_source<'a>(log_ref: &LogRef, src_refs: &'a [SourceRef]) -> Option<&'a SourceRef> {
134145
src_refs
135146
.iter()
136-
.find(|&source_ref| source_ref.captures(log_ref.line).is_some())
147+
.find(|&source_ref| source_ref.captures(log_ref.body()).is_some())
137148
}
138149

139150
pub fn lookup_source<'a>(
140151
log_ref: &LogRef,
141152
log_format: &LogFormat,
142153
src_refs: &'a [SourceRef],
143154
) -> Option<&'a SourceRef> {
144-
let captures = log_format.captures(log_ref.line);
155+
let captures = log_format.captures(log_ref.body());
145156
let file_name = captures.name("file").map_or("", |m| m.as_str());
146157
let line_no: usize = captures
147158
.name("line")
@@ -157,28 +168,35 @@ pub fn lookup_source<'a>(
157168
pub fn extract_variables<'a>(
158169
log_ref: LogRef<'a>,
159170
src_ref: &'a SourceRef,
160-
) -> HashMap<String, String> {
161-
let mut variables = HashMap::new();
171+
) -> BTreeMap<String, String> {
172+
let mut variables = BTreeMap::new();
162173
let line = match log_ref.details {
163174
Some(details) => details.body.unwrap_or(log_ref.line),
164175
None => log_ref.line,
165176
};
166-
if !src_ref.vars.is_empty() {
167-
if let Some(captures) = src_ref.captures(line) {
168-
for i in 0..captures.len() - 1 {
169-
variables.insert(
170-
src_ref.vars[i].to_string(),
171-
captures.get(i + 1).unwrap().as_str().to_string(),
172-
);
173-
}
177+
if let Some(captures) = src_ref.captures(line) {
178+
for (index, (cap, placeholder)) in
179+
std::iter::zip(captures.iter().skip(1), src_ref.args.iter()).enumerate()
180+
{
181+
let key = match placeholder {
182+
FormatArgument::Named(name) => name.clone(),
183+
FormatArgument::Positional(pos) => src_ref
184+
.vars
185+
.get(*pos)
186+
.map(|s| s.as_str())
187+
.unwrap_or("<unknown>")
188+
.to_string(),
189+
FormatArgument::Placeholder => src_ref.vars[index].to_string(),
190+
};
191+
variables.insert(key, cap.unwrap().as_str().to_string());
174192
}
175193
}
176194

177195
variables
178196
}
179197

180198
pub fn filter_log(buffer: &str, filter: Filter, log_format: Option<String>) -> Vec<LogRef> {
181-
let log_format = LogFormat::new(log_format);
199+
let log_format = log_format.map(LogFormat::new);
182200
buffer
183201
.lines()
184202
.enumerate()
@@ -200,8 +218,10 @@ pub fn do_mappings<'a>(
200218
sources: &str,
201219
log_format: Option<String>,
202220
) -> Vec<LogMapping<'a>> {
203-
let log_format = LogFormat::new(log_format);
204-
let source_filter = log_format.clone().map(|f| f.build_src_filter(&log_refs));
221+
let log_format = log_format.map(LogFormat::new);
222+
let source_filter = log_format
223+
.clone()
224+
.and_then(|f| f.build_src_filter(&log_refs));
205225
let mut sources = CodeSource::find_code(sources, source_filter);
206226
let src_logs = extract_logging(&mut sources);
207227
let call_graph = CallGraph::new(&mut sources);
@@ -215,7 +235,7 @@ pub fn do_mappings<'a>(
215235
} else {
216236
link_to_source(&log_ref, &src_logs)
217237
};
218-
let variables = src_ref.as_ref().map_or(HashMap::new(), move |src_ref| {
238+
let variables = src_ref.as_ref().map_or(BTreeMap::new(), move |src_ref| {
219239
extract_variables(log_ref, src_ref)
220240
});
221241
let stack = src_ref.as_ref().map_or(Vec::new(), |src_ref| {
@@ -302,7 +322,7 @@ pub fn extract_logging(sources: &mut [CodeSource]) -> Vec<SourceRef> {
302322
{
303323
let length = matched.len() - 1;
304324
let prior_result: &mut SourceRef = matched.get_mut(length).unwrap();
305-
prior_result.vars.push(text);
325+
prior_result.vars.push(text.trim().to_string());
306326
}
307327
}
308328
_ => println!("ignoring {}", result.kind),
@@ -379,16 +399,20 @@ fn foo(i: u32) {
379399
nope(i);
380400
}
381401
382-
fn nope(i: u32) {
383-
debug!("this won't match i={}", i);
402+
fn nope(i: u32, j: i32) {
403+
debug!("this won't match i={}; j={}", i, j);
404+
}
405+
406+
fn namedarg(name: &str) {
407+
debug!("Hello, {name}!");
384408
}
385409
"#;
386410

387411
#[test]
388412
fn test_extract_logging() {
389413
let code = CodeSource::new(PathBuf::from("in-mem.rs"), Box::new(TEST_SOURCE.as_bytes()));
390414
let src_refs = extract_logging(&mut [code]);
391-
assert_eq!(src_refs.len(), 2);
415+
assert_eq!(src_refs.len(), 3);
392416
let first = &src_refs[0];
393417
assert_eq!(first.line_no, 7);
394418
assert_eq!(first.column, 11);
@@ -400,39 +424,56 @@ fn nope(i: u32) {
400424
assert_eq!(second.line_no, 18);
401425
assert_eq!(second.column, 11);
402426
assert_eq!(second.name, "nope");
403-
assert_eq!(second.text, "\"this won't match i={}\"");
427+
assert_eq!(second.text, "\"this won't match i={}; j={}\"");
404428
assert_eq!(second.vars[0], "i");
405429
}
406430

407431
#[test]
408432
fn test_link_to_source() {
409-
let log_ref =
410-
LogRef::new("[2024-02-15T03:46:44Z DEBUG stack] you're only as funky as your last cut");
433+
let lf = LogFormat::new(
434+
r#"^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z \w+ \w+\]\s+(?<body>.*)"#.to_string(),
435+
);
436+
let log_ref = LogRef::with_format(
437+
"[2024-05-09T19:58:53Z DEBUG main] you're only as funky as your last cut",
438+
lf,
439+
);
411440
let code = CodeSource::new(PathBuf::from("in-mem.rs"), Box::new(TEST_SOURCE.as_bytes()));
412441
let src_refs = extract_logging(&mut [code]);
413-
assert_eq!(src_refs.len(), 2);
442+
assert_eq!(src_refs.len(), 3);
414443
let result = link_to_source(&log_ref, &src_refs);
415444
assert!(ptr::eq(result.unwrap(), &src_refs[0]));
416445
}
417446

418447
#[test]
419448
fn test_link_to_source_no_matches() {
420-
let log_ref = LogRef::new("[2024-02-26T03:44:40Z DEBUG stack] nope!");
449+
let log_ref = LogRef::new("nope!");
421450
let code = CodeSource::new(PathBuf::from("in-mem.rs"), Box::new(TEST_SOURCE.as_bytes()));
422451
let src_refs = extract_logging(&mut [code]);
423-
assert_eq!(src_refs.len(), 2);
452+
assert_eq!(src_refs.len(), 3);
424453
let result = link_to_source(&log_ref, &src_refs);
425454
assert!(result.is_none());
426455
}
427456

428457
#[test]
429458
fn test_extract_variables() {
430-
let log_ref = LogRef::new("[2024-02-15T03:46:44Z DEBUG nope] this won't match i=1");
459+
let log_ref = LogRef::new("this won't match i=1; j=2");
431460
let code = CodeSource::new(PathBuf::from("in-mem.rs"), Box::new(TEST_SOURCE.as_bytes()));
432461
let src_refs = extract_logging(&mut [code]);
433-
assert_eq!(src_refs.len(), 2);
462+
assert_eq!(src_refs.len(), 3);
434463
let vars = extract_variables(log_ref, &src_refs[1]);
464+
assert_eq!(vars.len(), 2);
435465
assert_eq!(vars.get("i").map(|val| val.as_str()), Some("1"));
466+
assert_eq!(vars.get("j").map(|val| val.as_str()), Some("2"));
467+
}
468+
469+
#[test]
470+
fn test_extract_named() {
471+
let log_ref = LogRef::new("Hello, Tim!");
472+
let code = CodeSource::new(PathBuf::from("in-mem.rs"), Box::new(TEST_SOURCE.as_bytes()));
473+
let src_refs = extract_logging(&mut [code]);
474+
assert_eq!(src_refs.len(), 3);
475+
let vars = extract_variables(log_ref, &src_refs[2]);
476+
assert_eq!(vars.get("name").map(|val| val.as_str()), Some("Tim"));
436477
}
437478

438479
const TEST_PUNC_SRC: &str = r#"""
@@ -452,8 +493,7 @@ fn nope(i: u32) {
452493
let regex = String::from(
453494
r"^(?<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) (?<level>\w+)\s+ (?<file>[\w$.]+):(?<line>\d+) - (?<body>.*)$",
454495
);
455-
let log_format = Some(regex);
456-
let log_format = LogFormat::new(log_format).unwrap();
496+
let log_format = LogFormat::new(regex);
457497
let log_ref = LogRef::with_format(
458498
"2025-04-10 22:12:52 INFO JvmPauseMonitor:146 - JvmPauseMonitor-n0: Started",
459499
log_format,
@@ -487,6 +527,7 @@ fn nope(i: u32) {
487527
name: String::from("main"),
488528
text: String::from("foo"),
489529
matcher: star_regex,
530+
args: vec![],
490531
vars: vec![],
491532
};
492533
let star_regex = Regex::new(".*").unwrap();
@@ -497,6 +538,7 @@ fn nope(i: u32) {
497538
name: String::from("foo"),
498539
text: String::from("nope"),
499540
matcher: star_regex,
541+
args: vec![],
500542
vars: vec![],
501543
};
502544
assert_eq!(paths, vec![vec![foo_2_nope, main_2_foo]])

src/log_format.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,27 +8,27 @@ pub struct LogFormat {
88
}
99

1010
impl LogFormat {
11-
pub fn new(format: Option<String>) -> Option<LogFormat> {
12-
format.map(|fmt| LogFormat {
11+
pub fn new(format: String) -> LogFormat {
12+
LogFormat {
1313
// TODO handle more gracefully if wrong format
14-
regex: Regex::new(&fmt).unwrap(),
15-
})
14+
regex: Regex::new(&format).unwrap(),
15+
}
1616
}
1717

1818
pub fn has_src_hint(self: LogFormat) -> bool {
1919
let mut flatten = self.regex.capture_names().flatten();
2020
flatten.any(|name| name == "line") && flatten.any(|name| name == "file")
2121
}
2222

23-
pub fn build_src_filter(&self, log_refs: &Vec<LogRef>) -> Vec<String> {
23+
pub fn build_src_filter(&self, log_refs: &Vec<LogRef>) -> Option<Vec<String>> {
2424
let mut results = Vec::new();
2525
for log_ref in log_refs {
2626
let captures = self.captures(log_ref.line);
2727
if let Some(file_match) = captures.name("file") {
2828
results.push(file_match.as_str().to_string());
2929
}
3030
}
31-
results
31+
(!results.is_empty()).then_some(results)
3232
}
3333

3434
pub fn captures<'a>(&self, line: &'a str) -> Captures<'a> {

0 commit comments

Comments
 (0)