Skip to content

Commit af0328a

Browse files
feat: allow specifying the base commit for local (non-CI) diffs (#260)
- ref cpp-linter/cpp-linter#180 - as discussed in cpp-linter/cpp-linter#179 <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **New Features** * Added --diff-base and --ignore-index CLI options to control base diffs and staged-file handling. * **Documentation** * Documented new CLI options (minimum version 1.12.0). * Improved CLI argument header formatting to handle missing short names. * **Chores** * Default test filter updated to exclude a specific test. * **Tests** * Tests expanded to cover diff-base and ignore-index behaviors and updated call sites. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
1 parent 6002ba2 commit af0328a

File tree

9 files changed

+233
-79
lines changed

9 files changed

+233
-79
lines changed

.config/nextest.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ nextest-version = "0.9.77"
33

44
[profile.default]
55
# A profile to run most tests, except tests that run longer than 10 seconds
6-
default-filter = "not test(#*rate_limit_secondary) - test(#git::test::with_*)"
6+
default-filter = "not test(#*rate_limit_secondary) - test(#git::test::with_*) - test(#repo_get_sha)"
77

88
# This will flag any test that runs longer than 10 seconds. Useful when writing new tests.
99
slow-timeout = "10s"

cpp-linter/src/cli/mod.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,27 @@ pub struct SourceOptions {
195195
verbatim_doc_comment
196196
)]
197197
pub ignore: Vec<String>,
198+
199+
/// The git reference to use as the base for diffing changed files.
200+
///
201+
/// This can be any valid git ref, such as a branch name, tag name, or commit SHA.
202+
/// If it is an integer, then it is treated as the number of parent commits from HEAD.
203+
///
204+
/// This option only applies to non-CI contexts (eg. local CLI use).
205+
#[arg(
206+
short = 'b',
207+
long,
208+
value_name = "REF",
209+
help_heading = "Source options",
210+
verbatim_doc_comment
211+
)]
212+
pub diff_base: Option<String>,
213+
214+
/// Assert this switch to ignore any staged changes when
215+
/// generating a diff of changed files.
216+
/// Useful when used with [`--diff-base`](#-b-diff-base).
217+
#[arg(default_value_t = false, long, help_heading = "Source options")]
218+
pub ignore_index: bool,
198219
}
199220

200221
#[derive(Debug, Clone, Args)]

cpp-linter/src/git.rs

Lines changed: 156 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
//! (str or bytes) only happens in CI or when libgit2 cannot be used to initialize a
99
//! repository.
1010
11-
use std::{ops::RangeInclusive, path::PathBuf};
11+
use std::{fmt::Display, ops::RangeInclusive, path::PathBuf};
1212

1313
use anyhow::{Context, Result};
1414
// non-std crates
@@ -33,50 +33,103 @@ pub fn open_repo(path: &str) -> Result<Repository, Error> {
3333
///
3434
/// The optionally specified `depth` can be used to traverse the tree a number of times
3535
/// since the current `"HEAD"`.
36-
fn get_sha(repo: &Repository, depth: Option<u32>) -> Result<git2::Object<'_>, Error> {
36+
fn get_sha<'d, T: Display>(
37+
repo: &'d Repository,
38+
depth: &Option<T>,
39+
) -> Result<git2::Object<'d>, Error> {
3740
match depth {
38-
Some(int) => repo.revparse_single(format!("HEAD~{}", int).as_str()),
41+
Some(base) => {
42+
let base = base.to_string();
43+
// First treat base as an explicit refs/SHAs. If that fails, then
44+
// fall back to `HEAD~<base>` if `base` is purely numeric.
45+
match repo.revparse_single(base.as_str()) {
46+
Ok(obj) => Ok(obj),
47+
Err(err) => {
48+
if base.chars().all(|c| c.is_ascii_digit()) {
49+
repo.revparse_single(format!("HEAD~{}", base).as_str())
50+
} else {
51+
Err(err)
52+
}
53+
}
54+
}
55+
}
3956
None => repo.revparse_single("HEAD"),
4057
}
4158
}
4259

4360
/// Fetch the [`git2::Diff`] about a given [`git2::Repository`].
4461
///
4562
/// This is actually not used in CI for file permissions and ownership reasons.
46-
/// Rather this is only (supposed to be) used when executed on a local developer
63+
/// Rather, this is only (supposed to be) used when executed on a local developer
4764
/// machine.
4865
///
49-
/// If there are files staged for a commit, then the resulting [`Diff`] will describe
50-
/// the staged changes. However, if there are no staged changes, then the last commit's
51-
/// [`Diff`] is returned.
52-
pub fn get_diff(repo: &'_ Repository) -> Result<git2::Diff<'_>> {
53-
let head = get_sha(repo, None).unwrap().peel_to_tree().unwrap();
54-
let mut has_staged_files = false;
55-
for entry in repo.statuses(None).unwrap().iter() {
56-
if entry.status().bits()
57-
& (git2::Status::INDEX_NEW.bits()
58-
| git2::Status::INDEX_MODIFIED.bits()
59-
| git2::Status::INDEX_RENAMED.bits())
60-
> 0
61-
{
62-
has_staged_files = true;
63-
break;
64-
}
65-
}
66+
/// ## Using `diff_base` and `ignore_index`
67+
///
68+
/// The `diff_base` is a commit or ref to use as the base of the diff.
69+
/// Use `ignore_index` to exclude any staged changes in the local index.
70+
///
71+
/// | `diff_base` value | Git index state | Scope of diff |
72+
/// |-------------------|-----------------|---------------|
73+
/// | `None` | No staged changes | `HEAD~1..HEAD` |
74+
/// | `None` | Has staged changes | `HEAD..index` |
75+
/// | `Some(2)` or `Some("HEAD~2")` | No staged changes | `HEAD~2..HEAD` |
76+
/// | `Some(2)` or `Some("HEAD~2")` | Has staged changes | `HEAD~2..index` |
77+
pub fn get_diff<'d, T: Display>(
78+
repo: &'d Repository,
79+
diff_base: &Option<T>,
80+
ignore_index: bool,
81+
) -> Result<git2::Diff<'d>> {
82+
let use_staged_files = if ignore_index {
83+
false
84+
} else {
85+
// check if there are staged file changes
86+
repo.statuses(None)
87+
.with_context(|| "Could not get repo statuses")?
88+
.iter()
89+
.any(|entry| {
90+
entry.status().bits()
91+
& (git2::Status::INDEX_NEW.bits()
92+
| git2::Status::INDEX_MODIFIED.bits()
93+
| git2::Status::INDEX_RENAMED.bits())
94+
> 0
95+
})
96+
};
97+
let base = if diff_base.is_some() {
98+
// diff base is specified (regardless of staged changes)
99+
get_sha(repo, diff_base)
100+
} else if !use_staged_files {
101+
// diff base is unspecified, when the repo has
102+
// no staged changes (and they are not ignored),
103+
// then focus on just the last commit
104+
get_sha(repo, &Some(1))
105+
} else {
106+
// diff base is unspecified and there are staged changes, so
107+
// let base be set to HEAD.
108+
get_sha(repo, &None::<u8>)
109+
}?
110+
.peel_to_tree()?;
66111

67112
// RARE BUG when `head` is the first commit in the repo! Affects local-only runs.
68-
// > panicked at cpp-linter\src\git.rs:73:43:
69-
// > called `Result::unwrap()` on an `Err` value:
70113
// > Error { code: -3, class: 3, message: "parent 0 does not exist" }
71-
if has_staged_files {
72-
// get diff for staged files only
73-
repo.diff_tree_to_index(Some(&head), None, None)
74-
.with_context(|| "Could not get diff for current changes in local repo index")
114+
if use_staged_files {
115+
// get diff including staged files
116+
repo.diff_tree_to_index(Some(&base), None, None)
117+
.with_context(|| {
118+
format!(
119+
"Could not get diff for {}..index",
120+
&base.id().to_string()[..7]
121+
)
122+
})
75123
} else {
76-
// get diff for last commit only
77-
let base = get_sha(repo, Some(1)).unwrap().peel_to_tree().unwrap();
124+
// get diff for range of commits between base..HEAD
125+
let head = get_sha(repo, &None::<u8>)?.peel_to_tree()?;
78126
repo.diff_tree_to_tree(Some(&base), Some(&head), None)
79-
.with_context(|| "Could not get diff for last commit")
127+
.with_context(|| {
128+
format!(
129+
"Could not get diff for {}..HEAD",
130+
&base.id().to_string()[..7]
131+
)
132+
})
80133
}
81134
}
82135

@@ -105,8 +158,10 @@ fn parse_patch(patch: &Patch) -> (Vec<u32>, Vec<RangeInclusive<u32>>) {
105158

106159
/// Parses a given [`git2::Diff`] and returns a list of [`FileObj`]s.
107160
///
108-
/// The specified list of `extensions`, `ignored` and `not_ignored` files are used as
109-
/// filters to expedite the process and only focus on the data cpp_linter can use.
161+
/// The `lines_changed_only` parameter is used to expedite the process and only
162+
/// focus on files that have relevant changes. The `file_filter` parameter applies
163+
/// a filter to only include source files (or ignored files) based on the
164+
/// extensions and ignore patterns specified.
110165
pub fn parse_diff(
111166
diff: &git2::Diff,
112167
file_filter: &FileFilter,
@@ -393,19 +448,30 @@ mod test {
393448
fs::read,
394449
};
395450

396-
use git2::build::CheckoutBuilder;
397-
use git2::{ApplyLocation, Diff, IndexAddOption, Repository};
451+
use git2::{ApplyLocation, Diff, IndexAddOption, Repository, build::CheckoutBuilder};
452+
use tempfile::{TempDir, tempdir};
453+
454+
use super::get_sha;
455+
use crate::{
456+
cli::LinesChangedOnly,
457+
common_fs::FileFilter,
458+
rest_api::{RestApiClient, github::GithubApiClient},
459+
};
460+
461+
const TEST_REPO_URL: &str = "https://github.com/cpp-linter/cpp-linter";
398462

399463
// used to setup a testing stage
400-
fn clone_repo(url: &str, sha: &str, path: &str, patch_path: Option<&str>) {
401-
let repo = Repository::clone(url, path).unwrap();
402-
let commit = repo.revparse_single(sha).unwrap();
403-
repo.checkout_tree(
404-
&commit,
405-
Some(CheckoutBuilder::new().force().recreate_missing(true)),
406-
)
407-
.unwrap();
408-
repo.set_head_detached(commit.id()).unwrap();
464+
fn clone_repo(sha: Option<&str>, path: &str, patch_path: Option<&str>) -> Repository {
465+
let repo = Repository::clone(TEST_REPO_URL, path).unwrap();
466+
if let Some(sha) = sha {
467+
let commit = repo.revparse_single(sha).unwrap();
468+
repo.checkout_tree(
469+
&commit,
470+
Some(CheckoutBuilder::new().force().recreate_missing(true)),
471+
)
472+
.unwrap();
473+
repo.set_head_detached(commit.id()).unwrap();
474+
}
409475
if let Some(patch) = patch_path {
410476
let diff = Diff::from_buffer(&read(patch).unwrap()).unwrap();
411477
repo.apply(&diff, ApplyLocation::Both, None).unwrap();
@@ -415,16 +481,9 @@ mod test {
415481
.unwrap();
416482
index.write().unwrap();
417483
}
484+
repo
418485
}
419486

420-
use tempfile::{TempDir, tempdir};
421-
422-
use crate::{
423-
cli::LinesChangedOnly,
424-
common_fs::FileFilter,
425-
rest_api::{RestApiClient, github::GithubApiClient},
426-
};
427-
428487
fn get_temp_dir() -> TempDir {
429488
let tmp = tempdir().unwrap();
430489
println!("Using temp folder at {:?}", tmp.path());
@@ -436,11 +495,10 @@ mod test {
436495
extensions: &[String],
437496
tmp: &TempDir,
438497
patch_path: Option<&str>,
498+
ignore_staged: bool,
439499
) -> Vec<crate::common_fs::FileObj> {
440-
let url = "https://github.com/cpp-linter/cpp-linter";
441500
clone_repo(
442-
url,
443-
sha,
501+
Some(sha),
444502
tmp.path().as_os_str().to_str().unwrap(),
445503
patch_path,
446504
);
@@ -453,7 +511,12 @@ mod test {
453511
}
454512
rest_api_client
455513
.unwrap()
456-
.get_list_of_changed_files(&file_filter, &LinesChangedOnly::Off)
514+
.get_list_of_changed_files(
515+
&file_filter,
516+
&LinesChangedOnly::Off,
517+
if ignore_staged { &Some(0) } else { &None::<u8> },
518+
ignore_staged,
519+
)
457520
.await
458521
.unwrap()
459522
}
@@ -465,7 +528,7 @@ mod test {
465528
let cur_dir = current_dir().unwrap();
466529
let tmp = get_temp_dir();
467530
let extensions = vec!["cpp".to_string(), "hpp".to_string()];
468-
let files = checkout_cpp_linter_py_repo(sha, &extensions, &tmp, None).await;
531+
let files = checkout_cpp_linter_py_repo(sha, &extensions, &tmp, None, false).await;
469532
println!("files = {:?}", files);
470533
assert!(files.is_empty());
471534
set_current_dir(cur_dir).unwrap(); // prep to delete temp_folder
@@ -479,7 +542,7 @@ mod test {
479542
let cur_dir = current_dir().unwrap();
480543
let tmp = get_temp_dir();
481544
let extensions = vec!["cpp".to_string(), "hpp".to_string()];
482-
let files = checkout_cpp_linter_py_repo(sha, &extensions.clone(), &tmp, None).await;
545+
let files = checkout_cpp_linter_py_repo(sha, &extensions.clone(), &tmp, None, false).await;
483546
println!("files = {:?}", files);
484547
assert!(files.len() >= 2);
485548
for file in files {
@@ -503,6 +566,7 @@ mod test {
503566
&extensions.clone(),
504567
&tmp,
505568
Some("tests/git_status_test_assets/cpp-linter/cpp-linter/test_git_lib.patch"),
569+
false,
506570
)
507571
.await;
508572
println!("files = {:?}", files);
@@ -515,4 +579,39 @@ mod test {
515579
set_current_dir(cur_dir).unwrap(); // prep to delete temp_folder
516580
drop(tmp); // delete temp_folder
517581
}
582+
583+
#[tokio::test]
584+
async fn with_ignored_staged_changes() {
585+
// commit with no modified C/C++ sources
586+
let sha = "0c236809891000b16952576dc34de082d7a40bf3";
587+
let cur_dir = current_dir().unwrap();
588+
let tmp = get_temp_dir();
589+
let extensions = vec!["cpp".to_string(), "hpp".to_string()];
590+
let files = checkout_cpp_linter_py_repo(
591+
sha,
592+
&extensions.clone(),
593+
&tmp,
594+
Some("tests/git_status_test_assets/cpp-linter/cpp-linter/test_git_lib.patch"),
595+
true,
596+
)
597+
.await;
598+
println!("files = {:?}", files);
599+
assert!(files.is_empty());
600+
set_current_dir(cur_dir).unwrap(); // prep to delete temp_folder
601+
drop(tmp); // delete temp_folder
602+
}
603+
604+
#[test]
605+
fn repo_get_sha() {
606+
let tmp_dir = get_temp_dir();
607+
let repo = clone_repo(None, tmp_dir.path().to_str().unwrap(), None);
608+
for (ours, theirs) in [(None::<u8>, "HEAD"), (Some(2), "HEAD~2")] {
609+
let our_obj = get_sha(&repo, &ours).unwrap();
610+
let their_obj = get_sha(&repo, &Some(theirs)).unwrap();
611+
assert_eq!(our_obj.id(), their_obj.id());
612+
}
613+
// test an invalid ref for coverage measurement
614+
assert!(get_sha(&repo, &Some("1.0")).is_err());
615+
drop(tmp_dir); // delete temp_folder
616+
}
518617
}

cpp-linter/src/rest_api/github/mod.rs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
//! In other (private) submodules we implement behavior specific to Github's REST API.
55
66
use std::env;
7+
use std::fmt::Display;
78
use std::fs::OpenOptions;
89
use std::io::Write;
910
use std::sync::{Arc, Mutex};
@@ -119,10 +120,12 @@ impl RestApiClient for GithubApiClient {
119120
Ok(headers)
120121
}
121122

122-
async fn get_list_of_changed_files(
123+
async fn get_list_of_changed_files<T: Display>(
123124
&self,
124125
file_filter: &FileFilter,
125126
lines_changed_only: &LinesChangedOnly,
127+
diff_base: &Option<T>,
128+
ignore_index: bool,
126129
) -> Result<Vec<FileObj>> {
127130
if env::var("CI").is_ok_and(|val| val.as_str() == "true")
128131
&& let Some(repo) = self.repo.as_ref()
@@ -171,7 +174,11 @@ impl RestApiClient for GithubApiClient {
171174
let repo = open_repo(".").with_context(
172175
|| "Please ensure the repository is checked out before running cpp-linter.",
173176
)?;
174-
let list = parse_diff(&get_diff(&repo)?, file_filter, lines_changed_only);
177+
let list = parse_diff(
178+
&get_diff(&repo, diff_base, ignore_index)?,
179+
file_filter,
180+
lines_changed_only,
181+
);
175182
Ok(list)
176183
}
177184
}
@@ -430,7 +437,12 @@ mod test {
430437
env::set_current_dir(tmp_dir.path()).unwrap();
431438
let rest_client = GithubApiClient::new().unwrap();
432439
let files = rest_client
433-
.get_list_of_changed_files(&FileFilter::new(&[], vec![]), &LinesChangedOnly::Off)
440+
.get_list_of_changed_files(
441+
&FileFilter::new(&[], vec![]),
442+
&LinesChangedOnly::Off,
443+
&None::<u8>,
444+
false,
445+
)
434446
.await;
435447
assert!(files.is_err())
436448
}

0 commit comments

Comments
 (0)