Skip to content

Commit 3178dc6

Browse files
authored
Consolidate all dependencies into a single indexed file (to reduce I/O) (#704)
Signed-off-by: Juan Cruz Viotti <[email protected]>
1 parent 6d786dc commit 3178dc6

31 files changed

+274
-663
lines changed

src/build/adapter_filesystem.cc

Lines changed: 135 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -3,107 +3,170 @@
33
#include <sourcemeta/core/io.h>
44

55
#include <cassert> // assert
6+
#include <chrono> // std::chrono::nanoseconds, std::chrono::duration_cast
7+
#include <cstdint> // std::int64_t
68
#include <fstream> // std::ofstream, std::ifstream
79
#include <mutex> // std::unique_lock
810
#include <string> // std::string
911
#include <string_view> // std::string_view
1012

11-
namespace sourcemeta::one {
13+
static constexpr std::string_view DEPENDENCIES_FILE{"deps.txt"};
1214

13-
constexpr std::string_view DEPENDENCIES_EXTENSION{".deps"};
15+
namespace sourcemeta::one {
1416

1517
BuildAdapterFilesystem::BuildAdapterFilesystem(
1618
const std::filesystem::path &output_root)
17-
: root{std::filesystem::canonical(output_root)} {}
18-
19-
auto BuildAdapterFilesystem::dependencies_path(const node_type &path) const
20-
-> node_type {
21-
assert(path.is_absolute());
22-
return path.string() + std::string{DEPENDENCIES_EXTENSION};
23-
}
24-
25-
auto BuildAdapterFilesystem::read_dependencies(const node_type &path) const
26-
-> std::optional<BuildDependencies<node_type>> {
27-
assert(path.is_absolute());
28-
const auto dependencies_path{this->dependencies_path(path)};
29-
30-
std::ifstream stream{dependencies_path};
31-
if (!stream.is_open()) {
32-
return std::nullopt;
19+
: root{std::filesystem::canonical(output_root)} {
20+
const auto deps_path{this->root / DEPENDENCIES_FILE};
21+
if (!std::filesystem::exists(deps_path)) {
22+
return;
3323
}
3424

35-
std::string contents{std::istreambuf_iterator<char>(stream),
36-
std::istreambuf_iterator<char>()};
37-
38-
BuildDependencies<node_type> result;
39-
std::size_t position{0};
40-
while (position < contents.size()) {
41-
auto newline{contents.find('\n', position)};
42-
if (newline == std::string::npos) {
43-
newline = contents.size();
25+
try {
26+
std::ifstream stream{deps_path};
27+
if (!stream.is_open()) {
28+
return;
4429
}
4530

46-
auto end{newline};
47-
// Prevent CRLF on Windows
48-
if (end > position && contents[end - 1] == '\r') {
49-
end -= 1;
50-
}
31+
std::string contents{std::istreambuf_iterator<char>(stream),
32+
std::istreambuf_iterator<char>()};
5133

52-
if (end > position) {
53-
auto kind{BuildDependencyKind::Static};
54-
std::filesystem::path dependency;
55-
const auto length{end - position};
56-
if (length >= 2 && contents[position + 1] == ' ' &&
57-
(contents[position] == 's' || contents[position] == 'd')) {
58-
kind = (contents[position] == 'd') ? BuildDependencyKind::Dynamic
59-
: BuildDependencyKind::Static;
60-
dependency = contents.substr(position + 2, end - position - 2);
61-
} else {
62-
dependency = contents.substr(position, length);
34+
std::string current_key;
35+
BuildDependencies<node_type> current_deps;
36+
std::size_t position{0};
37+
38+
while (position < contents.size()) {
39+
auto newline{contents.find('\n', position)};
40+
if (newline == std::string::npos) {
41+
newline = contents.size();
42+
}
43+
44+
if (newline <= position + 2 || contents[position + 1] != ' ') {
45+
position = newline + 1;
46+
continue;
6347
}
64-
if (!dependency.is_absolute()) {
65-
dependency = (this->root / dependency).lexically_normal();
48+
49+
const char tag{contents[position]};
50+
const std::string_view value{contents.data() + position + 2,
51+
newline - position - 2};
52+
53+
switch (tag) {
54+
case 't':
55+
if (!current_key.empty()) {
56+
this->dependencies_map.insert_or_assign(current_key,
57+
std::move(current_deps));
58+
current_deps = {};
59+
}
60+
61+
current_key = value;
62+
break;
63+
case 's':
64+
current_deps.emplace_back(
65+
BuildDependencyKind::Static,
66+
(this->root / std::string{value}).lexically_normal());
67+
break;
68+
case 'd':
69+
current_deps.emplace_back(
70+
BuildDependencyKind::Dynamic,
71+
(this->root / std::string{value}).lexically_normal());
72+
break;
73+
case 'm': {
74+
const auto space{value.find(' ')};
75+
if (space != std::string_view::npos) {
76+
const auto path_part{value.substr(0, space)};
77+
const auto ns_part{value.substr(space + 1)};
78+
const std::chrono::nanoseconds nanoseconds{
79+
std::stoll(std::string{ns_part})};
80+
const auto mark_value{mark_type{
81+
std::chrono::duration_cast<mark_type::duration>(nanoseconds)}};
82+
this->marks.insert_or_assign(
83+
(this->root / std::string{path_part}).lexically_normal(),
84+
mark_value);
85+
}
86+
87+
break;
88+
}
89+
default:
90+
break;
6691
}
6792

68-
result.emplace_back(kind, std::move(dependency));
93+
position = newline + 1;
6994
}
7095

71-
position = newline + 1;
96+
if (!current_key.empty()) {
97+
this->dependencies_map.insert_or_assign(current_key,
98+
std::move(current_deps));
99+
}
100+
this->has_previous_run = true;
101+
} catch (...) {
102+
this->dependencies_map.clear();
103+
this->marks.clear();
72104
}
105+
}
73106

74-
if (result.empty()) {
107+
auto BuildAdapterFilesystem::read_dependencies(const node_type &path) const
108+
-> std::optional<BuildDependencies<node_type>> {
109+
assert(path.is_absolute());
110+
const auto key{path.lexically_relative(this->root).string()};
111+
std::shared_lock lock{this->dependencies_mutex};
112+
const auto match{this->dependencies_map.find(key)};
113+
if (match == this->dependencies_map.end() || match->second.empty()) {
75114
return std::nullopt;
76-
} else {
77-
return result;
78115
}
116+
117+
return match->second;
79118
}
80119

81120
auto BuildAdapterFilesystem::write_dependencies(
82121
const node_type &path, const BuildDependencies<node_type> &dependencies)
83122
-> void {
84123
assert(path.is_absolute());
85124
assert(std::filesystem::exists(path));
86-
// Try to make sure as much as we can that any write operation made to disk
87125
sourcemeta::core::flush(path);
88126
this->refresh(path);
89-
const auto dependencies_path{this->dependencies_path(path)};
90-
std::filesystem::create_directories(dependencies_path.parent_path());
91-
std::ofstream dependencies_stream{dependencies_path};
92-
assert(!dependencies_stream.fail());
93-
for (const auto &dependency : dependencies) {
94-
const auto prefix{dependency.first == BuildDependencyKind::Dynamic ? "d "
95-
: "s "};
96-
const auto relative{dependency.second.lexically_relative(this->root)};
127+
const auto key{path.lexically_relative(this->root).string()};
128+
std::unique_lock lock{this->dependencies_mutex};
129+
this->dependencies_map.insert_or_assign(key, dependencies);
130+
}
131+
132+
auto BuildAdapterFilesystem::flush_dependencies(
133+
const std::function<bool(const node_type &)> &filter) -> void {
134+
const auto deps_path{this->root / DEPENDENCIES_FILE};
135+
std::ofstream stream{deps_path};
136+
assert(!stream.fail());
137+
138+
for (const auto &entry : this->dependencies_map) {
139+
if (!filter(this->root / entry.first)) {
140+
continue;
141+
}
142+
143+
stream << "t " << entry.first << '\n';
144+
for (const auto &dependency : entry.second) {
145+
const char kind_char{
146+
dependency.first == BuildDependencyKind::Dynamic ? 'd' : 's'};
147+
const auto relative{dependency.second.lexically_relative(this->root)};
148+
if (!relative.empty() && *relative.begin() != "..") {
149+
stream << kind_char << ' ' << relative.string() << '\n';
150+
} else {
151+
stream << kind_char << ' ' << dependency.second.string() << '\n';
152+
}
153+
}
154+
}
155+
156+
for (const auto &entry : this->marks) {
157+
const auto relative{entry.first.lexically_relative(this->root)};
97158
if (!relative.empty() && *relative.begin() != "..") {
98-
dependencies_stream << prefix << relative.string() << "\n";
99-
} else {
100-
dependencies_stream << prefix << dependency.second.string() << "\n";
159+
const auto nanoseconds{
160+
std::chrono::duration_cast<std::chrono::nanoseconds>(
161+
entry.second.time_since_epoch())
162+
.count()};
163+
stream << "m " << relative.string() << ' '
164+
<< static_cast<std::int64_t>(nanoseconds) << '\n';
101165
}
102166
}
103167

104-
dependencies_stream.flush();
105-
dependencies_stream.close();
106-
sourcemeta::core::flush(dependencies_path);
168+
stream.flush();
169+
stream.close();
107170
}
108171

109172
auto BuildAdapterFilesystem::refresh(const node_type &path) -> void {
@@ -129,11 +192,15 @@ auto BuildAdapterFilesystem::mark(const node_type &path)
129192
}
130193
}
131194

195+
// Output files should always have their marks cached
196+
// Only input files and output files that do not exist yet may lack a cached mark
197+
assert(!this->has_previous_run ||
198+
!path.string().starts_with(this->root.string()) ||
199+
!std::filesystem::exists(path));
200+
132201
try {
133202
const auto value{std::filesystem::last_write_time(path)};
134-
// Within a single run, if we didn't build this file, its mtime won't
135-
// change. If we did build it, refreshing already set a synthetic timestamp
136-
// that the cache lookup above would have returned instead
203+
// Cache for the rest of this run since input files don't change
137204
std::unique_lock lock{this->mutex};
138205
this->marks.emplace(path, value);
139206
return value;

src/build/include/sourcemeta/one/build_adapter_filesystem.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
1010
// NOLINTEND(misc-include-cleaner)
1111

1212
#include <filesystem> // std::filesystem
13+
#include <functional> // std::function
1314
#include <optional> // std::optional
1415
#include <shared_mutex> // std::shared_mutex
16+
#include <string> // std::string
1517
#include <unordered_map> // std::unordered_map
1618

1719
namespace sourcemeta::one {
@@ -23,13 +25,13 @@ class SOURCEMETA_ONE_BUILD_EXPORT BuildAdapterFilesystem {
2325

2426
BuildAdapterFilesystem(const std::filesystem::path &output_root);
2527

26-
[[nodiscard]] auto dependencies_path(const node_type &path) const
27-
-> node_type;
2828
[[nodiscard]] auto read_dependencies(const node_type &path) const
2929
-> std::optional<BuildDependencies<node_type>>;
3030
auto write_dependencies(const node_type &path,
3131
const BuildDependencies<node_type> &dependencies)
3232
-> void;
33+
auto flush_dependencies(const std::function<bool(const node_type &)> &filter)
34+
-> void;
3335
auto refresh(const node_type &path) -> void;
3436
[[nodiscard]] auto mark(const node_type &path) -> std::optional<mark_type>;
3537
[[nodiscard]] auto is_newer_than(const mark_type left,
@@ -39,6 +41,10 @@ class SOURCEMETA_ONE_BUILD_EXPORT BuildAdapterFilesystem {
3941
std::filesystem::path root;
4042
std::unordered_map<node_type, mark_type> marks;
4143
std::shared_mutex mutex;
44+
std::unordered_map<std::string, BuildDependencies<node_type>>
45+
dependencies_map;
46+
mutable std::shared_mutex dependencies_mutex;
47+
bool has_previous_run{false};
4248
};
4349

4450
} // namespace sourcemeta::one

src/index/index.cc

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,6 @@ DISPATCH(const std::filesystem::path &destination,
9494

9595
// We need to mark files regardless of whether they were generated or not
9696
output.track(destination);
97-
output.track(destination.string() + ".deps");
9897
return was_built;
9998
}
10099

@@ -193,12 +192,17 @@ static auto index_main(const std::string_view &program,
193192
// (6) Store a mark of the One version for target dependencies
194193
/////////////////////////////////////////////////////////////////////////////
195194

195+
sourcemeta::one::BuildAdapterFilesystem adapter{output.path()};
196+
196197
// We do this so that targets can be re-built if the One version changes
197198
const auto mark_version_path{output.path() / "version.json"};
198199
// Note we only write back if the content changed in order to not accidentally
199200
// bump up the file modified time
200-
output.write_json_if_different(
201-
mark_version_path, sourcemeta::core::JSON{sourcemeta::one::version()});
201+
if (output.write_json_if_different(
202+
mark_version_path,
203+
sourcemeta::core::JSON{sourcemeta::one::version()})) {
204+
adapter.refresh(mark_version_path);
205+
};
202206

203207
/////////////////////////////////////////////////////////////////////////////
204208
// (7) Store the full configuration file for target dependencies
@@ -209,17 +213,21 @@ static auto index_main(const std::string_view &program,
209213
const auto mark_configuration_path{output.path() / "configuration.json"};
210214
// Note we only write back if the content changed in order to not accidentally
211215
// bump up the file modified time
212-
output.write_json_if_different(mark_configuration_path, raw_configuration);
216+
if (output.write_json_if_different(mark_configuration_path,
217+
raw_configuration)) {
218+
adapter.refresh(mark_configuration_path);
219+
}
213220

214221
/////////////////////////////////////////////////////////////////////////////
215222
// (8) Store the optional comment for informational purposes
216223
/////////////////////////////////////////////////////////////////////////////
217224

218-
if (app.contains("comment")) {
219-
const auto comment_path{output.path() / "comment.json"};
220-
output.write_json_if_different(
221-
comment_path,
222-
sourcemeta::core::JSON{std::string{app.at("comment").at(0)}});
225+
const auto comment_path{output.path() / "comment.json"};
226+
if (app.contains("comment") &&
227+
output.write_json_if_different(
228+
comment_path,
229+
sourcemeta::core::JSON{std::string{app.at("comment").at(0)}})) {
230+
adapter.refresh(comment_path);
223231
}
224232

225233
PROFILE_END(profiling, "Startup");
@@ -309,7 +317,6 @@ static auto index_main(const std::string_view &program,
309317
const auto schemas_path{output.path() / "schemas"};
310318
const auto display_schemas_path{
311319
std::filesystem::relative(schemas_path, output.path())};
312-
sourcemeta::one::BuildAdapterFilesystem adapter{output.path()};
313320
sourcemeta::core::parallel_for_each(
314321
resolver.begin(), resolver.end(),
315322
[&output, &schemas_path, &resolver, &mutex, &adapter,
@@ -322,8 +329,6 @@ static auto index_main(const std::string_view &program,
322329
DISPATCH<sourcemeta::one::GENERATE_MATERIALISED_SCHEMA>(
323330
destination,
324331
{sourcemeta::one::make_dependency(schema.second.path),
325-
// This target depends on the configuration file given things like
326-
// resolve maps and base URIs
327332
sourcemeta::one::make_dependency(mark_configuration_path),
328333
sourcemeta::one::make_dependency(mark_version_path)},
329334
{schema.first, resolver}, mutex, "Ingesting", schema.first,
@@ -573,15 +578,14 @@ static auto index_main(const std::string_view &program,
573578
for (const auto &schema : resolver) {
574579
auto dependents_path{schemas_path / schema.second.relative_path / SENTINEL /
575580
"dependents.metapack"};
576-
const auto dependents_deps_path{dependents_path.string() + ".deps"};
577581
if (affected_dependents.contains(schema.first) ||
578582
!std::filesystem::exists(dependents_path) ||
579-
!std::filesystem::exists(dependents_deps_path)) {
583+
// TODO: This copies the whole dependency list just to check for presence, which is potentially slow
584+
!adapter.read_dependencies(dependents_path).has_value()) {
580585
rework_entries.push_back(
581586
{std::cref(schema.first), std::move(dependents_path)});
582587
} else {
583588
output.track(dependents_path);
584-
output.track(dependents_path.string() + ".deps");
585589
}
586590
}
587591

@@ -792,6 +796,12 @@ static auto index_main(const std::string_view &program,
792796

793797
// TODO: Print the size of the output directory here
794798

799+
// TODO: This level of coupling means that the output and the adapter should
800+
// be one
801+
adapter.flush_dependencies([&output](const auto &target) {
802+
return !output.is_untracked_file(target);
803+
});
804+
output.track(output.path() / "deps.txt");
795805
output.remove_unknown_files();
796806

797807
PROFILE_END(profiling, "Cleanup");

0 commit comments

Comments
 (0)