diff --git a/DEPENDENCIES b/DEPENDENCIES index b9d1ebbd..1abdd596 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -1,3 +1,3 @@ vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 -core https://github.com/sourcemeta/core e4d7ae9358710fc138d2afd3179db6d850e4190f +core https://github.com/sourcemeta/core 8fb7ba6f57bfa52ecdae83ada221e0aecc8e4e42 bootstrap https://github.com/twbs/bootstrap 1a6fdfae6be09b09eaced8f0e442ca6f7680a61e diff --git a/src/compiler/compiler.cc b/src/compiler/compiler.cc index e8e60ca9..c53742d8 100644 --- a/src/compiler/compiler.cc +++ b/src/compiler/compiler.cc @@ -23,7 +23,7 @@ namespace sourcemeta::jsonbinpack { auto canonicalize(sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect) -> void { + const std::string_view default_dialect) -> void { sourcemeta::core::SchemaTransformer canonicalizer; sourcemeta::core::add(canonicalizer, sourcemeta::core::AlterSchemaMode::Canonicalizer); @@ -61,7 +61,7 @@ auto make_encoding(sourcemeta::core::JSON &document, auto compile(sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect) -> void { + const std::string_view default_dialect) -> void { canonicalize(schema, walker, resolver, default_dialect); sourcemeta::core::SchemaTransformer mapper; @@ -94,7 +94,7 @@ auto compile(sourcemeta::core::JSON &schema, // The "any" encoding is always the last resort const auto dialect{sourcemeta::core::dialect(schema)}; - if (!dialect.has_value() || dialect.value() != ENCODING_V1) { + if (dialect.empty() || dialect != ENCODING_V1) { make_encoding(schema, "ANY_PACKED_TYPE_TAG_BYTE_PREFIX", sourcemeta::core::JSON::make_object()); } diff --git a/src/compiler/include/sourcemeta/jsonbinpack/compiler.h b/src/compiler/include/sourcemeta/jsonbinpack/compiler.h index 
c976dedd..c1ec85a9 100644 --- a/src/compiler/include/sourcemeta/jsonbinpack/compiler.h +++ b/src/compiler/include/sourcemeta/jsonbinpack/compiler.h @@ -17,8 +17,7 @@ #include #include -#include // std::optional -#include // std::string +#include // std::string_view namespace sourcemeta::jsonbinpack { @@ -50,8 +49,7 @@ SOURCEMETA_JSONBINPACK_COMPILER_EXPORT auto compile(sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt) - -> void; + std::string_view default_dialect = "") -> void; /// @ingroup compiler /// @@ -80,11 +78,10 @@ auto compile(sourcemeta::core::JSON &schema, /// std::cout << std::endl; /// ``` SOURCEMETA_JSONBINPACK_COMPILER_EXPORT -auto canonicalize( - sourcemeta::core::JSON &schema, - const sourcemeta::core::SchemaWalker &walker, - const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt) -> void; +auto canonicalize(sourcemeta::core::JSON &schema, + const sourcemeta::core::SchemaWalker &walker, + const sourcemeta::core::SchemaResolver &resolver, + std::string_view default_dialect = "") -> void; } // namespace sourcemeta::jsonbinpack diff --git a/test/compiler/canonicalizer_test.cc b/test/compiler/canonicalizer_test.cc index 77e84d09..d2b845b5 100644 --- a/test/compiler/canonicalizer_test.cc +++ b/test/compiler/canonicalizer_test.cc @@ -27,7 +27,7 @@ TEST(JSONBinPack_Canonicalizer, unsupported_draft) { EXPECT_THROW(sourcemeta::jsonbinpack::canonicalize( schema, sourcemeta::core::schema_walker, test_resolver), - sourcemeta::core::SchemaBaseDialectError); + sourcemeta::core::SchemaUnknownBaseDialectError); } TEST(JSONBinPack_Canonicalizer, unknown_draft) { diff --git a/vendor/core/CMakeLists.txt b/vendor/core/CMakeLists.txt index fc8d88c1..4edf68f0 100644 --- a/vendor/core/CMakeLists.txt +++ b/vendor/core/CMakeLists.txt @@ -13,6 +13,7 @@ option(SOURCEMETA_CORE_UUID "Build the 
Sourcemeta Core UUID library" ON) option(SOURCEMETA_CORE_MD5 "Build the Sourcemeta Core MD5 library" ON) option(SOURCEMETA_CORE_REGEX "Build the Sourcemeta Core Regex library" ON) option(SOURCEMETA_CORE_URI "Build the Sourcemeta Core URI library" ON) +option(SOURCEMETA_CORE_URITEMPLATE "Build the Sourcemeta Core URI Template library" ON) option(SOURCEMETA_CORE_JSON "Build the Sourcemeta Core JSON library" ON) option(SOURCEMETA_CORE_JSONSCHEMA "Build the Sourcemeta Core JSON Schema library" ON) option(SOURCEMETA_CORE_JSONPOINTER "Build the Sourcemeta Core JSON Pointer library" ON) @@ -100,6 +101,10 @@ if(SOURCEMETA_CORE_URI) add_subdirectory(src/core/uri) endif() +if(SOURCEMETA_CORE_URITEMPLATE) + add_subdirectory(src/core/uritemplate) +endif() + if(SOURCEMETA_CORE_JSON) add_subdirectory(src/core/json) endif() @@ -212,6 +217,10 @@ if(SOURCEMETA_CORE_TESTS) add_subdirectory(test/uri) endif() + if(SOURCEMETA_CORE_URITEMPLATE) + add_subdirectory(test/uritemplate) + endif() + if(SOURCEMETA_CORE_JSON) add_subdirectory(test/json) endif() diff --git a/vendor/core/config.cmake.in b/vendor/core/config.cmake.in index 2a663c02..c3d107df 100644 --- a/vendor/core/config.cmake.in +++ b/vendor/core/config.cmake.in @@ -14,6 +14,7 @@ if(NOT SOURCEMETA_CORE_COMPONENTS) list(APPEND SOURCEMETA_CORE_COMPONENTS md5) list(APPEND SOURCEMETA_CORE_COMPONENTS regex) list(APPEND SOURCEMETA_CORE_COMPONENTS uri) + list(APPEND SOURCEMETA_CORE_COMPONENTS uritemplate) list(APPEND SOURCEMETA_CORE_COMPONENTS json) list(APPEND SOURCEMETA_CORE_COMPONENTS jsonl) list(APPEND SOURCEMETA_CORE_COMPONENTS jsonpointer) @@ -52,6 +53,9 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS}) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_regex.cmake") elseif(component STREQUAL "uri") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") + elseif(component STREQUAL "uritemplate") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + 
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uritemplate.cmake") elseif(component STREQUAL "json") find_dependency(mpdecimal CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") diff --git a/vendor/core/src/core/json/include/sourcemeta/core/json_hash.h b/vendor/core/src/core/json/include/sourcemeta/core/json_hash.h index 5818b842..49ee1066 100644 --- a/vendor/core/src/core/json/include/sourcemeta/core/json_hash.h +++ b/vendor/core/src/core/json/include/sourcemeta/core/json_hash.h @@ -1,17 +1,23 @@ #ifndef SOURCEMETA_CORE_JSON_HASH_H_ #define SOURCEMETA_CORE_JSON_HASH_H_ -#include // assert -#include // std::uint64_t -#include // std::memcpy +#include // assert +#include // std::uint64_t +#include // std::memcpy +#include // std::reference_wrapper namespace sourcemeta::core { /// @ingroup json template struct HashJSON { using hash_type = std::uint64_t; + inline auto operator()(const T &value) const noexcept -> hash_type { - return value.fast_hash(); + if constexpr (requires { value.get().fast_hash(); }) { + return value.get().fast_hash(); + } else { + return value.fast_hash(); + } } [[nodiscard]] @@ -145,6 +151,21 @@ template struct PropertyHashJSON { } }; +/// @ingroup json +/// Until C++26, `std::reference_wrapper` does not overload `operator==`, +/// so we need custom comparisons for use in i.e. 
`unordered_set` +/// See +/// https://en.cppreference.com/w/cpp/utility/functional/reference_wrapper/operator_cmp.html +template struct EqualJSON { + inline auto operator()(const T &left, const T &right) const -> bool { + if constexpr (requires { left.get() == right.get(); }) { + return left.get() == right.get(); + } else { + return left == right; + } + } +}; + } // namespace sourcemeta::core #endif diff --git a/vendor/core/src/core/jsonpointer/CMakeLists.txt b/vendor/core/src/core/jsonpointer/CMakeLists.txt index 20d4291a..20300551 100644 --- a/vendor/core/src/core/jsonpointer/CMakeLists.txt +++ b/vendor/core/src/core/jsonpointer/CMakeLists.txt @@ -1,7 +1,7 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME jsonpointer PRIVATE_HEADERS pointer.h position.h error.h token.h - walker.h template.h - SOURCES jsonpointer.cc stringify.h parser.h grammar.h position.cc) + walker.h + SOURCES jsonpointer.cc stringify.h parser.h grammar.h position.cc mangle.cc) if(SOURCEMETA_CORE_INSTALL) sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME jsonpointer) diff --git a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h index 3a914313..17bebdd4 100644 --- a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h +++ b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h @@ -12,7 +12,6 @@ #include #include #include -#include #include // NOLINTEND(misc-include-cleaner) @@ -21,6 +20,7 @@ #include // std::allocator #include // std::basic_ostream #include // std::basic_string +#include // std::string_view #include // std::is_same_v /// @defgroup jsonpointer JSON Pointer @@ -38,8 +38,9 @@ namespace sourcemeta::core { using Pointer = GenericPointer>; /// @ingroup jsonpointer -using WeakPointer = GenericPointer, - PropertyHashJSON>; +using WeakPointer = GenericPointer< + // We use this instead of a string view as the latter occupies more 
memory + std::reference_wrapper, PropertyHashJSON>; /// @ingroup jsonpointer /// A global constant instance of the empty JSON Pointer. @@ -49,10 +50,6 @@ const Pointer empty_pointer; /// A global constant instance of the empty JSON WeakPointer. const WeakPointer empty_weak_pointer; -/// @ingroup jsonpointer -/// A JSON Pointer with unresolved wildcards -using PointerTemplate = GenericPointerTemplate; - /// @ingroup jsonpointer /// Get a value from a JSON document using a JSON Pointer (`const` overload). /// @@ -465,27 +462,6 @@ auto stringify(const WeakPointer &pointer, std::basic_ostream &stream) -> void; -/// @ingroup jsonpointer -/// -/// Stringify the input JSON Pointer template into a given C++ standard output -/// stream. For example: -/// -/// ```cpp -/// #include -/// #include -/// #include -/// -/// const sourcemeta::core::Pointer base{"foo", "bar"}; -/// const sourcemeta::core::PointerTemplate pointer{base}; -/// std::ostringstream stream; -/// sourcemeta::core::stringify(pointer, stream); -/// std::cout << stream.str() << std::endl; -/// ``` -SOURCEMETA_CORE_JSONPOINTER_EXPORT -auto stringify(const PointerTemplate &pointer, - std::basic_ostream &stream) - -> void; - /// @ingroup jsonpointer /// /// Stringify the input JSON Pointer into a C++ standard string. For example: @@ -526,24 +502,38 @@ auto to_string(const WeakPointer &pointer) /// @ingroup jsonpointer /// -/// Stringify the input JSON Pointer template into a C++ standard string. For -/// example: +/// Mangle a JSON Pointer template and prefix into a collision-free identifier. 
+/// +/// The encoding rules for ASCII characters (0x00-0x7F) are: +/// +/// - Lowercase at segment start (except x, u, z): capitalize (no marker) +/// - Lowercase x, u, z at segment start: hex escape (reserved characters) +/// - Uppercase at segment start (except X, U, Z): U + letter +/// - Uppercase X, U, Z at segment start: hex escape (reserved characters) +/// - Non-segment-start lowercase: as-is +/// - Non-segment-start uppercase (except X, U): as-is +/// - Non-segment-start X: X58, Non-segment-start U: X55 +/// - ASCII digits (0-9): as-is +/// - Other ASCII (space, punctuation, control): hex escape, starts new segment +/// - Z/z reserved for special token prefixes +/// +/// For non-ASCII bytes (0x80-0xFF, e.g. UTF-8 sequences): +/// +/// - Always hex escaped +/// - Do NOT start a new segment (preserves UTF-8 multi-byte sequences) +/// +/// For example: /// /// ```cpp /// #include -/// #include -/// #include +/// #include /// -/// sourcemeta::core::PointerTemplate pointer; -/// pointer.emplace_back(sourcemeta::core::Pointer::Token{"foo"}); -/// pointer.emplace_back(sourcemeta::core::PointerTemplate::Wildcard::Property); -/// const std::string result{sourcemeta::core::to_string(pointer)}; -/// std::cout << result << '\n'; +/// const sourcemeta::core::Pointer pointer{"foo", "bar"}; +/// const auto result{sourcemeta::core::mangle(pointer, "schema")}; +/// assert(result == "Schema_Foo_Bar"); /// ``` SOURCEMETA_CORE_JSONPOINTER_EXPORT -auto to_string(const PointerTemplate &pointer) - -> std::basic_string>; +auto mangle(const Pointer &pointer, std::string_view prefix) -> std::string; /// @ingroup jsonpointer /// @@ -584,34 +574,48 @@ auto to_uri(const Pointer &pointer) -> URI; SOURCEMETA_CORE_JSONPOINTER_EXPORT auto to_uri(const Pointer &pointer, const URI &base) -> URI; -// TODO: Only support this with weak pointers +/// @ingroup jsonpointer +SOURCEMETA_CORE_JSONPOINTER_EXPORT +auto to_uri(const WeakPointer &pointer) -> URI; + +/// @ingroup jsonpointer 
+SOURCEMETA_CORE_JSONPOINTER_EXPORT +auto to_uri(const WeakPointer &pointer, const URI &base) -> URI; + +/// @ingroup jsonpointer +SOURCEMETA_CORE_JSONPOINTER_EXPORT +auto to_uri(const WeakPointer &pointer, const std::string_view base) -> URI; + /// @ingroup jsonpointer /// -/// Walk over every element of a JSON document, top-down, using JSON Pointers. -/// For example: +/// Walk over every element of a JSON document, top-down, using weak pointers. +/// Note that the resulting weak pointers hold references to strings in the JSON +/// document, so the document must outlive the walker and any pointers obtained +/// from it. For example: /// /// ```cpp /// #include /// #include /// #include +/// #include /// #include /// /// const sourcemeta::core::JSON document = /// sourcemeta::core::parse_json("[ 1, 2, 3 ]"); -/// std::vector subpointers; +/// std::vector subpointers; /// /// for (const auto &subpointer : /// sourcemeta::core::PointerWalker{document}) { -/// subpointers.push_back(subpointer); +/// subpointers.push_back(sourcemeta::core::to_string(subpointer)); /// } /// /// assert(subpointers.size() == 4); -/// assert(subpointers.at(0) == sourcemeta::core::Pointer{}); -/// assert(subpointers.at(1) == sourcemeta::core::Pointer{0}); -/// assert(subpointers.at(2) == sourcemeta::core::Pointer{1}); -/// assert(subpointers.at(3) == sourcemeta::core::Pointer{2}); +/// assert(subpointers.at(0) == ""); +/// assert(subpointers.at(1) == "/0"); +/// assert(subpointers.at(2) == "/1"); +/// assert(subpointers.at(3) == "/2"); /// ``` -using PointerWalker = GenericPointerWalker; +using PointerWalker = GenericPointerWalker; /// @ingroup jsonpointer /// Serialise a Pointer as JSON @@ -680,41 +684,6 @@ struct hash -struct hash> { - auto operator()(const sourcemeta::core::GenericPointerTemplate - &pointer) const noexcept -> std::size_t { - const auto size{pointer.size()}; - if (size == 0) { - return size; - } - - auto hash_element = - [](const typename 
sourcemeta::core::GenericPointerTemplate< - PointerT>::value_type &element) -> std::size_t { - using Template = sourcemeta::core::GenericPointerTemplate; - const auto *token{std::get_if(&element)}; - if (token) { - return token->is_property() - ? static_cast(token->property_hash().a) - : token->to_index(); - } else { - return element.index(); - } - }; - - const auto &first{*pointer.cbegin()}; - const auto &middle{ - *(pointer.cbegin() + - static_cast::difference_type>(size / 2))}; - const auto &last{*(pointer.cend() - 1)}; - - return size + hash_element(first) + hash_element(middle) + - hash_element(last); - } -}; } // namespace std #endif diff --git a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h index cc6eaf33..0e5931e5 100644 --- a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h +++ b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h @@ -433,6 +433,33 @@ template class GenericPointer { return result; } + /// Get a copy of the JSON Pointer starting from a given token index. This + /// method is undefined if the index is greater than the pointer size. 
For + /// example: + /// + /// ```cpp + /// #include + /// #include + /// + /// const sourcemeta::core::Pointer pointer{"foo", "bar", "baz"}; + /// const sourcemeta::core::Pointer result{pointer.slice(1)}; + /// assert(result.size() == 2); + /// assert(result.at(0).is_property()); + /// assert(result.at(0).to_property() == "bar"); + /// assert(result.at(1).is_property()); + /// assert(result.at(1).to_property() == "baz"); + /// ``` + [[nodiscard]] auto slice(const std::size_t index) const + -> GenericPointer { + assert(index <= this->size()); + auto new_begin{this->data.cbegin()}; + std::advance(new_begin, index); + GenericPointer result; + result.reserve(this->size() - index); + std::copy(new_begin, this->data.cend(), std::back_inserter(result.data)); + return result; + } + /// Concatenate a JSON Pointer with another JSON Pointer, getting a new /// pointer as a result. For example: /// @@ -492,6 +519,55 @@ template class GenericPointer { } } + /// Check whether a JSON Pointer starts with another JSON Pointer followed + /// by a property token. This is useful for checking container membership + /// without allocating a new pointer. For example: + /// + /// ```cpp + /// #include + /// #include + /// + /// const sourcemeta::core::Pointer pointer{"foo", "$defs", "bar"}; + /// const sourcemeta::core::Pointer prefix{"foo"}; + /// assert(pointer.starts_with(prefix, "$defs")); + /// assert(!pointer.starts_with(prefix, "other")); + /// ``` + template + requires(!std::is_same_v, Token>) + [[nodiscard]] auto starts_with(const GenericPointer &other, + const StringT &tail) const -> bool { + const auto prefix_size{other.size()}; + return this->size() > prefix_size && this->starts_with(other) && + this->data[prefix_size].is_property() && + this->data[prefix_size].to_property() == tail; + } + + /// Check whether a JSON Pointer starts with another JSON Pointer followed + /// by two property tokens. 
This is useful for checking nested container + /// membership without allocating a new pointer. For example: + /// + /// ```cpp + /// #include + /// #include + /// + /// const sourcemeta::core::Pointer pointer{"foo", "$defs", "bar", "baz"}; + /// const sourcemeta::core::Pointer prefix{"foo"}; + /// assert(pointer.starts_with(prefix, "$defs", "bar")); + /// assert(!pointer.starts_with(prefix, "$defs", "other")); + /// ``` + template + requires(!std::is_same_v, Token> && + !std::is_same_v, Token>) + [[nodiscard]] auto starts_with(const GenericPointer &other, + const StringLeftT &tail_left, + const StringRightT &tail_right) const -> bool { + const auto prefix_size{other.size()}; + return this->size() > prefix_size + 1 && + this->starts_with(other, tail_left) && + this->data[prefix_size + 1].is_property() && + this->data[prefix_size + 1].to_property() == tail_right; + } + /// Check whether a JSON Pointer starts with the initial part of another JSON /// Pointer. For example: /// diff --git a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_template.h b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_template.h deleted file mode 100644 index 0fdfcc7f..00000000 --- a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_template.h +++ /dev/null @@ -1,383 +0,0 @@ -#ifndef SOURCEMETA_CORE_JSONPOINTER_TEMPLATE_H_ -#define SOURCEMETA_CORE_JSONPOINTER_TEMPLATE_H_ - -#include - -#include // std::copy, std::all_of -#include // assert -#include // std::uint8_t -#include // std::initializer_list -#include // std::back_inserter -#include // std::optional, std::nullopt -#include // std::is_convertible_v, std::is_null_pointer_v -#include // std::forward -#include // std::variant, std::holds_alternative, std::get -#include // std::vector - -namespace sourcemeta::core { - -/// @ingroup jsonpointer -template class GenericPointerTemplate { -public: - enum class Wildcard : std::uint8_t { Property, Item, Key }; - struct Condition { - 
auto operator==(const Condition &) const noexcept -> bool = default; - auto operator<(const Condition &) const noexcept -> bool { return false; } - std::optional suffix = std::nullopt; - }; - struct Negation { - auto operator==(const Negation &) const noexcept -> bool = default; - auto operator<(const Negation &) const noexcept -> bool { return false; } - }; - using Regex = typename PointerT::Value::String; - using Token = typename PointerT::Token; - using Container = - std::vector>; - - /// This constructor creates an empty JSON Pointer template. For example: - /// - /// ```cpp - /// #include - /// - /// const sourcemeta::core::PointerTemplate pointer; - /// ``` - GenericPointerTemplate() : data{} {} - - /// This constructor is the preferred way of creating a pointer template. - /// For example: - /// - /// ```cpp - /// #include - /// #include - /// - /// const sourcemeta::core::PointerTemplate pointer{ - /// "foo", - /// sourcemeta::core::PointerTemplate::Wildcard::Property}; - /// ``` - GenericPointerTemplate( - std::initializer_list tokens) - : data{std::move(tokens)} {} - - /// This constructor creates a JSON Pointer template from properties or - /// indexes. For example: - /// - /// ```cpp - /// #include - /// #include - /// - /// const sourcemeta::core::PointerTemplate pointer_1{"foo", "bar", "baz"}; - /// assert(pointer_1.size() == 3); - /// const sourcemeta::core::PointerTemplate pointer_2{"foo", 1, "bar"}; - /// assert(pointer_2.size() == 3); - /// ``` - template - requires(sizeof...(Args) > 0 && - ((!std::is_null_pointer_v> && - (std::is_convertible_v || - std::is_integral_v>)) && - ...)) - GenericPointerTemplate(Args &&...args) - : data{Token{std::forward(args)}...} {} - - /// This constructor creates a JSON Pointer template from an existing JSON - /// Pointer. 
For example: - /// - /// ```cpp - /// #include - /// - /// const sourcemeta::core::Pointer base{"foo", "bar"}; - /// const sourcemeta::core::PointerTemplate pointer{base}; - /// ``` - GenericPointerTemplate(const PointerT &other) { this->push_back(other); } - - // Member types - using value_type = typename Container::value_type; - using allocator_type = typename Container::allocator_type; - using size_type = typename Container::size_type; - using difference_type = typename Container::difference_type; - using reference = typename Container::reference; - using const_reference = typename Container::const_reference; - using pointer = typename Container::pointer; - using const_pointer = typename Container::const_pointer; - using iterator = typename Container::iterator; - using const_iterator = typename Container::const_iterator; - using reverse_iterator = typename Container::reverse_iterator; - using const_reverse_iterator = typename Container::const_reverse_iterator; - - /// Get a mutable begin iterator on the pointer - auto begin() noexcept -> iterator { return this->data.begin(); } - /// Get a mutable end iterator on the pointer - auto end() noexcept -> iterator { return this->data.end(); } - /// Get a constant begin iterator on the pointer - [[nodiscard]] auto begin() const noexcept -> const_iterator { - return this->data.begin(); - } - /// Get a constant end iterator on the pointer - [[nodiscard]] auto end() const noexcept -> const_iterator { - return this->data.end(); - } - /// Get a constant begin iterator on the pointer - [[nodiscard]] auto cbegin() const noexcept -> const_iterator { - return this->data.cbegin(); - } - /// Get a constant end iterator on the pointer - [[nodiscard]] auto cend() const noexcept -> const_iterator { - return this->data.cend(); - } - /// Get a mutable reverse begin iterator on the pointer - auto rbegin() noexcept -> reverse_iterator { return this->data.rbegin(); } - /// Get a mutable reverse end iterator on the pointer - auto rend() 
noexcept -> reverse_iterator { return this->data.rend(); } - /// Get a constant reverse begin iterator on the pointer - [[nodiscard]] auto rbegin() const noexcept -> const_reverse_iterator { - return this->data.rbegin(); - } - /// Get a constant reverse end iterator on the pointer - [[nodiscard]] auto rend() const noexcept -> const_reverse_iterator { - return this->data.rend(); - } - /// Get a constant reverse begin iterator on the pointer - [[nodiscard]] auto crbegin() const noexcept -> const_reverse_iterator { - return this->data.crbegin(); - } - /// Get a constant reverse end iterator on the pointer - [[nodiscard]] auto crend() const noexcept -> const_reverse_iterator { - return this->data.crend(); - } - - /// Emplace a token or wildcard into the back of a JSON Pointer template. For - /// example: - /// - /// ```cpp - /// #include - /// - /// sourcemeta::core::PointerTemplate pointer; - /// pointer.emplace_back(sourcemeta::core::PointerTemplate::Wildcard::Property); - /// ``` - template auto emplace_back(Args &&...args) -> reference { - return this->data.emplace_back(std::forward(args)...); - } - - /// Push a copy of a JSON Pointer into the back of a JSON Pointer template. - /// For example: - /// - /// ```cpp - /// #include - /// - /// sourcemeta::core::PointerTemplate result; - /// const sourcemeta::core::Pointer pointer{"bar", "baz"}; - /// result.push_back(pointer); - /// ``` - auto push_back(const PointerT &other) -> void { - this->data.reserve(this->data.size() + other.size()); - std::copy(other.cbegin(), other.cend(), std::back_inserter(this->data)); - } - - /// Remove the last token of a JSON Pointer template. 
For example: - /// - /// ```cpp - /// #include - /// - /// const sourcemeta::core::Pointer base{"bar", "baz"}; - /// sourcemeta::core::PointerTemplate pointer{base}; - /// pointer.pop_back(); - /// ``` - auto pop_back() -> void { - assert(!this->empty()); - this->data.pop_back(); - } - - /// Concatenate a JSON Pointer template with another JSON Pointer template, - /// getting a new pointer template as a result. For example: - /// - /// ```cpp - /// #include - /// #include - /// - /// const sourcemeta::core::Pointer pointer_left{"foo"}; - /// const sourcemeta::core::Pointer pointer_right{"bar", "baz"}; - /// const sourcemeta::core::Pointer pointer_expected{"foo", "bar", "baz"}; - /// - /// const sourcemeta::core::PointerTemplate left{pointer_left}; - /// const sourcemeta::core::PointerTemplate right{pointer_right}; - /// const sourcemeta::core::PointerTemplate expected{pointer_expected}; - /// - /// assert(left.concat(right) == expected); - /// ``` - [[nodiscard]] auto - concat(const GenericPointerTemplate &&other) const - -> GenericPointerTemplate { - GenericPointerTemplate result{*this}; - result.data.reserve(result.data.size() + other.data.size()); - for (auto &&token : other) { - result.emplace_back(std::move(token)); - } - - return result; - } - - /// Check if a JSON Pointer template is empty. - /// For example: - /// - /// ```cpp - /// #include - /// #include - /// - /// const sourcemeta::core::PointerTemplate empty_pointer; - /// assert(empty_pointer.empty()); - /// ``` - [[nodiscard]] auto empty() const noexcept -> bool { - return this->data.empty(); - } - - /// Get the size of the JSON Pointer template. 
For example: - /// - /// ```cpp - /// #include - /// #include - /// - /// const sourcemeta::core::Pointer base{"foo", "bar"}; - /// const sourcemeta::core::PointerTemplate pointer{base}; - /// assert(pointer.size() == 2); - /// ``` - [[nodiscard]] auto size() const noexcept -> size_type { - return this->data.size(); - } - - /// Check if a JSON Pointer template only consists in normal non-templated - /// tokens. For example: - /// - /// ```cpp - /// #include - /// #include - /// - /// sourcemeta::core::PointerTemplate pointer; - /// pointer.emplace_back(sourcemeta::core::PointerTemplate::Wildcard::Property); - /// pointer.emplace_back(sourcemeta::core::Pointer::Token{"foo"}); - /// assert(!pointer.trivial()); - /// ``` - [[nodiscard]] auto trivial() const noexcept -> bool { - return std::all_of( - this->data.cbegin(), this->data.cend(), - [](const auto &token) { return std::holds_alternative(token); }); - } - - /// Check if a JSON Pointer template matches another JSON Pointer template. - /// For example: - /// - /// ```cpp - /// #include - /// #include - /// - /// const sourcemeta::core::PointerTemplate left{ - /// sourcemeta::core::PointerTemplate::Condition{}, - /// sourcemeta::core::Pointer::Token{"foo"}}; - /// const sourcemeta::core::PointerTemplate right{ - /// sourcemeta::core::Pointer::Token{"foo"}}; - /// - /// assert(left.matches(right)); - /// assert(right.matches(left)); - /// ``` - [[nodiscard]] auto - matches(const GenericPointerTemplate &other) const noexcept - -> bool { - // TODO: Find a way to simplify this long method - auto iterator_this = this->data.cbegin(); - auto iterator_that = other.data.cbegin(); - - while (iterator_this != this->data.cend() && - iterator_that != other.data.cend()) { - while (iterator_this != this->data.cend() && - std::holds_alternative(*iterator_this)) { - iterator_this += 1; - } - - while (iterator_that != other.data.cend() && - std::holds_alternative(*iterator_that)) { - iterator_that += 1; - } - - if (iterator_this == 
this->data.cend() || - iterator_that == other.data.cend()) { - break; - } else if (*iterator_this != *iterator_that) { - // Handle regular expressions - if (std::holds_alternative(*iterator_this) && - std::holds_alternative(*iterator_that)) { - const auto &token{std::get(*iterator_this)}; - if (!token.is_property() || - !sourcemeta::core::matches_if_valid( - std::get(*iterator_that), token.to_property())) { - return false; - } - } else if (std::holds_alternative(*iterator_this) && - std::holds_alternative(*iterator_that)) { - const auto &token{std::get(*iterator_that)}; - if (!token.is_property() || - !sourcemeta::core::matches_if_valid( - std::get(*iterator_this), token.to_property())) { - return false; - } - - // Handle wildcards - } else if (std::holds_alternative(*iterator_this) && - std::holds_alternative(*iterator_that)) { - const auto &token{std::get(*iterator_that)}; - const auto wildcard{std::get(*iterator_this)}; - if (wildcard == Wildcard::Key || - (wildcard == Wildcard::Property && !token.is_property()) || - (wildcard == Wildcard::Item && !token.is_index())) { - return false; - } - } else if (std::holds_alternative(*iterator_this) && - std::holds_alternative(*iterator_that)) { - const auto &token{std::get(*iterator_this)}; - const auto wildcard{std::get(*iterator_that)}; - if (wildcard == Wildcard::Key || - (wildcard == Wildcard::Property && !token.is_property()) || - (wildcard == Wildcard::Item && !token.is_index())) { - return false; - } - } else if (std::holds_alternative(*iterator_this) && - std::holds_alternative(*iterator_that)) { - if (std::get(*iterator_that) != Wildcard::Property) { - return false; - } - } else if (std::holds_alternative(*iterator_this) && - std::holds_alternative(*iterator_that)) { - if (std::get(*iterator_this) != Wildcard::Property) { - return false; - } - } else { - return false; - } - } - - iterator_this += 1; - iterator_that += 1; - } - - return iterator_this == this->data.cend() && - iterator_that == other.data.cend(); - 
} - - /// Compare JSON Pointer template instances - auto operator==(const GenericPointerTemplate &other) const noexcept - -> bool { - return this->data == other.data; - } - - /// Overload to support ordering of JSON Pointer templates. Typically for - /// sorting reasons. - auto operator<(const GenericPointerTemplate &other) const noexcept - -> bool { - return this->data < other.data; - } - -private: - Container data; -}; - -} // namespace sourcemeta::core - -#endif diff --git a/vendor/core/src/core/jsonpointer/jsonpointer.cc b/vendor/core/src/core/jsonpointer/jsonpointer.cc index 330afee4..1656254d 100644 --- a/vendor/core/src/core/jsonpointer/jsonpointer.cc +++ b/vendor/core/src/core/jsonpointer/jsonpointer.cc @@ -347,12 +347,6 @@ auto stringify(const WeakPointer &pointer, stringify(pointer, stream); } -auto stringify(const PointerTemplate &pointer, - std::basic_ostream &stream) - -> void { - stringify(pointer, stream); -} - auto to_string(const Pointer &pointer) -> std::basic_string> { @@ -373,26 +367,36 @@ auto to_string(const WeakPointer &pointer) return result.str(); } -auto to_string(const PointerTemplate &pointer) - -> std::basic_string> { +auto to_uri(const Pointer &pointer) -> URI { std::basic_ostringstream> result; - stringify(pointer, result); - return result.str(); + stringify(pointer, result); + return URI::from_fragment(result.str()); } -auto to_uri(const Pointer &pointer) -> URI { +auto to_uri(const Pointer &pointer, const URI &base) -> URI { + return to_uri(pointer).resolve_from(base).canonicalize(); +} + +auto to_uri(const WeakPointer &pointer) -> URI { std::basic_ostringstream> result; - stringify(pointer, result); + stringify(pointer, result); return URI::from_fragment(result.str()); } -auto to_uri(const Pointer &pointer, const URI &base) -> URI { +auto to_uri(const WeakPointer &pointer, const URI &base) -> URI { return to_uri(pointer).resolve_from(base).canonicalize(); } +auto to_uri(const WeakPointer &pointer, const std::string_view base) -> 
URI { + if (base.empty()) { + return to_uri(pointer); + } + + return to_uri(pointer).resolve_from(URI{base}).canonicalize(); +} + } // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonpointer/mangle.cc b/vendor/core/src/core/jsonpointer/mangle.cc new file mode 100644 index 00000000..64cfb317 --- /dev/null +++ b/vendor/core/src/core/jsonpointer/mangle.cc @@ -0,0 +1,169 @@ +#include + +#include // assert +#include // std::setfill, std::setw +#include // std::ostringstream +#include // std::string_view + +namespace { + +// Special characters +constexpr auto ESCAPE_PREFIX = 'X'; +constexpr auto UPPERCASE_PREFIX = 'U'; +constexpr auto SEPARATOR = '_'; +constexpr auto HYPHEN = '-'; + +// Reserved characters that need escaping +constexpr auto RESERVED_X_UPPER = 'X'; +constexpr auto RESERVED_X_LOWER = 'x'; +constexpr auto RESERVED_U_UPPER = 'U'; +constexpr auto RESERVED_U_LOWER = 'u'; +constexpr auto RESERVED_Z_UPPER = 'Z'; +constexpr auto RESERVED_Z_LOWER = 'z'; + +// Special token markers +constexpr std::string_view TOKEN_EMPTY = "ZEmpty"; +constexpr std::string_view TOKEN_INDEX = "ZIndex"; + +constexpr auto ASCII_MAX = static_cast(0x80); + +// Locale-independent ASCII character classification +inline auto is_ascii_alpha(unsigned char character) noexcept -> bool { + return (character >= 'A' && character <= 'Z') || + (character >= 'a' && character <= 'z'); +} + +inline auto is_ascii_digit(unsigned char character) noexcept -> bool { + return character >= '0' && character <= '9'; +} + +inline auto is_ascii_lower(unsigned char character) noexcept -> bool { + return character >= 'a' && character <= 'z'; +} + +inline auto to_ascii_upper(unsigned char character) noexcept -> char { + if (character >= 'a' && character <= 'z') { + return static_cast(character - 'a' + 'A'); + } + return static_cast(character); +} + +inline auto hex_escape(std::ostringstream &output, char character) noexcept + -> void { + output << ESCAPE_PREFIX << std::uppercase << std::hex << 
std::setfill('0') + << std::setw(2) + << static_cast(static_cast(character)); +} + +inline auto is_reserved_at_start(char character) noexcept -> bool { + switch (character) { + case RESERVED_X_UPPER: + case RESERVED_X_LOWER: + case RESERVED_U_UPPER: + case RESERVED_U_LOWER: + case RESERVED_Z_UPPER: + case RESERVED_Z_LOWER: + return true; + default: + return false; + } +} + +inline auto encode_prefix(std::ostringstream &output, + std::string_view input) noexcept -> void { + bool capitalize_next{true}; + bool first{true}; + + for (const auto character : input) { + const auto unsigned_character{static_cast(character)}; + + if (is_ascii_alpha(unsigned_character)) { + if (capitalize_next && is_ascii_lower(unsigned_character)) { + output << to_ascii_upper(unsigned_character); + } else { + output << character; + } + capitalize_next = false; + } else if (is_ascii_digit(unsigned_character)) { + if (first) { + output << SEPARATOR; + } + output << character; + capitalize_next = false; + } else if (character == SEPARATOR || character == HYPHEN) { + capitalize_next = true; + } else { + hex_escape(output, character); + capitalize_next = true; + } + + first = false; + } +} + +inline auto encode_string(std::ostringstream &output, + const std::string &input) noexcept -> void { + bool segment_start{true}; + + for (const auto character : input) { + const auto unsigned_character{static_cast(character)}; + + if (is_ascii_alpha(unsigned_character)) { + const bool is_lower{is_ascii_lower(unsigned_character)}; + if (segment_start) { + if (is_reserved_at_start(character)) { + hex_escape(output, character); + } else if (is_lower) { + output << to_ascii_upper(unsigned_character); + } else { + output << UPPERCASE_PREFIX << character; + } + } else if (character == RESERVED_X_UPPER || + character == RESERVED_U_UPPER) { + hex_escape(output, character); + } else { + output << character; + } + segment_start = false; + } else if (is_ascii_digit(unsigned_character)) { + output << character; + 
segment_start = false; + } else { + hex_escape(output, character); + // Only ASCII non-alphanumeric starts a new segment + // Non-ASCII bytes (>= 0x80) do not start new segments (UTF-8 handling) + segment_start = (unsigned_character < ASCII_MAX); + } + } +} + +inline auto encode_string_or_empty(std::ostringstream &output, + const std::string &input) noexcept -> void { + if (input.empty()) { + output << TOKEN_EMPTY; + } else { + encode_string(output, input); + } +} + +} // namespace + +namespace sourcemeta::core { + +auto mangle(const Pointer &pointer, const std::string_view prefix) + -> std::string { + assert(!prefix.empty()); + std::ostringstream output; + encode_prefix(output, prefix); + for (const auto &token : pointer) { + output << SEPARATOR; + if (token.is_property()) { + encode_string_or_empty(output, token.to_property()); + } else { + output << TOKEN_INDEX << token.to_index(); + } + } + return output.str(); +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonschema/bundle.cc b/vendor/core/src/core/jsonschema/bundle.cc index c668d664..22b154bc 100644 --- a/vendor/core/src/core/jsonschema/bundle.cc +++ b/vendor/core/src/core/jsonschema/bundle.cc @@ -18,15 +18,15 @@ auto is_official_metaschema_reference(const sourcemeta::core::Pointer &pointer, sourcemeta::core::schema_resolver(destination).has_value(); } -auto dependencies_internal( - const sourcemeta::core::JSON &schema, - const sourcemeta::core::SchemaWalker &walker, - const sourcemeta::core::SchemaResolver &resolver, - const sourcemeta::core::DependencyCallback &callback, - const std::optional &default_dialect, - const std::optional &default_id, - const sourcemeta::core::SchemaFrame::Paths &paths, - std::unordered_set &visited) -> void { +auto dependencies_internal(const sourcemeta::core::JSON &schema, + const sourcemeta::core::SchemaWalker &walker, + const sourcemeta::core::SchemaResolver &resolver, + const sourcemeta::core::DependencyCallback &callback, + std::string_view 
default_dialect, + std::string_view default_id, + const sourcemeta::core::SchemaFrame::Paths &paths, + std::unordered_set &visited) + -> void { sourcemeta::core::SchemaFrame frame{ sourcemeta::core::SchemaFrame::Mode::References}; frame.analyse(schema, walker, resolver, default_dialect, default_id, paths); @@ -34,40 +34,36 @@ auto dependencies_internal( default_dialect, default_id)}; std::vector< - std::tuple>> + std::tuple> found; - for (const auto &[key, reference] : frame.references()) { - if (frame.traverse(reference.destination).has_value() || - - // We don't want to report official schemas, as we can expect - // virtually all implementations to understand them out of the box - is_official_metaschema_reference(key.second, reference.destination)) { - continue; + frame.for_each_unresolved_reference([&](const auto &pointer, + const auto &reference) { + // We don't want to report official schemas, as we can expect + // virtually all implementations to understand them out of the box + if (is_official_metaschema_reference(pointer, reference.destination)) { + return; } - if (!reference.base.has_value()) { + if (reference.base.empty()) { throw sourcemeta::core::SchemaReferenceError( - reference.destination, key.second, - "Could not resolve schema reference"); + reference.destination, pointer, "Could not resolve schema reference"); } // To not infinitely loop on circular references - if (visited.contains(reference.base.value())) { - continue; + if (visited.contains(reference.base)) { + return; } // If we can't find the destination but there is a base and we can // find the base, then we are facing an unresolved fragment - if (frame.traverse(reference.base.value()).has_value()) { + if (frame.traverse(reference.base).has_value()) { throw sourcemeta::core::SchemaReferenceError( - reference.destination, key.second, - "Could not resolve schema reference"); + reference.destination, pointer, "Could not resolve schema reference"); } - assert(reference.base.has_value()); - 
const auto &identifier{reference.base.value()}; + assert(!reference.base.empty()); + const auto &identifier{reference.base}; auto remote{resolver(identifier)}; if (!remote.has_value()) { throw sourcemeta::core::SchemaResolutionError( @@ -76,33 +72,32 @@ auto dependencies_internal( if (!sourcemeta::core::is_schema(remote.value())) { throw sourcemeta::core::SchemaReferenceError( - identifier, key.second, - "The JSON document is not a valid JSON Schema"); + identifier, pointer, "The JSON document is not a valid JSON Schema"); } - const auto base_dialect{sourcemeta::core::base_dialect( + const auto remote_base_dialect{sourcemeta::core::base_dialect( remote.value(), resolver, default_dialect)}; - if (!base_dialect.has_value()) { + if (!remote_base_dialect.has_value()) { throw sourcemeta::core::SchemaReferenceError( - identifier, key.second, - "The JSON document is not a valid JSON Schema"); + identifier, pointer, "The JSON document is not a valid JSON Schema"); } - callback(origin, key.second, identifier, remote.value()); - found.emplace_back(std::move(remote).value(), identifier); + callback(origin, pointer, identifier, remote.value()); + found.emplace_back(std::move(remote).value(), + sourcemeta::core::JSON::String{identifier}); visited.emplace(identifier); - } + }); for (const auto &entry : found) { dependencies_internal(std::get<0>(entry), walker, resolver, callback, - default_dialect, std::get<1>(entry).get(), - {sourcemeta::core::empty_pointer}, visited); + default_dialect, std::get<1>(entry), + {sourcemeta::core::empty_weak_pointer}, visited); } } auto embed_schema(sourcemeta::core::JSON &root, const sourcemeta::core::Pointer &container, - const std::string &identifier, + const std::string_view identifier, sourcemeta::core::JSON &&target) -> void { auto *current{&root}; for (const auto &token : container) { @@ -134,18 +129,17 @@ auto embed_schema(sourcemeta::core::JSON &root, auto bundle_schema(sourcemeta::core::JSON &root, const sourcemeta::core::Pointer 
&container, const sourcemeta::core::JSON &subschema, - sourcemeta::core::SchemaFrame &frame, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect, - const std::optional &default_id, + std::string_view default_dialect, + std::string_view default_id, const sourcemeta::core::SchemaFrame::Paths &paths, + std::unordered_set &bundled, const std::size_t depth = 0) -> void { - // Keep in mind that the resulting frame does miss some information. For - // example, when we recurse to framing embedded schemas, we will frame them - // without keeping their new relationship to their parent (after embedding if - // to the container location). However, that's fine for the purpose of this - // function, given we don't pass the frame back to the caller + // Create a fresh frame for each schema we analyze to avoid key collisions + // between different schemas that have references at the same pointer paths + sourcemeta::core::SchemaFrame frame{ + sourcemeta::core::SchemaFrame::Mode::References}; if (depth == 0) { frame.analyse( subschema, walker, resolver, default_dialect, default_id, @@ -155,41 +149,40 @@ auto bundle_schema(sourcemeta::core::JSON &root, frame.analyse(subschema, walker, resolver, default_dialect, default_id); } - // Otherwise, given recursion, we would be modifying the - // references list *while* looping on it - // TODO: How can we avoid this very expensive copy? 
- const auto references_copy = frame.references(); - for (const auto &[key, reference] : references_copy) { - if (frame.traverse(reference.destination).has_value() || - - // We don't want to bundle official schemas, as we can expect - // virtually all implementations to understand them out of the box - is_official_metaschema_reference(key.second, reference.destination)) { - continue; + frame.for_each_unresolved_reference([&](const auto &pointer, + const auto &reference) { + // We don't want to bundle official schemas, as we can expect + // virtually all implementations to understand them out of the box + if (is_official_metaschema_reference(pointer, reference.destination)) { + return; } // If we can't find the destination but there is a base and we can // find base, then we are facing an unresolved fragment - if (reference.base.has_value() && - frame.traverse(reference.base.value()).has_value()) { + if (!reference.base.empty() && frame.traverse(reference.base).has_value()) { throw sourcemeta::core::SchemaReferenceError( - reference.destination, key.second, - "Could not resolve schema reference"); + reference.destination, pointer, "Could not resolve schema reference"); } - if (!reference.base.has_value()) { + if (reference.base.empty()) { throw sourcemeta::core::SchemaReferenceError( - reference.destination, key.second, - "Could not resolve schema reference"); + reference.destination, pointer, "Could not resolve schema reference"); + } + + assert(!reference.base.empty()); + const sourcemeta::core::JSON::String identifier{reference.base}; + + // Skip if already bundled to avoid infinite loops on circular + // references + if (bundled.contains(identifier)) { + return; } - assert(reference.base.has_value()); - const auto &identifier{reference.base.value()}; auto remote{resolver(identifier)}; if (!remote.has_value()) { if (frame.traverse(identifier).has_value()) { throw sourcemeta::core::SchemaReferenceError( - reference.destination, key.second, + reference.destination, 
pointer, "Could not resolve schema reference"); } @@ -199,29 +192,45 @@ auto bundle_schema(sourcemeta::core::JSON &root, if (!sourcemeta::core::is_schema(remote.value())) { throw sourcemeta::core::SchemaReferenceError( - identifier, key.second, - "The JSON document is not a valid JSON Schema"); + identifier, pointer, "The JSON document is not a valid JSON Schema"); } - const auto base_dialect{sourcemeta::core::base_dialect( + const auto remote_base_dialect{sourcemeta::core::base_dialect( remote.value(), resolver, default_dialect)}; - if (!base_dialect.has_value()) { + if (!remote_base_dialect.has_value()) { throw sourcemeta::core::SchemaReferenceError( - identifier, key.second, - "The JSON document is not a valid JSON Schema"); + identifier, pointer, "The JSON document is not a valid JSON Schema"); + } + + // If the reference has a fragment, verify it exists in the remote + // schema + if (reference.fragment.has_value()) { + // TODO: The fact that we have to re-frame on each loop pass to check + // for this is probably insanely slow + sourcemeta::core::SchemaFrame remote_frame{ + sourcemeta::core::SchemaFrame::Mode::Locations}; + remote_frame.analyse(remote.value(), walker, resolver, default_dialect, + identifier); + if (!remote_frame.traverse(reference.destination).has_value()) { + throw sourcemeta::core::SchemaReferenceError( + reference.destination, pointer, + "Could not resolve schema reference"); + } } if (remote.value().is_object()) { - // Always insert an identifier, as a schema might refer to another schema - // using another URI (i.e. due to relying on HTTP re-directions, etc) + // Always insert an identifier, as a schema might refer to another + // schema using another URI (i.e. 
due to relying on HTTP + // re-directions, etc) sourcemeta::core::reidentify(remote.value(), identifier, - base_dialect.value()); + remote_base_dialect.value()); } - bundle_schema(root, container, remote.value(), frame, walker, resolver, - default_dialect, identifier, paths, depth + 1); + bundled.emplace(identifier); + bundle_schema(root, container, remote.value(), walker, resolver, + default_dialect, identifier, paths, bundled, depth + 1); embed_schema(root, container, identifier, std::move(remote).value()); - } + }); } } // namespace @@ -231,10 +240,9 @@ namespace sourcemeta::core { auto dependencies(const JSON &schema, const SchemaWalker &walker, const SchemaResolver &resolver, const DependencyCallback &callback, - const std::optional &default_dialect, - const std::optional &default_id, + std::string_view default_dialect, std::string_view default_id, const SchemaFrame::Paths &paths) -> void { - std::unordered_set visited; + std::unordered_set visited; dependencies_internal(schema, walker, resolver, callback, default_dialect, default_id, paths, visited); } @@ -242,18 +250,24 @@ auto dependencies(const JSON &schema, const SchemaWalker &walker, // TODO: Refactor this function to internally rely on the `.dependencies()` // function auto bundle(JSON &schema, const SchemaWalker &walker, - const SchemaResolver &resolver, - const std::optional &default_dialect, - const std::optional &default_id, + const SchemaResolver &resolver, std::string_view default_dialect, + std::string_view default_id, const std::optional &default_container, const SchemaFrame::Paths &paths) -> void { - SchemaFrame frame{SchemaFrame::Mode::References}; - + // Pre-scan the schema to find any already-embedded schemas and mark them + // as bundled to avoid re-embedding them. 
This includes the root schema itself + // and any schemas already embedded within it + std::unordered_set bundled; + SchemaFrame initial_frame{SchemaFrame::Mode::Locations}; + initial_frame.analyse(schema, walker, resolver, default_dialect, default_id, + paths); + initial_frame.for_each_resource_uri( + [&bundled](const auto uri) { bundled.emplace(uri); }); if (default_container.has_value()) { // This is undefined behavior assert(!default_container.value().empty()); - bundle_schema(schema, default_container.value(), schema, frame, walker, - resolver, default_dialect, default_id, paths); + bundle_schema(schema, default_container.value(), schema, walker, resolver, + default_dialect, default_id, paths, bundled); return; } @@ -261,9 +275,9 @@ auto bundle(JSON &schema, const SchemaWalker &walker, // bundled schema. Otherwise, potential relative references based on this // implicit base URI will likely not resolve unless end users happen to // know that this implicit base URI is. - if (default_id.has_value() && - !identify(schema, resolver, default_dialect).has_value()) { - reidentify(schema, default_id.value(), resolver, default_dialect); + if (!default_id.empty() && + identify(schema, resolver, default_dialect).empty()) { + reidentify(schema, default_id, resolver, default_dialect); } const auto vocabularies{ @@ -272,8 +286,8 @@ auto bundle(JSON &schema, const SchemaWalker &walker, sourcemeta::core::Vocabularies::Known::JSON_Schema_2020_12_Core) || vocabularies.contains( sourcemeta::core::Vocabularies::Known::JSON_Schema_2019_09_Core)) { - bundle_schema(schema, {"$defs"}, schema, frame, walker, resolver, - default_dialect, default_id, paths); + bundle_schema(schema, {"$defs"}, schema, walker, resolver, default_dialect, + default_id, paths, bundled); return; } else if ( vocabularies.contains( @@ -304,8 +318,8 @@ auto bundle(JSON &schema, const SchemaWalker &walker, } } - bundle_schema(schema, {"definitions"}, schema, frame, walker, resolver, - default_dialect, default_id, 
paths); + bundle_schema(schema, {"definitions"}, schema, walker, resolver, + default_dialect, default_id, paths, bundled); return; } else if ( vocabularies.contains( @@ -324,6 +338,7 @@ auto bundle(JSON &schema, const SchemaWalker &walker, sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_0_Hyper) || vocabularies.contains( sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_0)) { + SchemaFrame frame{SchemaFrame::Mode::References}; frame.analyse(schema, walker, resolver, default_dialect, default_id); if (frame.standalone()) { return; @@ -337,9 +352,8 @@ auto bundle(JSON &schema, const SchemaWalker &walker, } auto bundle(const JSON &schema, const SchemaWalker &walker, - const SchemaResolver &resolver, - const std::optional &default_dialect, - const std::optional &default_id, + const SchemaResolver &resolver, std::string_view default_dialect, + std::string_view default_id, const std::optional &default_container, const SchemaFrame::Paths &paths) -> JSON { JSON copy = schema; diff --git a/vendor/core/src/core/jsonschema/format.cc b/vendor/core/src/core/jsonschema/format.cc index 864904af..cc785c0d 100644 --- a/vendor/core/src/core/jsonschema/format.cc +++ b/vendor/core/src/core/jsonschema/format.cc @@ -2,7 +2,7 @@ #include // std::uint64_t #include // std::numeric_limits -#include // std::string +#include // std::string_view #include // std::unordered_map namespace { @@ -136,8 +136,8 @@ auto keyword_compare(const sourcemeta::core::JSON::String &left, namespace sourcemeta::core { auto format(JSON &schema, const SchemaWalker &walker, - const SchemaResolver &resolver, - const std::optional &default_dialect) -> void { + const SchemaResolver &resolver, std::string_view default_dialect) + -> void { assert(is_schema(schema)); SchemaFrame frame{SchemaFrame::Mode::Locations}; frame.analyse(schema, walker, resolver, default_dialect); diff --git a/vendor/core/src/core/jsonschema/frame.cc b/vendor/core/src/core/jsonschema/frame.cc index 9f9bf66c..bde79f3f 100644 --- 
a/vendor/core/src/core/jsonschema/frame.cc +++ b/vendor/core/src/core/jsonschema/frame.cc @@ -17,8 +17,8 @@ namespace { auto find_anchors(const sourcemeta::core::JSON &schema, const sourcemeta::core::Vocabularies &vocabularies) - -> std::map { - std::map result; + -> std::vector> { + std::vector> result; // 2020-12 if (schema.is_object() && @@ -27,18 +27,24 @@ auto find_anchors(const sourcemeta::core::JSON &schema, if (schema.defines("$dynamicAnchor")) { const auto &anchor{schema.at("$dynamicAnchor")}; if (anchor.is_string()) { - result.insert({anchor.to_string(), AnchorType::Dynamic}); + result.emplace_back(anchor.to_string(), AnchorType::Dynamic); } } if (schema.defines("$anchor")) { const auto &anchor{schema.at("$anchor")}; if (anchor.is_string()) { - const auto anchor_string{anchor.to_string()}; - const auto success = result.insert({anchor_string, AnchorType::Static}); - assert(success.second || result.contains(anchor_string)); - if (!success.second) { - result[anchor_string] = AnchorType::All; + const std::string_view anchor_view{anchor.to_string()}; + bool found = false; + for (auto &entry : result) { + if (entry.first == anchor_view) { + entry.second = AnchorType::All; + found = true; + break; + } + } + if (!found) { + result.emplace_back(anchor_view, AnchorType::Static); } } } @@ -53,18 +59,24 @@ auto find_anchors(const sourcemeta::core::JSON &schema, assert(anchor.is_boolean()); if (anchor.to_boolean()) { // We store a 2019-09 recursive anchor as an empty anchor - result.insert({"", AnchorType::Dynamic}); + result.emplace_back(std::string_view{}, AnchorType::Dynamic); } } if (schema.defines("$anchor")) { const auto &anchor{schema.at("$anchor")}; if (anchor.is_string()) { - const auto anchor_string{anchor.to_string()}; - const auto success = result.insert({anchor_string, AnchorType::Static}); - assert(success.second || result.contains(anchor_string)); - if (!success.second) { - result[anchor_string] = AnchorType::All; + const std::string_view 
anchor_view{anchor.to_string()}; + bool found = false; + for (auto &entry : result) { + if (entry.first == anchor_view) { + entry.second = AnchorType::All; + found = true; + break; + } + } + if (!found) { + result.emplace_back(anchor_view, AnchorType::Static); } } } @@ -79,14 +91,11 @@ auto find_anchors(const sourcemeta::core::JSON &schema, sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_6))) { if (schema.defines("$id")) { assert(schema.at("$id").is_string()); - const sourcemeta::core::URI identifier(schema.at("$id").to_string()); - if (identifier.is_fragment_only()) { - result.insert( - {sourcemeta::core::JSON::String{ - // Check for optional is happening inside is_fragment_only() - // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - identifier.fragment().value()}, - AnchorType::Static}); + const auto &id_string{schema.at("$id").to_string()}; + if (id_string.starts_with('#')) { + // The original string is "#fragment", skip the '#' + result.emplace_back(std::string_view{id_string}.substr(1), + AnchorType::Static); } } } @@ -98,14 +107,11 @@ auto find_anchors(const sourcemeta::core::JSON &schema, sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_4)) { if (schema.defines("id")) { assert(schema.at("id").is_string()); - const sourcemeta::core::URI identifier(schema.at("id").to_string()); - if (identifier.is_fragment_only()) { - result.insert( - {sourcemeta::core::JSON::String{ - // Check for optional is happening inside is_fragment_only() - // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - identifier.fragment().value()}, - AnchorType::Static}); + const auto &id_string{schema.at("id").to_string()}; + if (id_string.starts_with('#')) { + // The original string is "#fragment", skip the '#' + result.emplace_back(std::string_view{id_string}.substr(1), + AnchorType::Static); } } } @@ -113,19 +119,19 @@ auto find_anchors(const sourcemeta::core::JSON &schema, return result; } -auto find_nearest_bases( - const std::unordered_map> - &bases, - const 
sourcemeta::core::Pointer &pointer, - const std::optional &default_base) - -> std::pair, - sourcemeta::core::Pointer> { +template +auto find_nearest_bases_ref( + const std::unordered_map> &bases, + const sourcemeta::core::WeakPointer &pointer) + -> std::optional< + std::pair>, + sourcemeta::core::WeakPointer>> { auto current_pointer{pointer}; while (true) { const auto match{bases.find(current_pointer)}; if (match != bases.cend()) { - return {match->second, current_pointer}; + return std::make_pair(std::cref(match->second), current_pointer); } if (current_pointer.empty()) { @@ -135,22 +141,36 @@ auto find_nearest_bases( current_pointer = current_pointer.initial(); } + return std::nullopt; +} + +template +auto find_nearest_bases( + const std::unordered_map> &bases, + const sourcemeta::core::WeakPointer &pointer, + const std::optional &default_base) + -> std::pair, sourcemeta::core::WeakPointer> { + const auto result{find_nearest_bases_ref(bases, pointer)}; + if (result.has_value()) { + return {result->first.get(), result->second}; + } + if (default_base.has_value()) { - return {{default_base.value()}, sourcemeta::core::empty_pointer}; + return {{StringType{default_base.value()}}, + sourcemeta::core::empty_weak_pointer}; } - return {{}, sourcemeta::core::empty_pointer}; + return {{}, sourcemeta::core::empty_weak_pointer}; } auto find_every_base( - const std::unordered_map> &bases, - const sourcemeta::core::Pointer &pointer) - -> std::vector< - std::pair> { - std::vector< - std::pair> + const sourcemeta::core::WeakPointer &pointer) + -> std::vector> { + std::vector> result; auto current_pointer{pointer}; @@ -158,7 +178,7 @@ auto find_every_base( const auto match{bases.find(current_pointer)}; if (match != bases.cend()) { for (const auto &base : match->second) { - result.emplace_back(base, current_pointer); + result.emplace_back(std::string_view{base}, current_pointer); } } @@ -170,46 +190,77 @@ auto find_every_base( } if (result.empty() || - result.back().second != 
sourcemeta::core::empty_pointer) { - result.emplace_back("", sourcemeta::core::empty_pointer); + result.back().second != sourcemeta::core::empty_weak_pointer) { + result.emplace_back(std::string_view{}, + sourcemeta::core::empty_weak_pointer); } return result; } // TODO: Why do we have this function both here and on `walker.cc`? -auto ref_overrides_adjacent_keywords(std::string_view base_dialect) -> bool { +auto ref_overrides_adjacent_keywords( + const sourcemeta::core::SchemaBaseDialect base_dialect) -> bool { + using sourcemeta::core::SchemaBaseDialect; // In older drafts, the presence of `$ref` would override any sibling // keywords // See // https://json-schema.org/draft-07/draft-handrews-json-schema-01#rfc.section.8.3 - return base_dialect == "http://json-schema.org/draft-07/schema#" || - base_dialect == "http://json-schema.org/draft-07/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-06/schema#" || - base_dialect == "http://json-schema.org/draft-06/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-04/schema#" || - base_dialect == "http://json-schema.org/draft-04/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-03/schema#" || - base_dialect == "http://json-schema.org/draft-03/hyper-schema#"; + switch (base_dialect) { + case SchemaBaseDialect::JSON_Schema_Draft_7: + case SchemaBaseDialect::JSON_Schema_Draft_7_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_6: + case SchemaBaseDialect::JSON_Schema_Draft_6_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_4: + case SchemaBaseDialect::JSON_Schema_Draft_4_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_3: + case SchemaBaseDialect::JSON_Schema_Draft_3_Hyper: + return true; + default: + return false; + } } -auto supports_id_anchors(std::string_view base_dialect) -> bool { - return base_dialect == "http://json-schema.org/draft-07/schema#" || - base_dialect == "http://json-schema.org/draft-07/hyper-schema#" || - base_dialect == 
"http://json-schema.org/draft-06/schema#" || - base_dialect == "http://json-schema.org/draft-06/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-04/schema#" || - base_dialect == "http://json-schema.org/draft-04/hyper-schema#"; +auto supports_id_anchors(const sourcemeta::core::SchemaBaseDialect base_dialect) + -> bool { + using sourcemeta::core::SchemaBaseDialect; + switch (base_dialect) { + case SchemaBaseDialect::JSON_Schema_Draft_7: + case SchemaBaseDialect::JSON_Schema_Draft_7_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_6: + case SchemaBaseDialect::JSON_Schema_Draft_6_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_4: + case SchemaBaseDialect::JSON_Schema_Draft_4_Hyper: + return true; + default: + return false; + } } -auto fragment_string(const sourcemeta::core::URI &uri) - -> std::optional { - const auto fragment{uri.fragment()}; - if (fragment.has_value()) { - return sourcemeta::core::JSON::String{fragment.value()}; +auto set_base_and_fragment( + sourcemeta::core::SchemaFrame::ReferencesEntry &entry) -> void { + if (entry.destination.empty()) { + entry.base = std::string_view{}; + entry.fragment = std::nullopt; + return; } - return std::nullopt; + const auto hash_position{entry.destination.find('#')}; + if (hash_position != std::string::npos) { + // Has a fragment + if (hash_position == 0) { + // Starts with #, so no base + entry.base = std::string_view{}; + } else { + entry.base = std::string_view{entry.destination}.substr(0, hash_position); + } + entry.fragment = + std::string_view{entry.destination}.substr(hash_position + 1); + } else { + // No fragment + entry.base = std::string_view{entry.destination}; + entry.fragment = std::nullopt; + } } [[noreturn]] @@ -219,40 +270,33 @@ auto throw_already_exists(const sourcemeta::core::JSON::String &uri) -> void { } auto store(sourcemeta::core::SchemaFrame::Locations &frame, - sourcemeta::core::SchemaFrame::Instances &instances, const sourcemeta::core::SchemaReferenceType type, const 
sourcemeta::core::SchemaFrame::LocationType entry_type, - const sourcemeta::core::JSON::String &uri, - const std::optional &root_id, - const sourcemeta::core::JSON::String &base_id, - const sourcemeta::core::Pointer &pointer_from_root, - const sourcemeta::core::Pointer &pointer_from_base, - const sourcemeta::core::JSON::String &dialect, - const sourcemeta::core::JSON::String &base_dialect, - std::vector instance_locations, - const std::optional &parent, + sourcemeta::core::JSON::String uri, const std::string_view base, + const sourcemeta::core::WeakPointer &pointer_from_root, + const std::size_t relative_pointer_offset, + const std::string_view dialect, + const sourcemeta::core::SchemaBaseDialect base_dialect, + const std::optional &parent, const bool ignore_if_present = false, const bool already_canonical = false) -> void { - const auto canonical{ - already_canonical ? uri : sourcemeta::core::URI::canonicalize(uri)}; - const auto inserted{frame - .insert({{type, canonical}, - {.parent = parent, - .type = entry_type, - .root = root_id, - .base = base_id, - .pointer = pointer_from_root, - .relative_pointer = pointer_from_base, - .dialect = dialect, - .base_dialect = base_dialect}}) - .second}; + auto canonical{already_canonical ? 
std::move(uri) + : sourcemeta::core::URI::canonicalize(uri)}; + auto [iterator, inserted] = + frame.insert({{type, std::move(canonical)}, + {.parent = parent, + .type = entry_type, + .base = base, + .pointer = to_pointer(pointer_from_root), + .relative_pointer = relative_pointer_offset, + .dialect = dialect, + .base_dialect = base_dialect}}); if (!ignore_if_present && !inserted) { - throw_already_exists(canonical); + throw_already_exists(iterator->first.second); } - if (!instance_locations.empty()) { - instances.insert_or_assign(pointer_from_root, - std::move(instance_locations)); + if (inserted && iterator->first.second == base) { + iterator->second.base = iterator->first.second; } } @@ -263,114 +307,13 @@ struct InternalEntry { std::optional id; }; -auto traverse_origin_instance_locations( - const sourcemeta::core::SchemaFrame &frame, - const sourcemeta::core::SchemaFrame::Instances &instances, - const sourcemeta::core::Pointer ¤t, - const std::optional &accumulator, - sourcemeta::core::SchemaFrame::Instances::mapped_type &destination, - std::unordered_set< - const sourcemeta::core::SchemaFrame::References::value_type *> &visited) - -> void { - if (accumulator.has_value() && - std::ranges::find(destination, accumulator.value()) == - destination.cend()) { - destination.push_back(accumulator.value()); - } - - for (const auto &reference : frame.references_to(current)) { - if (visited.contains(&reference.get())) { - continue; - } - - visited.insert(&reference.get()); - - const auto subschema_pointer{reference.get().first.second.initial()}; - const auto match{instances.find(subschema_pointer)}; - if (match != instances.cend()) { - for (const auto &instance_location : match->second) { - traverse_origin_instance_locations(frame, instances, subschema_pointer, - instance_location, destination, - visited); - } - } else { - // Even if the parent doesn't have instance locations yet, - // recurse to find the origin of the reference chain - 
traverse_origin_instance_locations(frame, instances, subschema_pointer, - std::nullopt, destination, visited); - } - } -} - // Check misunderstood struct to be a function // NOLINTNEXTLINE(bugprone-exception-escape) struct CacheSubschema { - sourcemeta::core::PointerTemplate instance_location{}; - sourcemeta::core::PointerTemplate relative_instance_location{}; bool orphan{}; - std::optional parent{}; + std::optional parent{}; }; -auto is_definition_entry(const sourcemeta::core::Pointer &pointer) -> bool { - if (pointer.size() < 2) { - return false; - } - - const auto &container{pointer.at(pointer.size() - 2)}; - return container.is_property() && (container.to_property() == "$defs" || - container.to_property() == "definitions"); -} - -auto repopulate_instance_locations( - const sourcemeta::core::SchemaFrame &frame, - const sourcemeta::core::SchemaFrame::Instances &instances, - const std::unordered_map &cache, - const sourcemeta::core::Pointer &pointer, const CacheSubschema &cache_entry, - sourcemeta::core::SchemaFrame::Instances::mapped_type &destination, - const std::optional &accumulator) - -> void { - // Definition entries should not inherit instance locations from their parent - // container. They only get instance locations if something references them. 
- // However, children of definitions should still inherit from their definition - // parent - if (cache_entry.orphan && is_definition_entry(pointer)) { - return; - } - - if (cache_entry.parent.has_value() && - // Don't consider bases from the root subschema, as if that - // subschema has any instance location other than "", then it - // indicates a recursive reference - !cache_entry.parent.value().empty()) { - const auto match{instances.find(cache_entry.parent.value())}; - if (match == instances.cend()) { - return; - } - - for (const auto &parent_instance_location : match->second) { - auto new_accumulator = cache_entry.relative_instance_location; - if (accumulator.has_value()) { - for (const auto &token : accumulator.value()) { - new_accumulator.emplace_back(token); - } - } - - auto result = parent_instance_location; - for (const auto &token : new_accumulator) { - result.emplace_back(token); - } - - if (std::ranges::find(destination, result) == destination.cend()) { - destination.push_back(result); - } - - repopulate_instance_locations( - frame, instances, cache, cache_entry.parent.value(), - cache.at(cache_entry.parent.value()), destination, new_accumulator); - } - } -} - } // namespace namespace sourcemeta::core { @@ -408,10 +351,9 @@ auto SchemaFrame::to_json( sourcemeta::core::to_json(location.second.parent)); entry.assign_assume_new("type", sourcemeta::core::to_json(location.second.type)); - entry.assign_assume_new("root", - sourcemeta::core::to_json(location.second.root)); - entry.assign_assume_new("base", - sourcemeta::core::to_json(location.second.base)); + entry.assign_assume_new("root", this->root_.empty() ? 
JSON{nullptr} + : JSON{this->root_}); + entry.assign_assume_new("base", JSON{JSON::String{location.second.base}}); entry.assign_assume_new("pointer", sourcemeta::core::to_json(location.second.pointer)); if (tracker.has_value()) { @@ -424,11 +366,13 @@ auto SchemaFrame::to_json( entry.assign_assume_new( "relativePointer", - sourcemeta::core::to_json(location.second.relative_pointer)); + sourcemeta::core::to_json( + this->relative_instance_location(location.second))); entry.assign_assume_new("dialect", - sourcemeta::core::to_json(location.second.dialect)); + JSON{JSON::String{location.second.dialect}}); entry.assign_assume_new( - "baseDialect", sourcemeta::core::to_json(location.second.base_dialect)); + "baseDialect", + JSON{JSON::String{to_string(location.second.base_dialect)}}); switch (location.first.first) { case SchemaReferenceType::Static: @@ -464,45 +408,34 @@ auto SchemaFrame::to_json( entry.assign_assume_new( "destination", sourcemeta::core::to_json(reference.second.destination)); - entry.assign_assume_new("base", - sourcemeta::core::to_json(reference.second.base)); entry.assign_assume_new( - "fragment", sourcemeta::core::to_json(reference.second.fragment)); + "base", + !reference.second.base.empty() + ? sourcemeta::core::to_json(JSON::String{reference.second.base}) + : sourcemeta::core::to_json(nullptr)); + entry.assign_assume_new( + "fragment", reference.second.fragment.has_value() + ? 
sourcemeta::core::to_json( + JSON::String{reference.second.fragment.value()}) + : sourcemeta::core::to_json(nullptr)); root.at("references").push_back(std::move(entry)); } - root.assign_assume_new("instances", JSON::make_object()); - for (const auto &instance : this->instances_) { - if (instance.second.empty()) { - continue; - } - - auto entry{JSON::make_array()}; - for (const auto &pointer : instance.second) { - // TODO: Overload .to_string() for PointerTemplate - std::ostringstream result; - sourcemeta::core::stringify(pointer, result); - entry.push_back(sourcemeta::core::to_json(result.str())); - } - - root.at("instances") - .assign_assume_new(to_string(instance.first), std::move(entry)); - } - return root; } auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, const SchemaResolver &resolver, - const std::optional &default_dialect, - const std::optional &default_id, + std::string_view default_dialect, + std::string_view default_id, const SchemaFrame::Paths &paths) -> void { + this->reset(); + assert(std::unordered_set(paths.cbegin(), paths.cend()).size() == + paths.size()); std::vector subschema_entries; - std::unordered_map subschemas; - std::unordered_map> - base_uris; - std::unordered_map> - base_dialects; + std::unordered_map subschemas; + std::unordered_map> base_uris; + std::unordered_map> base_dialects; for (const auto &path : paths) { // Passing paths that overlap is undefined behavior. No path should @@ -513,47 +446,41 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, const auto &schema{get(root, path)}; - const std::optional root_base_dialect{ + const auto root_base_dialect{ sourcemeta::core::base_dialect(schema, resolver, default_dialect)}; if (!root_base_dialect.has_value()) { throw SchemaUnknownBaseDialectError(); } - std::optional root_id{ - // If we are dealing with nested schemas, then by definition - // the root has no identifier - !path.empty() ? 
std::nullopt - : sourcemeta::core::identify( - schema, root_base_dialect.value(), default_id)}; - if (root_id.has_value()) { - root_id = URI::canonicalize(root_id.value()); + // If we are dealing with nested schemas, then by definition + // the root has no identifier + std::optional root_id{std::nullopt}; + if (path.empty()) { + const auto maybe_id{sourcemeta::core::identify( + schema, root_base_dialect.value(), default_id)}; + if (!maybe_id.empty()) { + root_id = URI::canonicalize(maybe_id); + this->root_ = root_id.value(); + } } - const std::optional root_dialect{ + const std::string_view root_dialect{ sourcemeta::core::dialect(schema, default_dialect)}; - assert(root_dialect.has_value()); + assert(!root_dialect.empty()); // If the top-level schema has a specific identifier but the user // passes a different default identifier, then the schema is by // definition known by two names, and we should handle that accordingly const bool has_explicit_different_id{root_id.has_value() && - default_id.has_value() && - root_id.value() != default_id.value()}; + !default_id.empty() && + root_id.value() != default_id}; if (has_explicit_different_id) { - const auto default_id_canonical{URI::canonicalize(default_id.value())}; - if (this->mode_ == SchemaFrame::Mode::Instances) { - store(this->locations_, this->instances_, SchemaReferenceType::Static, - SchemaFrame::LocationType::Resource, default_id_canonical, - root_id, root_id.value(), path, sourcemeta::core::empty_pointer, - root_dialect.value(), root_base_dialect.value(), {{}}, - std::nullopt); - } else { - store(this->locations_, this->instances_, SchemaReferenceType::Static, - SchemaFrame::LocationType::Resource, default_id_canonical, - root_id, root_id.value(), path, sourcemeta::core::empty_pointer, - root_dialect.value(), root_base_dialect.value(), {}, - std::nullopt); - } + const auto default_id_canonical{URI::canonicalize(default_id)}; + // Use this->root_ as base - it contains root_id.value() and persists + 
store(this->locations_, SchemaReferenceType::Static, + SchemaFrame::LocationType::Resource, default_id_canonical, + this->root_, path, path.size(), root_dialect, + root_base_dialect.value(), std::nullopt); base_uris.insert({path, {root_id.value(), default_id_canonical}}); } @@ -569,25 +496,29 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, } // Dialect - assert(entry.dialect.has_value()); - base_dialects.insert({entry.pointer, {entry.dialect.value()}}); + assert(!entry.dialect.empty()); + base_dialects.insert({entry.pointer, {entry.dialect}}); // Base dialect assert(entry.base_dialect.has_value()); // Schema identifier - std::optional id{sourcemeta::core::identify( - entry.subschema.get(), entry.base_dialect.value(), - entry.pointer.empty() ? root_id : std::nullopt)}; + // We need to store the default_id in a local variable to ensure + // it survives the identify() call, as identify() returns a string_view + const std::string default_id_for_entry{ + entry.pointer.empty() && root_id.has_value() ? root_id.value() + : std::string{}}; + const auto maybe_id{sourcemeta::core::identify(entry.subschema.get(), + entry.base_dialect.value(), + default_id_for_entry)}; + std::optional id{ + !maybe_id.empty() + ? 
std::make_optional(std::string{maybe_id}) + : std::nullopt}; // Store information - subschemas.emplace( - entry.pointer, - CacheSubschema{.instance_location = entry.instance_location, - .relative_instance_location = - entry.relative_instance_location, - .orphan = entry.orphan, - .parent = entry.parent}); + subschemas.emplace(entry.pointer, CacheSubschema{.orphan = entry.orphan, + .parent = entry.parent}); subschema_entries.emplace_back( InternalEntry{.common = std::move(entry), .id = std::move(id)}); current_subschema_entries.emplace_back(subschema_entries.size() - 1); @@ -595,24 +526,30 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, for (const auto &entry_index : current_subschema_entries) { const auto &entry{subschema_entries[entry_index]}; + const auto &common_pointer_weak{entry.common.pointer}; + const auto common_pointer{to_pointer(common_pointer_weak)}; + const auto &common_parent{entry.common.parent}; if (entry.id.has_value()) { + assert(entry.common.base_dialect.has_value()); const bool ref_overrides = ref_overrides_adjacent_keywords(entry.common.base_dialect.value()); const bool is_pre_2019_09_location_independent_identifier = supports_id_anchors(entry.common.base_dialect.value()) && - sourcemeta::core::URI{entry.id.value()}.is_fragment_only(); + entry.id.value().starts_with('#'); if ((!entry.common.subschema.get().defines("$ref") || !ref_overrides) && // If we are dealing with a pre-2019-09 location independent // identifier, we ignore it as a traditional identifier and take // care of it as an anchor !is_pre_2019_09_location_independent_identifier) { - const auto bases{ - find_nearest_bases(base_uris, entry.common.pointer, entry.id)}; + const auto bases{find_nearest_bases( + base_uris, common_pointer_weak, + entry.id ? 
std::optional{*entry.id} + : std::nullopt)}; for (const auto &base_string : bases.first) { // Otherwise we end up pushing the top-level resource twice if (entry_index == 0 && has_explicit_different_id && - default_id.has_value() && default_id.value() == base_string) { + !default_id.empty() && default_id == base_string) { continue; } @@ -636,7 +573,7 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, const auto maybe_match{ this->locations_.find({SchemaReferenceType::Static, new_id})}; if (maybe_match != this->locations_.cend() && - maybe_match->second.pointer != entry.common.pointer) { + maybe_match->second.pointer != common_pointer) { throw_already_exists(new_id); } @@ -644,29 +581,14 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, maybe_match == this->locations_.cend()) { assert(entry.common.base_dialect.has_value()); - if (!(entry.common.orphan) && - this->mode_ == SchemaFrame::Mode::Instances) { - store(this->locations_, this->instances_, - SchemaReferenceType::Static, - SchemaFrame::LocationType::Resource, new_id, root_id, - new_id, entry.common.pointer, - sourcemeta::core::empty_pointer, - entry.common.dialect.value(), - entry.common.base_dialect.value(), - {entry.common.instance_location}, entry.common.parent); - } else { - store(this->locations_, this->instances_, - SchemaReferenceType::Static, - SchemaFrame::LocationType::Resource, new_id, root_id, - new_id, entry.common.pointer, - sourcemeta::core::empty_pointer, - entry.common.dialect.value(), - entry.common.base_dialect.value(), {}, - entry.common.parent); - } + store(this->locations_, SchemaReferenceType::Static, + SchemaFrame::LocationType::Resource, new_id, new_id, + common_pointer_weak, common_pointer_weak.size(), + entry.common.dialect, entry.common.base_dialect.value(), + common_parent); } - auto base_uri_match{base_uris.find(entry.common.pointer)}; + auto base_uri_match{base_uris.find(common_pointer_weak)}; if (base_uri_match != base_uris.cend()) 
{ if (std::find(base_uri_match->second.cbegin(), base_uri_match->second.cend(), @@ -674,7 +596,7 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, base_uri_match->second.push_back(new_id); } } else { - base_uris.insert({entry.common.pointer, {new_id}}); + base_uris.insert({common_pointer_weak, {new_id}}); } } } @@ -684,74 +606,64 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, // Handle metaschema references const auto maybe_metaschema{ sourcemeta::core::dialect(entry.common.subschema.get())}; - if (maybe_metaschema.has_value()) { - sourcemeta::core::URI metaschema{maybe_metaschema.value()}; - const auto nearest_bases{ - find_nearest_bases(base_uris, entry.common.pointer, entry.id)}; + if (!maybe_metaschema.empty()) { + sourcemeta::core::URI metaschema{maybe_metaschema}; + const auto nearest_bases{find_nearest_bases( + base_uris, common_pointer_weak, + entry.id ? std::optional{*entry.id} + : std::nullopt)}; if (!nearest_bases.first.empty()) { metaschema.resolve_from(nearest_bases.first.front()); } metaschema.canonicalize(); - const JSON::String destination{metaschema.recompose()}; assert(entry.common.subschema.get().defines("$schema")); - this->references_.insert_or_assign( - {SchemaReferenceType::Static, - entry.common.pointer.concat({"$schema"})}, - SchemaFrame::ReferencesEntry{ - .original = maybe_metaschema.value(), - .destination = destination, - .base = metaschema.recompose_without_fragment(), - .fragment = fragment_string(metaschema)}); + const auto [it, inserted] = this->references_.insert_or_assign( + {SchemaReferenceType::Static, common_pointer.concat({"$schema"})}, + SchemaFrame::ReferencesEntry{.original = maybe_metaschema, + .destination = + metaschema.recompose(), + .base = std::string_view{}, + .fragment = std::nullopt}); + set_base_and_fragment(it->second); } } // Handle schema anchors for (const auto &[name, type] : find_anchors(entry.common.subschema.get(), entry.common.vocabularies)) { - const 
auto bases{ - find_nearest_bases(base_uris, entry.common.pointer, entry.id)}; - - std::vector instance_locations; - if (!entry.common.orphan && - this->mode_ == SchemaFrame::Mode::Instances) { - instance_locations.push_back(entry.common.instance_location); - } + const auto bases{find_nearest_bases( + base_uris, common_pointer_weak, + entry.id ? std::optional{*entry.id} + : std::nullopt)}; if (bases.first.empty()) { const auto anchor_uri{sourcemeta::core::URI::from_fragment(name)}; const auto relative_anchor_uri{anchor_uri.recompose()}; if (type == AnchorType::Static || type == AnchorType::All) { - store( - this->locations_, this->instances_, SchemaReferenceType::Static, - SchemaFrame::LocationType::Anchor, relative_anchor_uri, root_id, - "", entry.common.pointer, - entry.common.pointer.resolve_from(bases.second), - entry.common.dialect.value(), entry.common.base_dialect.value(), - instance_locations, entry.common.parent); + store(this->locations_, SchemaReferenceType::Static, + SchemaFrame::LocationType::Anchor, relative_anchor_uri, "", + common_pointer_weak, bases.second.size(), + entry.common.dialect, entry.common.base_dialect.value(), + common_parent); } if (type == AnchorType::Dynamic || type == AnchorType::All) { - store( - this->locations_, this->instances_, - SchemaReferenceType::Dynamic, SchemaFrame::LocationType::Anchor, - relative_anchor_uri, root_id, "", entry.common.pointer, - entry.common.pointer.resolve_from(bases.second), - entry.common.dialect.value(), entry.common.base_dialect.value(), - instance_locations, entry.common.parent); + store(this->locations_, SchemaReferenceType::Dynamic, + SchemaFrame::LocationType::Anchor, relative_anchor_uri, "", + common_pointer_weak, bases.second.size(), + entry.common.dialect, entry.common.base_dialect.value(), + common_parent); // Register a dynamic anchor as a static anchor if possible too if (entry.common.vocabularies.contains( Vocabularies::Known::JSON_Schema_2020_12_Core)) { - store(this->locations_, 
this->instances_, - SchemaReferenceType::Static, - SchemaFrame::LocationType::Anchor, relative_anchor_uri, - root_id, "", entry.common.pointer, - entry.common.pointer.resolve_from(bases.second), - entry.common.dialect.value(), - entry.common.base_dialect.value(), instance_locations, - entry.common.parent, true); + store(this->locations_, SchemaReferenceType::Static, + SchemaFrame::LocationType::Anchor, relative_anchor_uri, "", + common_pointer_weak, bases.second.size(), + entry.common.dialect, entry.common.base_dialect.value(), + common_parent, true); } } } else { @@ -767,38 +679,40 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, continue; } + const auto base_entry{this->locations_.find( + {SchemaReferenceType::Static, base_string})}; + + const std::string_view base_view{ + base_entry != this->locations_.cend() + ? std::string_view{base_entry->first.second} + : std::string_view{base_string}}; + if (type == AnchorType::Static || type == AnchorType::All) { - store(this->locations_, this->instances_, + store(this->locations_, sourcemeta::core::SchemaReferenceType::Static, - SchemaFrame::LocationType::Anchor, anchor_uri, root_id, - base_string, entry.common.pointer, - entry.common.pointer.resolve_from(bases.second), - entry.common.dialect.value(), - entry.common.base_dialect.value(), instance_locations, - entry.common.parent); + SchemaFrame::LocationType::Anchor, anchor_uri, base_view, + common_pointer_weak, bases.second.size(), + entry.common.dialect, entry.common.base_dialect.value(), + common_parent); } if (type == AnchorType::Dynamic || type == AnchorType::All) { - store(this->locations_, this->instances_, + store(this->locations_, sourcemeta::core::SchemaReferenceType::Dynamic, - SchemaFrame::LocationType::Anchor, anchor_uri, root_id, - base_string, entry.common.pointer, - entry.common.pointer.resolve_from(bases.second), - entry.common.dialect.value(), - entry.common.base_dialect.value(), instance_locations, - entry.common.parent); + 
SchemaFrame::LocationType::Anchor, anchor_uri, base_view, + common_pointer_weak, bases.second.size(), + entry.common.dialect, entry.common.base_dialect.value(), + common_parent); // Register a dynamic anchor as a static anchor if possible too if (entry.common.vocabularies.contains( Vocabularies::Known::JSON_Schema_2020_12_Core)) { - store(this->locations_, this->instances_, + store(this->locations_, sourcemeta::core::SchemaReferenceType::Static, - SchemaFrame::LocationType::Anchor, anchor_uri, root_id, - base_string, entry.common.pointer, - entry.common.pointer.resolve_from(bases.second), - entry.common.dialect.value(), - entry.common.base_dialect.value(), instance_locations, - entry.common.parent, true); + SchemaFrame::LocationType::Anchor, anchor_uri, base_view, + common_pointer_weak, bases.second.size(), + entry.common.dialect, entry.common.base_dialect.value(), + common_parent, true); } } @@ -810,28 +724,36 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, // It is important for the loop that follows to assume a specific ordering // where smaller pointers (by number of tokens) are scanned first. 
- // TODO: Perform the pointer walking using weak pointers only - const auto pointer_walker{sourcemeta::core::PointerWalker{schema}}; - std::vector pointers{pointer_walker.cbegin(), - pointer_walker.cend()}; + std::vector pointers; + for (const auto &weak_pointer : sourcemeta::core::PointerWalker{schema}) { + pointers.push_back(weak_pointer); + } + std::ranges::sort(pointers, std::less<>()); // Pre-compute every possible pointer to the schema for (const auto &relative_pointer : pointers) { - const auto pointer{path.concat(relative_pointer)}; + const auto pointer_weak{path.concat(relative_pointer)}; - const auto dialects{ - find_nearest_bases(base_dialects, pointer, root_dialect)}; - assert(dialects.first.size() == 1); + const auto dialect_match{ + find_nearest_bases_ref(base_dialects, pointer_weak)}; + const auto &dialect_for_pointer{dialect_match.has_value() + ? dialect_match->first.get().front() + : root_dialect}; - auto every_base_result = find_every_base(base_uris, pointer); + auto every_base_result = find_every_base(base_uris, pointer_weak); + WeakPointer cached_base{}; for (const auto &base : every_base_result) { + const auto resolved{cached_base == base.second + ? pointer_weak.resolve_from(cached_base) + : pointer_weak.resolve_from(base.second)}; + cached_base = base.second; + auto relative_pointer_uri{ base.first.empty() - ? sourcemeta::core::to_uri(pointer.resolve_from(base.second)) - : sourcemeta::core::to_uri(pointer.resolve_from(base.second)) - .resolve_from({base.first})}; + ? 
sourcemeta::core::to_uri(resolved) + : sourcemeta::core::to_uri(resolved, base.first)}; relative_pointer_uri.canonicalize(); auto result{relative_pointer_uri.recompose()}; @@ -841,49 +763,39 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, if (!contains) { const auto nearest_bases{ - find_nearest_bases(base_uris, pointer, base.first)}; + find_nearest_bases(base_uris, pointer_weak, + std::optional{base.first})}; assert(!nearest_bases.first.empty()); const auto ¤t_base{nearest_bases.first.front()}; - const auto maybe_base_entry{this->locations_.find( + const auto base_entry{this->locations_.find( {SchemaReferenceType::Static, current_base})}; - const auto current_base_dialect{ - maybe_base_entry == this->locations_.cend() - ? root_base_dialect.value() - : maybe_base_entry->second.base_dialect}; + const std::string_view base_view{ + base_entry != this->locations_.cend() + ? std::string_view{base_entry->first.second} + : std::string_view{current_base}}; - const auto subschema{subschemas.find(pointer)}; + const sourcemeta::core::SchemaBaseDialect current_base_dialect{ + base_entry != this->locations_.cend() + ? 
base_entry->second.base_dialect + : root_base_dialect.value()}; + const auto subschema{subschemas.find(pointer_weak)}; if (subschema != subschemas.cend()) { - // Handle orphan schemas - if (!(subschema->second.orphan) && - this->mode_ == SchemaFrame::Mode::Instances) { - store(this->locations_, this->instances_, - SchemaReferenceType::Static, - SchemaFrame::LocationType::Subschema, result, root_id, - current_base, pointer, - pointer.resolve_from(nearest_bases.second), - dialects.first.front(), current_base_dialect, - {subschema->second.instance_location}, - subschema->second.parent, false, true); - } else { - store(this->locations_, this->instances_, - SchemaReferenceType::Static, - SchemaFrame::LocationType::Subschema, result, root_id, - current_base, pointer, - pointer.resolve_from(nearest_bases.second), - dialects.first.front(), current_base_dialect, {}, - subschema->second.parent, false, true); - } + store(this->locations_, SchemaReferenceType::Static, + SchemaFrame::LocationType::Subschema, std::move(result), + base_view, pointer_weak, nearest_bases.second.size(), + dialect_for_pointer, current_base_dialect, + subschema->second.parent, false, true); } else { - store(this->locations_, this->instances_, - SchemaReferenceType::Static, - SchemaFrame::LocationType::Pointer, result, root_id, - current_base, pointer, - pointer.resolve_from(nearest_bases.second), - dialects.first.front(), current_base_dialect, {}, - dialects.second, false, true); + store(this->locations_, SchemaReferenceType::Static, + SchemaFrame::LocationType::Pointer, std::move(result), + base_view, pointer_weak, nearest_bases.second.size(), + dialect_for_pointer, current_base_dialect, + dialect_match.has_value() ? 
dialect_match->second + : empty_weak_pointer, + false, true); } } } @@ -896,9 +808,13 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, // Resolve references after all framing was performed for (const auto &entry : subschema_entries) { + const auto &common_pointer_weak{entry.common.pointer}; + const auto common_pointer{to_pointer(common_pointer_weak)}; if (entry.common.subschema.get().is_object()) { - const auto nearest_bases{ - find_nearest_bases(base_uris, entry.common.pointer, entry.id)}; + const auto nearest_bases{find_nearest_bases( + base_uris, common_pointer_weak, + entry.id ? std::optional{*entry.id} + : std::nullopt)}; if (entry.common.subschema.get().defines("$ref")) { if (entry.common.subschema.get().at("$ref").is_string()) { const auto &original{ @@ -909,14 +825,13 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, } ref.canonicalize(); - this->references_.insert_or_assign( - {SchemaReferenceType::Static, - entry.common.pointer.concat({"$ref"})}, + const auto [it, inserted] = this->references_.insert_or_assign( + {SchemaReferenceType::Static, common_pointer.concat({"$ref"})}, SchemaFrame::ReferencesEntry{.original = original, .destination = ref.recompose(), - .base = - ref.recompose_without_fragment(), - .fragment = fragment_string(ref)}); + .base = std::string_view{}, + .fragment = std::nullopt}); + set_base_and_fragment(it->second); } } @@ -933,8 +848,7 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, // https://json-schema.org/draft/2019-09/draft-handrews-json-schema-02#rfc.section.8.2.4.2.1 if (ref != "#") { throw sourcemeta::core::SchemaReferenceError( - entry.id.value_or(""), - entry.common.pointer.concat({"$recursiveRef"}), + entry.id.value_or(""), common_pointer.concat({"$recursiveRef"}), "Invalid recursive reference"); } @@ -946,13 +860,13 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, ? 
SchemaReferenceType::Static : SchemaReferenceType::Dynamic}; const sourcemeta::core::URI anchor_uri{anchor_uri_string}; - this->references_.insert_or_assign( - {reference_type, entry.common.pointer.concat({"$recursiveRef"})}, - SchemaFrame::ReferencesEntry{ - .original = ref, - .destination = anchor_uri.recompose(), - .base = anchor_uri.recompose_without_fragment(), - .fragment = fragment_string(anchor_uri)}); + const auto [it, inserted] = this->references_.insert_or_assign( + {reference_type, common_pointer.concat({"$recursiveRef"})}, + SchemaFrame::ReferencesEntry{.original = ref, + .destination = anchor_uri.recompose(), + .base = std::string_view{}, + .fragment = std::nullopt}); + set_base_and_fragment(it->second); } if (entry.common.vocabularies.contains( @@ -982,15 +896,15 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, !has_fragment || (has_fragment && maybe_static_frame != this->locations_.end() && maybe_dynamic_frame == this->locations_.end())}; - this->references_.insert_or_assign( + const auto [it, inserted] = this->references_.insert_or_assign( {behaves_as_static ? 
SchemaReferenceType::Static : SchemaReferenceType::Dynamic, - entry.common.pointer.concat({"$dynamicRef"})}, + common_pointer.concat({"$dynamicRef"})}, SchemaFrame::ReferencesEntry{.original = original, .destination = std::move(ref_string), - .base = - ref.recompose_without_fragment(), - .fragment = fragment_string(ref)}); + .base = std::string_view{}, + .fragment = std::nullopt}); + set_base_and_fragment(it->second); } } } @@ -999,7 +913,9 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, // A schema is standalone if all references can be resolved within itself if (this->standalone()) { // Find all dynamic anchors - std::map> dynamic_anchors; + // Values are pointers to full URIs in locations_ + std::unordered_map> + dynamic_anchors; for (const auto &entry : this->locations_) { if (entry.first.first != SchemaReferenceType::Dynamic || entry.second.type != SchemaFrame::LocationType::Anchor) { @@ -1007,12 +923,9 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, } const URI anchor_uri{entry.first.second}; + // Copy the fragment to avoid dangling string_view (anchor_uri is local) const JSON::String fragment{anchor_uri.fragment().value_or("")}; - if (!dynamic_anchors.contains(fragment)) { - dynamic_anchors.emplace(fragment, std::vector{}); - } - - dynamic_anchors[fragment].push_back(entry.first.second); + dynamic_anchors[fragment].push_back(&entry.first.second); } // If there is a dynamic reference that only has one possible @@ -1026,7 +939,8 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, continue; } - const auto match{dynamic_anchors.find(reference.second.fragment.value())}; + const auto match{dynamic_anchors.find( + JSON::String{reference.second.fragment.value()})}; assert(match != dynamic_anchors.cend()); // Otherwise we can assume there is only one possible target for the // dynamic reference @@ -1035,14 +949,12 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, } 
to_delete.push_back(reference.first); - const URI new_destination{match->second.front()}; to_insert.emplace_back( SchemaFrame::References::key_type{SchemaReferenceType::Static, reference.first.second}, SchemaFrame::References::mapped_type{ - match->second.front(), match->second.front(), - new_destination.recompose_without_fragment(), - fragment_string(new_destination)}); + reference.second.original, *match->second.front(), + std::string_view{}, std::nullopt}); } // Because we can't mutate a map as we are traversing it @@ -1052,50 +964,8 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, } for (auto &&entry : to_insert) { - this->references_.emplace(std::move(entry)); - } - } - - if (this->mode_ == sourcemeta::core::SchemaFrame::Mode::Instances) { - // First pass: trace through references to find instance locations. - // This handles definitions that are referenced - for (auto &entry : this->locations_) { - if (entry.second.type == SchemaFrame::LocationType::Pointer) { - continue; - } - - std::unordered_set visited; - traverse_origin_instance_locations( - *this, this->instances_, entry.second.pointer, std::nullopt, - this->instances_[entry.second.pointer], visited); - } - - // Second pass: inherit instance locations from parents (top-down). - // This handles applicator children inheriting from their parent schema - for (auto &entry : this->locations_) { - if (entry.second.type == SchemaFrame::LocationType::Pointer) { - continue; - } - - const auto subschema{subschemas.find(entry.second.pointer)}; - repopulate_instance_locations(*this, this->instances_, subschemas, - subschema->first, subschema->second, - this->instances_[entry.second.pointer], - std::nullopt); - } - - // Third pass: trace references again. 
Now that inheritance has run, - // schemas from definitions can trace to applicator children that now have - // instance locations from inheritance - for (auto &entry : this->locations_) { - if (entry.second.type == SchemaFrame::LocationType::Pointer) { - continue; - } - - std::unordered_set visited; - traverse_origin_instance_locations( - *this, this->instances_, entry.second.pointer, std::nullopt, - this->instances_[entry.second.pointer], visited); + const auto [it, inserted] = this->references_.emplace(std::move(entry)); + set_base_and_fragment(it->second); } } } @@ -1108,6 +978,17 @@ auto SchemaFrame::references() const noexcept -> const References & { return this->references_; } +auto SchemaFrame::reference(const SchemaReferenceType type, + const Pointer &pointer) const + -> std::optional> { + const auto result{this->references_.find({type, pointer})}; + if (result != this->references_.cend()) { + return result->second; + } + + return std::nullopt; +} + auto SchemaFrame::standalone() const -> bool { return std::ranges::all_of(this->references_, [&](const auto &reference) { assert(!reference.first.second.empty()); @@ -1122,6 +1003,10 @@ auto SchemaFrame::standalone() const -> bool { }); } +auto SchemaFrame::root() const noexcept -> const JSON::String & { + return this->root_; +} + auto SchemaFrame::vocabularies(const Location &location, const SchemaResolver &resolver) const -> Vocabularies { @@ -1132,7 +1017,8 @@ auto SchemaFrame::vocabularies(const Location &location, auto SchemaFrame::uri(const Location &location, const Pointer &relative_schema_location) const -> JSON::String { - return to_uri(location.relative_pointer.concat(relative_schema_location), + return to_uri(this->relative_instance_location(location).concat( + relative_schema_location), location.base) .recompose(); } @@ -1153,16 +1039,17 @@ auto SchemaFrame::traverse(const Location &location, return dynamic_match->second; } -auto SchemaFrame::traverse(const JSON::String &uri) const +auto 
SchemaFrame::traverse(const std::string_view uri) const -> std::optional> { + const JSON::String uri_string{uri}; const auto static_result{ - this->locations_.find({SchemaReferenceType::Static, uri})}; + this->locations_.find({SchemaReferenceType::Static, uri_string})}; if (static_result != this->locations_.cend()) { return static_result->second; } const auto dynamic_result{ - this->locations_.find({SchemaReferenceType::Dynamic, uri})}; + this->locations_.find({SchemaReferenceType::Dynamic, uri_string})}; if (dynamic_result != this->locations_.cend()) { return dynamic_result->second; } @@ -1170,10 +1057,21 @@ auto SchemaFrame::traverse(const JSON::String &uri) const return std::nullopt; } +auto SchemaFrame::traverse(const Pointer &pointer) const + -> std::optional> { + // TODO: This is slow. Consider adding a pointer-indexed secondary + // lookup structure to SchemaFrame + for (const auto &entry : this->locations_) { + if (entry.second.pointer == pointer) { + return entry.second; + } + } + + return std::nullopt; +} + auto SchemaFrame::uri(const Pointer &pointer) const -> std::optional> { - // TODO: This is potentially very slow. 
Traversing by pointer shouldn't - // require an O(N) operation for (const auto &entry : this->locations_) { if (entry.second.pointer == pointer) { return entry.first.second; @@ -1210,24 +1108,26 @@ auto SchemaFrame::dereference(const Location &location, return {SchemaReferenceType::Static, destination->second}; } -auto SchemaFrame::instance_locations(const Location &location) const -> const - typename Instances::mapped_type & { - const auto match{this->instances_.find(location.pointer)}; - if (match == this->instances_.cend()) { - static const typename Instances::mapped_type fallback; - return fallback; +auto SchemaFrame::for_each_resource_uri( + const std::function &callback) const -> void { + for (const auto &[key, location] : this->locations_) { + if (location.type == LocationType::Resource) { + callback(key.second); + } } - - return match->second; } -auto SchemaFrame::references_to(const Pointer &pointer) const -> std::vector< - std::reference_wrapper> { - std::vector> - result; +auto SchemaFrame::for_each_unresolved_reference( + const std::function + &callback) const -> void { + for (const auto &[key, reference] : this->references_) { + if (!this->traverse(reference.destination).has_value()) { + callback(key.second, reference); + } + } +} - // TODO: This is currently very slow, as we need to loop on every reference - // to brute force whether it points to the desired entry or not +auto SchemaFrame::has_references_to(const Pointer &pointer) const -> bool { for (const auto &reference : this->references_) { assert(!reference.first.second.empty()); assert(reference.first.second.back().is_property()); @@ -1237,7 +1137,7 @@ auto SchemaFrame::references_to(const Pointer &pointer) const -> std::vector< {reference.first.first, reference.second.destination})}; if (match != this->locations_.cend() && match->second.pointer == pointer) { - result.emplace_back(reference); + return true; } } else { for (const auto &location : this->locations_) { @@ -1247,14 +1147,59 @@ auto 
SchemaFrame::references_to(const Pointer &pointer) const -> std::vector< if (!reference.second.fragment.has_value() || URI{location.first.second}.fragment().value_or("") == reference.second.fragment.value()) { - result.emplace_back(reference); + return true; } } } } } - return result; + return false; +} + +auto SchemaFrame::has_references_through(const Pointer &pointer) const -> bool { + for (const auto &reference : this->references_) { + assert(!reference.first.second.empty()); + assert(reference.first.second.back().is_property()); + + if (reference.first.first == SchemaReferenceType::Static) { + const auto match{this->locations_.find( + {reference.first.first, reference.second.destination})}; + if (match != this->locations_.cend() && + match->second.pointer.starts_with(pointer)) { + return true; + } + } else { + for (const auto &location : this->locations_) { + if (location.second.type == LocationType::Anchor && + location.first.first == SchemaReferenceType::Dynamic && + location.second.pointer.starts_with(pointer)) { + if (!reference.second.fragment.has_value() || + URI{location.first.second}.fragment().value_or("") == + reference.second.fragment.value()) { + return true; + } + } + } + } + } + + return false; +} + +auto SchemaFrame::relative_instance_location(const Location &location) const + -> Pointer { + return location.pointer.slice(location.relative_pointer); +} + +auto SchemaFrame::empty() const noexcept -> bool { + return this->locations_.empty() && this->references_.empty(); +} + +auto SchemaFrame::reset() -> void { + this->root_.clear(); + this->locations_.clear(); + this->references_.clear(); } } // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h index 4bd7284b..a18eeada 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h +++ 
b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h @@ -38,14 +38,26 @@ namespace sourcemeta::core { /// @ingroup jsonschema /// A default resolver that relies on built-in official schemas. SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto schema_resolver(std::string_view identifier) -> std::optional; +auto schema_resolver(const std::string_view identifier) -> std::optional; /// @ingroup jsonschema /// A default schema walker with support for a wide range of drafs SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto schema_walker(std::string_view keyword, const Vocabularies &vocabularies) +auto schema_walker(const std::string_view keyword, + const Vocabularies &vocabularies) -> const SchemaWalkerResult &; +/// @ingroup jsonschema +/// Stringify a base dialect to its URI +SOURCEMETA_CORE_JSONSCHEMA_EXPORT +auto to_string(const SchemaBaseDialect base_dialect) -> std::string_view; + +/// @ingroup jsonschema +/// Parse a base dialect URI to its enum representation +SOURCEMETA_CORE_JSONSCHEMA_EXPORT +auto to_base_dialect(const std::string_view base_dialect) + -> std::optional; + /// @ingroup jsonschema /// /// Calculate the priority of a keyword that determines the ordering in which a @@ -82,7 +94,7 @@ auto schema_walker(std::string_view keyword, const Vocabularies &vocabularies) /// sourcemeta::core::schema_walker) == 1); /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto schema_keyword_priority(std::string_view keyword, +auto schema_keyword_priority(const std::string_view keyword, const Vocabularies &vocabularies, const SchemaWalker &walker) -> std::uint64_t; @@ -120,8 +132,8 @@ auto is_empty_schema(const JSON &schema) -> bool; /// @ingroup jsonschema /// -/// This function returns the URI identifier of the given schema, if any. For -/// example: +/// This function returns the URI identifier of the given schema, or an empty +/// string view if the schema has no identifier. 
For example: /// /// ```cpp /// #include @@ -134,25 +146,23 @@ auto is_empty_schema(const JSON &schema) -> bool; /// "$id": "https://sourcemeta.com/example-schema" /// })JSON"); /// -/// std::optional id{sourcemeta::core::identify( +/// const auto id{sourcemeta::core::identify( /// document, sourcemeta::core::schema_resolver)}; -/// assert(id.has_value()); -/// assert(id.value() == "https://sourcemeta.com/example-schema"); +/// assert(!id.empty()); +/// assert(id == "https://sourcemeta.com/example-schema"); /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT auto identify(const JSON &schema, const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt) - -> std::optional; + std::string_view default_dialect = "", + std::string_view default_id = "") -> std::string_view; /// @ingroup jsonschema /// /// A shortcut to sourcemeta::core::identify if you know the base dialect /// of the schema. SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto identify(const JSON &schema, const std::string &base_dialect, - const std::optional &default_id = std::nullopt) - -> std::optional; +auto identify(const JSON &schema, const SchemaBaseDialect base_dialect, + std::string_view default_id = "") -> std::string_view; /// @ingroup jsonschema /// @@ -173,14 +183,14 @@ auto identify(const JSON &schema, const std::string &base_dialect, /// })JSON"); /// /// sourcemeta::core::anonymize(document, -/// "https://json-schema.org/draft/2020-12/schema"); +/// sourcemeta::core::SchemaBaseDialect::JSON_Schema_2020_12); /// -/// std::optional id{sourcemeta::core::identify( +/// const auto id{sourcemeta::core::identify( /// document, sourcemeta::core::schema_resolver)}; -/// assert(!id.has_value()); +/// assert(id.empty()); /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto anonymize(JSON &schema, const std::string &base_dialect) -> void; +auto anonymize(JSON &schema, const SchemaBaseDialect base_dialect) -> void; /// @ingroup jsonschema /// @@ -202,24 
+212,23 @@ auto anonymize(JSON &schema, const std::string &base_dialect) -> void; /// "https://example.com/my-new-id", /// sourcemeta::core::schema_resolver); /// -/// std::optional id{sourcemeta::core::identify( +/// const auto id{sourcemeta::core::identify( /// document, sourcemeta::core::schema_resolver)}; -/// assert(id.has_value()); -/// assert(id.value() == "https://example.com/my-new-id"); +/// assert(!id.empty()); +/// assert(id == "https://example.com/my-new-id"); /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto reidentify( - JSON &schema, const std::string &new_identifier, - const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt) -> void; +auto reidentify(JSON &schema, std::string_view new_identifier, + const SchemaResolver &resolver, + std::string_view default_dialect = "") -> void; /// @ingroup jsonschema /// /// A shortcut to sourcemeta::core::reidentify if you know the base /// dialect of the schema. SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto reidentify(JSON &schema, const std::string &new_identifier, - const std::string &base_dialect) -> void; +auto reidentify(JSON &schema, std::string_view new_identifier, + const SchemaBaseDialect base_dialect) -> void; /// @ingroup jsonschema /// @@ -237,16 +246,13 @@ auto reidentify(JSON &schema, const std::string &new_identifier, /// "type": "object" /// })JSON"); /// -/// const std::optional -/// dialect{sourcemeta::core::dialect(document)}; -/// assert(dialect.has_value()); -/// assert(dialect.value() == -/// "https://json-schema.org/draft/2020-12/schema"); +/// const auto dialect{sourcemeta::core::dialect(document)}; +/// assert(!dialect.empty()); +/// assert(dialect == "https://json-schema.org/draft/2020-12/schema"); /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto dialect(const JSON &schema, - const std::optional &default_dialect = std::nullopt) - -> std::optional; +auto dialect(const JSON &schema, std::string_view default_dialect = "") + -> std::string_view; /// @ingroup jsonschema 
/// @@ -273,16 +279,14 @@ auto dialect(const JSON &schema, /// /// This function will throw if the metaschema cannot be determined or resolved. SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto metaschema( - const JSON &schema, const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt) -> JSON; +auto metaschema(const JSON &schema, const SchemaResolver &resolver, + std::string_view default_dialect = "") -> JSON; /// @ingroup jsonschema /// -/// Get the URI of the base dialect that applies to the given schema. If you set +/// Get the base dialect that applies to the given schema. If you set /// a default dialect URI, this will be used if the given schema does not -/// declare the `$schema` keyword. The result of this function is unset -/// if its base dialect could not be determined. For example: +/// declare the `$schema` keyword. For example: /// /// ```cpp /// #include @@ -295,18 +299,18 @@ auto metaschema( /// "type": "object" /// })JSON"); /// -/// const std::optional base_dialect{ +/// const auto base_dialect{ /// sourcemeta::core::base_dialect( /// document, sourcemeta::core::schema_resolver)}; /// /// assert(base_dialect.has_value()); /// assert(base_dialect.value() == -/// "https://json-schema.org/draft/2020-12/schema"); +/// sourcemeta::core::SchemaBaseDialect::JSON_Schema_2020_12); /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT auto base_dialect(const JSON &schema, const SchemaResolver &resolver, - const std::optional &default_dialect = - std::nullopt) -> std::optional; + std::string_view default_dialect = "") + -> std::optional; /// @ingroup jsonschema /// @@ -342,8 +346,7 @@ auto base_dialect(const JSON &schema, const SchemaResolver &resolver, /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT auto vocabularies(const JSON &schema, const SchemaResolver &resolver, - const std::optional &default_dialect = - std::nullopt) -> Vocabularies; + std::string_view default_dialect = "") -> Vocabularies; /// @ingroup jsonschema /// @@ -351,8 +354,8 @@ auto 
vocabularies(const JSON &schema, const SchemaResolver &resolver, /// dialect and dialect URI. SOURCEMETA_CORE_JSONSCHEMA_EXPORT auto vocabularies(const SchemaResolver &resolver, - const std::string &base_dialect, const std::string &dialect) - -> Vocabularies; + const SchemaBaseDialect base_dialect, + std::string_view dialect) -> Vocabularies; /// @ingroup jsonschema /// @@ -378,8 +381,7 @@ auto vocabularies(const SchemaResolver &resolver, SOURCEMETA_CORE_JSONSCHEMA_EXPORT auto format(JSON &schema, const SchemaWalker &walker, const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt) - -> void; + std::string_view default_dialect = "") -> void; /// @ingroup jsonschema /// @@ -401,7 +403,7 @@ auto format(JSON &schema, const SchemaWalker &walker, /// std::cerr << "\n"; /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto wrap(const JSON::String &identifier) -> JSON; +auto wrap(std::string_view identifier) -> JSON; /// @ingroup jsonschema /// @@ -429,8 +431,7 @@ auto wrap(const JSON::String &identifier) -> JSON; /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT auto wrap(const JSON &schema, const Pointer &pointer, - const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt) + const SchemaResolver &resolver, std::string_view default_dialect = "") -> JSON; /// @ingroup jsonschema diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_bundle.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_bundle.h index fedff5b6..17dbebbd 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_bundle.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_bundle.h @@ -13,20 +13,19 @@ #include // NOLINTEND(misc-include-cleaner) -#include // std::function -#include // std::optional, std::nullopt +#include // std::function +#include // std::string_view namespace sourcemeta::core { /// @ingroup jsonschema /// A callback to get dependency information -/// - 
Origin URI +/// - Origin URI (empty if none) /// - Pointer (reference keyword from the origin) /// - Target URI /// - Target schema -using DependencyCallback = - std::function &, const Pointer &, - const JSON::String &, const JSON &)>; +using DependencyCallback = std::function; /// @ingroup jsonschema /// @@ -67,12 +66,13 @@ using DependencyCallback = /// }); /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto dependencies( - const JSON &schema, const SchemaWalker &walker, - const SchemaResolver &resolver, const DependencyCallback &callback, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt, - const SchemaFrame::Paths &paths = {empty_pointer}) -> void; +auto dependencies(const JSON &schema, const SchemaWalker &walker, + const SchemaResolver &resolver, + const DependencyCallback &callback, + std::string_view default_dialect = "", + std::string_view default_id = "", + const SchemaFrame::Paths &paths = {empty_weak_pointer}) + -> void; /// @ingroup jsonschema /// @@ -126,10 +126,10 @@ auto dependencies( SOURCEMETA_CORE_JSONSCHEMA_EXPORT auto bundle(JSON &schema, const SchemaWalker &walker, const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt, + std::string_view default_dialect = "", + std::string_view default_id = "", const std::optional &default_container = std::nullopt, - const SchemaFrame::Paths &paths = {empty_pointer}) -> void; + const SchemaFrame::Paths &paths = {empty_weak_pointer}) -> void; /// @ingroup jsonschema /// @@ -185,10 +185,10 @@ auto bundle(JSON &schema, const SchemaWalker &walker, SOURCEMETA_CORE_JSONSCHEMA_EXPORT auto bundle(const JSON &schema, const SchemaWalker &walker, const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt, + std::string_view default_dialect = "", + std::string_view default_id = "", const std::optional 
&default_container = std::nullopt, - const SchemaFrame::Paths &paths = {empty_pointer}) -> JSON; + const SchemaFrame::Paths &paths = {empty_weak_pointer}) -> JSON; } // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_error.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_error.h index af29c9f8..56eac655 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_error.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_error.h @@ -8,9 +8,9 @@ #include #include -#include // std::exception -#include // std::string -#include // std::move +#include // std::exception +#include // std::string +#include // std::string_view namespace sourcemeta::core { @@ -43,12 +43,8 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaError : public std::exception { class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaResolutionError : public std::exception { public: - SchemaResolutionError(std::string identifier, const char *message) - : identifier_{std::move(identifier)}, message_{message} {} - SchemaResolutionError(std::string identifier, std::string message) = delete; - SchemaResolutionError(std::string identifier, std::string &&message) = delete; - SchemaResolutionError(std::string identifier, - std::string_view message) = delete; + SchemaResolutionError(const std::string_view identifier, const char *message) + : identifier_{identifier}, message_{message} {} [[nodiscard]] auto what() const noexcept -> const char * override { return this->message_; @@ -70,8 +66,8 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaResolutionError class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaRelativeMetaschemaResolutionError : public SchemaResolutionError { public: - SchemaRelativeMetaschemaResolutionError(std::string identifier) - : SchemaResolutionError{std::move(identifier), + SchemaRelativeMetaschemaResolutionError(const std::string_view identifier) + : SchemaResolutionError{identifier, 
"Relative meta-schema URIs are not valid " "according to the JSON Schema specification"} {} }; @@ -81,11 +77,8 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaRelativeMetaschemaResolutionError class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaVocabularyError : public std::exception { public: - SchemaVocabularyError(std::string uri, const char *message) - : uri_{std::move(uri)}, message_{message} {} - SchemaVocabularyError(std::string uri, std::string message) = delete; - SchemaVocabularyError(std::string uri, std::string &&message) = delete; - SchemaVocabularyError(std::string uri, std::string_view message) = delete; + SchemaVocabularyError(const std::string_view uri, const char *message) + : uri_{uri}, message_{message} {} [[nodiscard]] auto what() const noexcept -> const char * override { return this->message_; @@ -105,16 +98,10 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaVocabularyError class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaReferenceError : public std::exception { public: - SchemaReferenceError(std::string identifier, Pointer schema_location, - const char *message) - : identifier_{std::move(identifier)}, - schema_location_{std::move(schema_location)}, message_{message} {} - SchemaReferenceError(std::string identifier, Pointer schema_location, - std::string message) = delete; - SchemaReferenceError(std::string identifier, Pointer schema_location, - std::string &&message) = delete; - SchemaReferenceError(std::string identifier, Pointer schema_location, - std::string_view message) = delete; + SchemaReferenceError(const std::string_view identifier, + Pointer schema_location, const char *message) + : identifier_{identifier}, schema_location_{std::move(schema_location)}, + message_{message} {} [[nodiscard]] auto what() const noexcept -> const char * override { return this->message_; @@ -189,18 +176,19 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaUnknownBaseDialectError class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformRuleProcessedTwiceError : 
public std::exception { public: - SchemaTransformRuleProcessedTwiceError(std::string name, Pointer location) - : name_{std::move(name)}, location_{std::move(location)} {} + SchemaTransformRuleProcessedTwiceError(const std::string_view name, + Pointer location) + : name_{name}, location_{std::move(location)} {} [[nodiscard]] auto what() const noexcept -> const char * override { return "Transformation rules must only be processed once"; } - [[nodiscard]] auto name() const noexcept -> const auto & { + [[nodiscard]] auto name() const noexcept -> std::string_view { return this->name_; } - [[nodiscard]] auto location() const noexcept -> const auto & { + [[nodiscard]] auto location() const noexcept -> const Pointer & { return this->location_; } @@ -215,8 +203,8 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformRuleProcessedTwiceError class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaReferenceObjectResourceError : public std::exception { public: - SchemaReferenceObjectResourceError(std::string identifier) - : identifier_{std::move(identifier)} {} + SchemaReferenceObjectResourceError(const std::string_view identifier) + : identifier_{identifier} {} [[nodiscard]] auto what() const noexcept -> const char * override { return "A schema with a top-level `$ref` in JSON Schema Draft 7 and older " @@ -225,7 +213,7 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaReferenceObjectResourceError "bundling, are not possible without undefined behavior"; } - [[nodiscard]] auto identifier() const noexcept -> const auto & { + [[nodiscard]] auto identifier() const noexcept -> std::string_view { return this->identifier_; } @@ -238,8 +226,8 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaReferenceObjectResourceError class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaBaseDialectError : public std::exception { public: - SchemaBaseDialectError(std::string base_dialect) - : base_dialect_{std::move(base_dialect)} {} + SchemaBaseDialectError(const std::string_view base_dialect) + : 
base_dialect_{base_dialect} {} [[nodiscard]] auto what() const noexcept -> const char * override { return "Unrecognized base dialect"; @@ -258,11 +246,8 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaBaseDialectError class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrameError : public std::exception { public: - SchemaFrameError(std::string identifier, const char *message) - : identifier_{std::move(identifier)}, message_{message} {} - SchemaFrameError(std::string identifier, std::string message) = delete; - SchemaFrameError(std::string identifier, std::string &&message) = delete; - SchemaFrameError(std::string identifier, std::string_view message) = delete; + SchemaFrameError(const std::string_view identifier, const char *message) + : identifier_{identifier}, message_{message} {} [[nodiscard]] auto what() const noexcept -> const char * override { return this->message_; diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h index df933326..497f7679 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h @@ -52,59 +52,12 @@ namespace sourcemeta::core { /// frame.analyse(document, /// sourcemeta::core::schema_walker, /// sourcemeta::core::schema_resolver); -/// -/// // IDs -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/foo"})); -/// -/// // Anchors -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#test"})); -/// -/// // Root Pointers -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/$id"})); -/// 
assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/$schema"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/items"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/items/$id"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/items/type"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/properties"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/properties/foo"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/properties/foo/$anchor"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/properties/foo/type"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/properties/bar"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/properties/bar/$ref"})); -/// -/// // Subpointers -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/foo#/$id"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/foo#/type"})); -/// -/// // References -/// assert(frame.references().contains({sourcemeta::core::SchemaReferenceType::Static, -/// { "properties", "bar", "$ref" }})); -/// assert(frame.references().at({sourcemeta::core::SchemaReferenceType::Static, -/// { "properties", "bar", "$ref" 
}}).destination == -/// "https://www.example.com/schema#/properties/foo"); /// ``` class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { public: /// The mode of framing. More extensive analysis can be compute and memory /// intensive - enum class Mode : std::uint8_t { Locations, References, Instances }; + enum class Mode : std::uint8_t { Locations, References }; SchemaFrame(const Mode mode) : mode_{mode} {} @@ -113,12 +66,13 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { /// A single entry in a JSON Schema reference map struct ReferencesEntry { - JSON::String original; + std::string_view original; + // TODO: This one is tricky to turn into a view, as there is no + // location entry to point to if it is an external unresolved reference JSON::String destination; - // TODO: This string can be a `string_view` over the `destination` - std::optional base; - // TODO: This string can be a `string_view` over the `destination` - std::optional fragment; + // Empty means no base + std::string_view base; + std::optional fragment; }; /// A JSON Schema reference map is a mapping of a JSON Pointer @@ -152,17 +106,14 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { /// A location entry struct Location { - // TODO: Turn this into a weak pointer - std::optional parent; + std::optional parent; LocationType type; - std::optional root; - JSON::String base; + std::string_view base; // TODO: Turn this into a weak pointer Pointer pointer; - // TODO: Turn this into a weak pointer - Pointer relative_pointer; - JSON::String dialect; - JSON::String base_dialect; + std::size_t relative_pointer; + std::string_view dialect; + SchemaBaseDialect base_dialect; }; /// A JSON Schema reference frame is a mapping of URIs to schema identifiers, @@ -176,12 +127,8 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { // point to different places. 
std::map, Location>; - // TODO: Turn the mapped value into a proper set - /// A set of unresolved instance locations - using Instances = std::map>; - - /// A set of paths to frame within a schema wrapper - using Paths = std::set; + /// A list of paths to frame within a schema wrapper + using Paths = std::vector; /// Export the frame entries as JSON [[nodiscard]] auto to_json( @@ -190,12 +137,11 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { /// Analyse a schema or set of schemas from a given root. Passing /// multiple paths that have any overlap is undefined behaviour - auto - analyse(const JSON &root, const SchemaWalker &walker, - const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt, - const Paths &paths = {empty_pointer}) -> void; + auto analyse(const JSON &root, const SchemaWalker &walker, + const SchemaResolver &resolver, + std::string_view default_dialect = "", + std::string_view default_id = "", + const Paths &paths = {empty_weak_pointer}) -> void; /// Access the analysed schema locations [[nodiscard]] auto locations() const noexcept -> const Locations &; @@ -203,9 +149,17 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { /// Access the analysed schema references [[nodiscard]] auto references() const noexcept -> const References &; + /// Get a specific reference entry by type and pointer + [[nodiscard]] auto reference(const SchemaReferenceType type, + const Pointer &pointer) const + -> std::optional>; + /// Check whether the analysed schema has no external references [[nodiscard]] auto standalone() const -> bool; + /// Get the root schema identifier (empty if none) + [[nodiscard]] auto root() const noexcept -> const JSON::String &; + /// Get the vocabularies associated with a location entry [[nodiscard]] auto vocabularies(const Location &location, const SchemaResolver &resolver) const @@ -223,7 +177,11 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { -> 
const Location &; /// Get the location associated with a given URI - [[nodiscard]] auto traverse(const JSON::String &uri) const + [[nodiscard]] auto traverse(const std::string_view uri) const + -> std::optional>; + + /// Get the location associated with a given pointer + [[nodiscard]] auto traverse(const Pointer &pointer) const -> std::optional>; /// Turn an absolute pointer into a location URI @@ -237,13 +195,32 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { -> std::pair>>; - /// Get the unresolved instance locations associated with a location entry - [[nodiscard]] auto instance_locations(const Location &location) const -> const - typename Instances::mapped_type &; + /// Iterate over all resource URIs in the frame + auto for_each_resource_uri( + const std::function &callback) const -> void; + + /// Iterate over all unresolved references (where destination cannot be + /// traversed) + auto for_each_unresolved_reference( + const std::function + &callback) const -> void; + + /// Check if there are any references to a given location pointer + [[nodiscard]] auto has_references_to(const Pointer &pointer) const -> bool; + + /// Check if there are any references that go through a given location pointer + [[nodiscard]] auto has_references_through(const Pointer &pointer) const + -> bool; + + /// Get the relative instance location pointer for a given location entry + [[nodiscard]] auto relative_instance_location(const Location &location) const + -> Pointer; + + /// Check if the frame has no analysed data + [[nodiscard]] auto empty() const noexcept -> bool; - /// Find all references to a given location pointer - [[nodiscard]] auto references_to(const Pointer &pointer) const -> std::vector< - std::reference_wrapper>; + /// Reset the frame, clearing all analysed data + auto reset() -> void; private: Mode mode_; @@ -253,9 +230,9 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { #if defined(_MSC_VER) #pragma warning(disable : 4251 4275) #endif + JSON::String 
root_; Locations locations_; References references_; - Instances instances_; #if defined(_MSC_VER) #pragma warning(default : 4251 4275) #endif diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_transform.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_transform.h index eb609061..a3770872 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_transform.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_transform.h @@ -60,7 +60,8 @@ namespace sourcemeta::core { class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformRule { public: /// Create a transformation rule. Each rule must have a unique name. - SchemaTransformRule(std::string &&name, std::string &&message); + SchemaTransformRule(const std::string_view name, + const std::string_view message); // Necessary to wrap rules on smart pointers virtual ~SchemaTransformRule() = default; @@ -75,10 +76,10 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformRule { auto operator==(const SchemaTransformRule &other) const -> bool; /// Fetch the name of a rule - [[nodiscard]] auto name() const -> const std::string &; + [[nodiscard]] auto name() const noexcept -> std::string_view; /// Fetch the message of a rule - [[nodiscard]] auto message() const -> const std::string &; + [[nodiscard]] auto message() const noexcept -> std::string_view; /// The result of evaluating a rule struct Result { @@ -108,13 +109,6 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformRule { std::optional description; }; - /// Apply the rule to a schema - auto apply(JSON &schema, const JSON &root, const Vocabularies &vocabularies, - const SchemaWalker &walker, const SchemaResolver &resolver, - const SchemaFrame &frame, - const SchemaFrame::Location &location) const - -> std::pair; - /// Check if the rule applies to a schema [[nodiscard]] auto check(const JSON &schema, const JSON &root, const Vocabularies &vocabularies, @@ -125,10 +119,9 @@ class 
SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformRule { /// A method to optionally fix any reference location that was affected by the /// transformation. [[nodiscard]] virtual auto - rereference(const std::string &reference, const Pointer &origin, + rereference(const std::string_view reference, const Pointer &origin, const Pointer &target, const Pointer ¤t) const -> Pointer; -private: /// The rule condition [[nodiscard]] virtual auto condition(const JSON &schema, const JSON &root, @@ -140,6 +133,7 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformRule { /// then the rule condition is considered to not be fixable. virtual auto transform(JSON &schema, const Result &result) const -> void; +private: // Exporting symbols that depends on the standard C++ library is considered // safe. // https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN @@ -237,7 +231,7 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformer { } /// Remove a rule from the bundle - auto remove(const std::string &name) -> bool; + auto remove(const std::string_view name) -> bool; /// The callback that is called whenever the condition of a rule holds true. 
/// The arguments are as follows: @@ -251,19 +245,19 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformer { const SchemaTransformRule::Result &)>; /// Apply the bundle of rules to a schema - [[nodiscard]] auto - apply(JSON &schema, const SchemaWalker &walker, - const SchemaResolver &resolver, const Callback &callback, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt) const + [[nodiscard]] auto apply(JSON &schema, const SchemaWalker &walker, + const SchemaResolver &resolver, + const Callback &callback, + std::string_view default_dialect = "", + std::string_view default_id = "") const -> std::pair; /// Report back the rules from the bundle that need to be applied to a schema - [[nodiscard]] auto - check(const JSON &schema, const SchemaWalker &walker, - const SchemaResolver &resolver, const Callback &callback, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt) const + [[nodiscard]] auto check(const JSON &schema, const SchemaWalker &walker, + const SchemaResolver &resolver, + const Callback &callback, + std::string_view default_dialect = "", + std::string_view default_id = "") const -> std::pair; [[nodiscard]] auto begin() const -> auto { return this->rules.cbegin(); } diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_types.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_types.h index 8f2845bc..1413d964 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_types.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_types.h @@ -35,6 +35,26 @@ using SchemaResolver = std::function(std::string_view)>; /// The reference type enum class SchemaReferenceType : std::uint8_t { Static, Dynamic }; +/// @ingroup jsonschema +/// All the known JSON Schema base dialects +enum class SchemaBaseDialect : std::uint8_t { + JSON_Schema_2020_12, + JSON_Schema_2020_12_Hyper, + 
JSON_Schema_2019_09, + JSON_Schema_2019_09_Hyper, + JSON_Schema_Draft_7, + JSON_Schema_Draft_7_Hyper, + JSON_Schema_Draft_6, + JSON_Schema_Draft_6_Hyper, + JSON_Schema_Draft_4, + JSON_Schema_Draft_4_Hyper, + JSON_Schema_Draft_3, + JSON_Schema_Draft_3_Hyper, + JSON_Schema_Draft_2_Hyper, + JSON_Schema_Draft_1_Hyper, + JSON_Schema_Draft_0_Hyper +}; + #if defined(__GNUC__) #pragma GCC diagnostic push // For some strange reason, GCC on Debian 11 believes that a member of @@ -191,37 +211,20 @@ struct SchemaWalkerResult { /// For walking purposes, some functions need to understand which JSON Schema /// keywords declare other JSON Schema definitions. To accomplish this in a /// generic and flexible way that does not assume the use any vocabulary other -/// than `core`, these functions take a walker function as argument, of the type -/// sourcemeta::core::SchemaWalker. -/// -/// For convenience, we provide the following default walkers: -/// -/// - sourcemeta::core::schema_walker -/// - sourcemeta::core::schema_walker_none +/// than `core`, these functions take a walker function as argument. using SchemaWalker = std::function; /// @ingroup jsonschema /// An entry of a schema iterator. struct SchemaIteratorEntry { - // TODO: Turn this into a weak pointer - std::optional parent; - // TODO: Turn this into a weak pointer - Pointer pointer; + std::optional parent; + WeakPointer pointer; // TODO: Use "known" enum classes + strings for dialects - std::optional dialect; + std::string_view dialect; Vocabularies vocabularies; - // TODO: Use "known" enum classes for base dialects - std::optional base_dialect; + std::optional base_dialect; std::reference_wrapper subschema; - - // TODO: These two pointer templates contain some overlap. 
- // Instead, have a `base_instance_location` and a `relative_instance_location` - // that when concatenated, represent the full `instance_location` - // TODO: Make these WeakPointerTemplate - PointerTemplate instance_location; - PointerTemplate relative_instance_location; - bool orphan; }; diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_vocabularies.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_vocabularies.h index bcbc4360..9333bd65 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_vocabularies.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_vocabularies.h @@ -13,7 +13,6 @@ #include // std::optional #include // std::ostream #include // std::out_of_range -#include // std::string #include // std::string_view #include // std::unordered_map #include // std::unordered_set diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_walker.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_walker.h index 82790233..7c039ed2 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_walker.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_walker.h @@ -11,7 +11,6 @@ #include // std::uint64_t #include // std::optional -#include // std::string #include // std::string_view #include // std::vector @@ -61,10 +60,9 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaIterator { public: using const_iterator = typename internal::const_iterator; - SchemaIterator( - const JSON &input, const SchemaWalker &walker, - const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt); + SchemaIterator(const JSON &input, const SchemaWalker &walker, + const SchemaResolver &resolver, + std::string_view default_dialect = ""); [[nodiscard]] auto begin() const -> const_iterator; [[nodiscard]] auto end() const -> const_iterator; [[nodiscard]] auto cbegin() const -> const_iterator; 
@@ -128,10 +126,9 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaIteratorFlat { public: using const_iterator = typename internal::const_iterator; - SchemaIteratorFlat( - const JSON &input, const SchemaWalker &walker, - const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt); + SchemaIteratorFlat(const JSON &input, const SchemaWalker &walker, + const SchemaResolver &resolver, + std::string_view default_dialect = ""); [[nodiscard]] auto begin() const -> const_iterator; [[nodiscard]] auto end() const -> const_iterator; [[nodiscard]] auto cbegin() const -> const_iterator; @@ -185,10 +182,9 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaKeywordIterator { public: using const_iterator = typename internal::const_iterator; - SchemaKeywordIterator( - const JSON &input, const SchemaWalker &walker, - const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt); + SchemaKeywordIterator(const JSON &input, const SchemaWalker &walker, + const SchemaResolver &resolver, + std::string_view default_dialect = ""); [[nodiscard]] auto begin() const -> const_iterator; [[nodiscard]] auto end() const -> const_iterator; [[nodiscard]] auto cbegin() const -> const_iterator; diff --git a/vendor/core/src/core/jsonschema/jsonschema.cc b/vendor/core/src/core/jsonschema/jsonschema.cc index b7be25a5..937ee43b 100644 --- a/vendor/core/src/core/jsonschema/jsonschema.cc +++ b/vendor/core/src/core/jsonschema/jsonschema.cc @@ -20,66 +20,144 @@ auto sourcemeta::core::is_empty_schema(const sourcemeta::core::JSON &schema) (schema.is_object() && schema.empty()); } +auto sourcemeta::core::to_string(const SchemaBaseDialect base_dialect) + -> std::string_view { + switch (base_dialect) { + case SchemaBaseDialect::JSON_Schema_2020_12: + return "https://json-schema.org/draft/2020-12/schema"; + case SchemaBaseDialect::JSON_Schema_2020_12_Hyper: + return "https://json-schema.org/draft/2020-12/hyper-schema"; + case SchemaBaseDialect::JSON_Schema_2019_09: + 
return "https://json-schema.org/draft/2019-09/schema"; + case SchemaBaseDialect::JSON_Schema_2019_09_Hyper: + return "https://json-schema.org/draft/2019-09/hyper-schema"; + case SchemaBaseDialect::JSON_Schema_Draft_7: + return "http://json-schema.org/draft-07/schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_7_Hyper: + return "http://json-schema.org/draft-07/hyper-schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_6: + return "http://json-schema.org/draft-06/schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_6_Hyper: + return "http://json-schema.org/draft-06/hyper-schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_4: + return "http://json-schema.org/draft-04/schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_4_Hyper: + return "http://json-schema.org/draft-04/hyper-schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_3: + return "http://json-schema.org/draft-03/schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_3_Hyper: + return "http://json-schema.org/draft-03/hyper-schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_2_Hyper: + return "http://json-schema.org/draft-02/hyper-schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_1_Hyper: + return "http://json-schema.org/draft-01/hyper-schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_0_Hyper: + return "http://json-schema.org/draft-00/hyper-schema#"; + } + + assert(false); + return {}; +} + +auto sourcemeta::core::to_base_dialect(const std::string_view base_dialect) + -> std::optional { + if (base_dialect == "https://json-schema.org/draft/2020-12/schema") { + return SchemaBaseDialect::JSON_Schema_2020_12; + } else if (base_dialect == + "https://json-schema.org/draft/2020-12/hyper-schema") { + return SchemaBaseDialect::JSON_Schema_2020_12_Hyper; + } else if (base_dialect == "https://json-schema.org/draft/2019-09/schema") { + return SchemaBaseDialect::JSON_Schema_2019_09; + } else if (base_dialect == + "https://json-schema.org/draft/2019-09/hyper-schema") { + return 
SchemaBaseDialect::JSON_Schema_2019_09_Hyper; + } else if (base_dialect == "http://json-schema.org/draft-07/schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_7; + } else if (base_dialect == "http://json-schema.org/draft-07/hyper-schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_7_Hyper; + } else if (base_dialect == "http://json-schema.org/draft-06/schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_6; + } else if (base_dialect == "http://json-schema.org/draft-06/hyper-schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_6_Hyper; + } else if (base_dialect == "http://json-schema.org/draft-04/schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_4; + } else if (base_dialect == "http://json-schema.org/draft-04/hyper-schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_4_Hyper; + } else if (base_dialect == "http://json-schema.org/draft-03/schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_3; + } else if (base_dialect == "http://json-schema.org/draft-03/hyper-schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_3_Hyper; + } else if (base_dialect == "http://json-schema.org/draft-02/hyper-schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_2_Hyper; + } else if (base_dialect == "http://json-schema.org/draft-01/hyper-schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_1_Hyper; + } else if (base_dialect == "http://json-schema.org/draft-00/hyper-schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_0_Hyper; + } + + return std::nullopt; +} + namespace { -static auto id_keyword(const std::string &base_dialect) -> std::string { - if (base_dialect == "https://json-schema.org/draft/2020-12/schema" || - base_dialect == "https://json-schema.org/draft/2020-12/hyper-schema" || - base_dialect == "https://json-schema.org/draft/2019-09/schema" || - base_dialect == "https://json-schema.org/draft/2019-09/hyper-schema" || - base_dialect == "http://json-schema.org/draft-07/schema#" || - base_dialect == 
"http://json-schema.org/draft-07/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-06/schema#" || - base_dialect == "http://json-schema.org/draft-06/hyper-schema#") { - return "$id"; - } - - if (base_dialect == "http://json-schema.org/draft-04/schema#" || - base_dialect == "http://json-schema.org/draft-04/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-03/schema#" || - base_dialect == "http://json-schema.org/draft-03/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-02/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-01/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-00/hyper-schema#") { - return "id"; - } - - throw sourcemeta::core::SchemaBaseDialectError(base_dialect); +static auto id_keyword(const sourcemeta::core::SchemaBaseDialect base_dialect) + -> std::string_view { + using sourcemeta::core::SchemaBaseDialect; + switch (base_dialect) { + case SchemaBaseDialect::JSON_Schema_2020_12: + case SchemaBaseDialect::JSON_Schema_2020_12_Hyper: + case SchemaBaseDialect::JSON_Schema_2019_09: + case SchemaBaseDialect::JSON_Schema_2019_09_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_7: + case SchemaBaseDialect::JSON_Schema_Draft_7_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_6: + case SchemaBaseDialect::JSON_Schema_Draft_6_Hyper: + return "$id"; + case SchemaBaseDialect::JSON_Schema_Draft_4: + case SchemaBaseDialect::JSON_Schema_Draft_4_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_3: + case SchemaBaseDialect::JSON_Schema_Draft_3_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_2_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_1_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_0_Hyper: + return "id"; + } + + assert(false); + return {}; } } // namespace -auto sourcemeta::core::identify( - const sourcemeta::core::JSON &schema, const SchemaResolver &resolver, - const std::optional &default_dialect, - const std::optional &default_id) - -> std::optional { +auto 
sourcemeta::core::identify(const sourcemeta::core::JSON &schema, + const SchemaResolver &resolver, + std::string_view default_dialect, + std::string_view default_id) + -> std::string_view { try { const auto maybe_base_dialect{ sourcemeta::core::base_dialect(schema, resolver, default_dialect)}; if (maybe_base_dialect.has_value()) { return identify(schema, maybe_base_dialect.value(), default_id); - } else { - return default_id; } + return default_id; } catch (const SchemaResolutionError &) { - if (default_id.has_value()) { + if (!default_id.empty()) { return default_id; - } else { - throw; } + throw; } } auto sourcemeta::core::identify(const JSON &schema, - const std::string &base_dialect, - const std::optional &default_id) - -> std::optional { + const SchemaBaseDialect base_dialect, + std::string_view default_id) + -> std::string_view { if (!schema.is_object()) { return default_id; } - const auto keyword{id_keyword(base_dialect)}; + const std::string keyword{id_keyword(base_dialect)}; if (!schema.defines(keyword)) { return default_id; @@ -97,90 +175,86 @@ auto sourcemeta::core::identify(const JSON &schema, // See // https://json-schema.org/draft-07/draft-handrews-json-schema-01#rfc.section.8.3 if (schema.defines("$ref") && - (base_dialect == "http://json-schema.org/draft-07/schema#" || - base_dialect == "http://json-schema.org/draft-07/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-06/schema#" || - base_dialect == "http://json-schema.org/draft-06/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-04/schema#" || - base_dialect == "http://json-schema.org/draft-04/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-03/schema#" || - base_dialect == "http://json-schema.org/draft-03/hyper-schema#")) { + (base_dialect == SchemaBaseDialect::JSON_Schema_Draft_7 || + base_dialect == SchemaBaseDialect::JSON_Schema_Draft_7_Hyper || + base_dialect == SchemaBaseDialect::JSON_Schema_Draft_6 || + base_dialect == 
SchemaBaseDialect::JSON_Schema_Draft_6_Hyper || + base_dialect == SchemaBaseDialect::JSON_Schema_Draft_4 || + base_dialect == SchemaBaseDialect::JSON_Schema_Draft_4_Hyper || + base_dialect == SchemaBaseDialect::JSON_Schema_Draft_3 || + base_dialect == SchemaBaseDialect::JSON_Schema_Draft_3_Hyper)) { return default_id; } return identifier.to_string(); } -auto sourcemeta::core::anonymize(JSON &schema, const std::string &base_dialect) - -> void { +auto sourcemeta::core::anonymize(JSON &schema, + const SchemaBaseDialect base_dialect) -> void { if (schema.is_object()) { - schema.erase(id_keyword(base_dialect)); + schema.erase(std::string{id_keyword(base_dialect)}); } } -auto sourcemeta::core::reidentify( - JSON &schema, const std::string &new_identifier, - const SchemaResolver &resolver, - const std::optional &default_dialect) -> void { - const auto base_dialect{ +auto sourcemeta::core::reidentify(JSON &schema, std::string_view new_identifier, + const SchemaResolver &resolver, + std::string_view default_dialect) -> void { + const auto resolved_base_dialect{ sourcemeta::core::base_dialect(schema, resolver, default_dialect)}; - if (!base_dialect.has_value()) { + if (!resolved_base_dialect.has_value()) { throw sourcemeta::core::SchemaUnknownBaseDialectError(); } - reidentify(schema, new_identifier, base_dialect.value()); + reidentify(schema, new_identifier, resolved_base_dialect.value()); } -auto sourcemeta::core::reidentify(JSON &schema, - const std::string &new_identifier, - const std::string &base_dialect) -> void { +auto sourcemeta::core::reidentify(JSON &schema, std::string_view new_identifier, + const SchemaBaseDialect base_dialect) + -> void { assert(is_schema(schema)); assert(schema.is_object()); - schema.assign(id_keyword(base_dialect), JSON{new_identifier}); + schema.assign(std::string{id_keyword(base_dialect)}, JSON{new_identifier}); // If we reidentify, and the identifier is still not retrievable, then // we are facing the Draft 7 `$ref` sibling edge case, and 
we cannot // really continue - if (schema.defines("$ref") && !identify(schema, base_dialect).has_value()) { + if (schema.defines("$ref") && identify(schema, base_dialect).empty()) { throw SchemaReferenceObjectResourceError(new_identifier); } } -auto sourcemeta::core::dialect( - const sourcemeta::core::JSON &schema, - const std::optional &default_dialect) - -> std::optional { +auto sourcemeta::core::dialect(const sourcemeta::core::JSON &schema, + std::string_view default_dialect) + -> std::string_view { assert(sourcemeta::core::is_schema(schema)); if (schema.is_boolean() || !schema.defines("$schema")) { return default_dialect; } - const sourcemeta::core::JSON &dialect{schema.at("$schema")}; - assert(dialect.is_string() && !dialect.empty()); - return dialect.to_string(); + return schema.at("$schema").to_string(); } auto sourcemeta::core::metaschema( const sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect) -> JSON { - const auto maybe_dialect{sourcemeta::core::dialect(schema, default_dialect)}; - if (!maybe_dialect.has_value()) { + std::string_view default_dialect) -> JSON { + const auto effective_dialect{ + sourcemeta::core::dialect(schema, default_dialect)}; + if (effective_dialect.empty()) { throw sourcemeta::core::SchemaUnknownDialectError(); } - const auto maybe_metaschema{resolver(maybe_dialect.value())}; + const auto maybe_metaschema{resolver(effective_dialect)}; if (!maybe_metaschema.has_value()) { // Relative meta-schema references are invalid according to the // JSON Schema specifications. 
They must be absolute ones - const URI effective_dialect_uri{maybe_dialect.value()}; + const URI effective_dialect_uri{effective_dialect}; if (effective_dialect_uri.is_relative()) { throw sourcemeta::core::SchemaRelativeMetaschemaResolutionError( - maybe_dialect.value()); + effective_dialect); } else { throw sourcemeta::core::SchemaResolutionError( - maybe_dialect.value(), - "Could not resolve the metaschema of the schema"); + effective_dialect, "Could not resolve the metaschema of the schema"); } } @@ -190,53 +264,21 @@ auto sourcemeta::core::metaschema( auto sourcemeta::core::base_dialect( const sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect) - -> std::optional { + std::string_view default_dialect) -> std::optional { assert(sourcemeta::core::is_schema(schema)); - const std::optional dialect{ + const std::string_view effective_dialect{ sourcemeta::core::dialect(schema, default_dialect)}; // There is no metaschema information whatsoever // Nothing we can do at this point - if (!dialect.has_value()) { + if (effective_dialect.empty()) { return std::nullopt; } - const std::string &effective_dialect{dialect.value()}; - - // As a performance optimization shortcut - if (effective_dialect == "https://json-schema.org/draft/2020-12/schema" || - effective_dialect == "https://json-schema.org/draft/2019-09/schema" || - effective_dialect == "http://json-schema.org/draft-07/schema#" || - effective_dialect == "http://json-schema.org/draft-06/schema#") { - return effective_dialect; - } - - // For compatibility with older JSON Schema drafts that didn't support $id nor - // $vocabulary - if ( - // In Draft 0, 1, and 2, the official metaschema is defined on top of - // the official hyper-schema metaschema. 
See - // http://json-schema.org/draft-00/schema# - effective_dialect == "http://json-schema.org/draft-00/hyper-schema#" || - effective_dialect == "http://json-schema.org/draft-01/hyper-schema#" || - effective_dialect == "http://json-schema.org/draft-02/hyper-schema#" || - - // Draft 3 and 4 have both schema and hyper-schema dialects - effective_dialect == "http://json-schema.org/draft-03/hyper-schema#" || - effective_dialect == "http://json-schema.org/draft-03/schema#" || - effective_dialect == "http://json-schema.org/draft-04/hyper-schema#" || - effective_dialect == "http://json-schema.org/draft-04/schema#") { - return effective_dialect; - } - - // If we reach the bottom of the metaschema hierarchy, where the schema - // defines itself, then we got to the base dialect - if (schema.is_object() && schema.defines("$id")) { - assert(schema.at("$id").is_string()); - if (schema.at("$id").to_string() == effective_dialect) { - return schema.at("$id").to_string(); - } + // Check for known base dialects + const auto result{to_base_dialect(effective_dialect)}; + if (result.has_value()) { + return result; } // Otherwise, traverse the metaschema hierarchy up @@ -248,32 +290,45 @@ auto sourcemeta::core::base_dialect( const URI effective_dialect_uri{effective_dialect}; if (effective_dialect_uri.is_relative()) { throw sourcemeta::core::SchemaRelativeMetaschemaResolutionError( - effective_dialect); + std::string{effective_dialect}); } else { throw sourcemeta::core::SchemaResolutionError( - effective_dialect, "Could not resolve the metaschema of the schema"); + std::string{effective_dialect}, + "Could not resolve the metaschema of the schema"); } } + // If the metaschema declares the same dialect (self-descriptive), and it's + // not an official dialect, we cannot determine the base dialect + const std::string_view metaschema_dialect{ + dialect(metaschema.value(), effective_dialect)}; + if (metaschema_dialect == effective_dialect) { + throw 
sourcemeta::core::SchemaUnknownBaseDialectError(); + } + return base_dialect(metaschema.value(), resolver, effective_dialect); } namespace { -auto core_vocabulary_known(std::string_view base_dialect) +auto core_vocabulary_known( + const sourcemeta::core::SchemaBaseDialect base_dialect) -> sourcemeta::core::Vocabularies::Known { - if (base_dialect == "https://json-schema.org/draft/2020-12/schema" || - base_dialect == "https://json-schema.org/draft/2020-12/hyper-schema") { - return sourcemeta::core::Vocabularies::Known::JSON_Schema_2020_12_Core; - } else if (base_dialect == "https://json-schema.org/draft/2019-09/schema" || - base_dialect == - "https://json-schema.org/draft/2019-09/hyper-schema") { - return sourcemeta::core::Vocabularies::Known::JSON_Schema_2019_09_Core; - } else { - throw sourcemeta::core::SchemaBaseDialectError(std::string{base_dialect}); + using sourcemeta::core::SchemaBaseDialect; + using sourcemeta::core::Vocabularies; + switch (base_dialect) { + case SchemaBaseDialect::JSON_Schema_2020_12: + case SchemaBaseDialect::JSON_Schema_2020_12_Hyper: + return Vocabularies::Known::JSON_Schema_2020_12_Core; + case SchemaBaseDialect::JSON_Schema_2019_09: + case SchemaBaseDialect::JSON_Schema_2019_09_Hyper: + return Vocabularies::Known::JSON_Schema_2019_09_Core; + default: + assert(false); + return Vocabularies::Known::JSON_Schema_2020_12_Core; } } -auto dialect_to_known(std::string_view dialect) +auto dialect_to_known(const std::string_view dialect) -> std::optional { using sourcemeta::core::Vocabularies; if (dialect == "http://json-schema.org/draft-07/schema#") { @@ -320,39 +375,94 @@ auto dialect_to_known(std::string_view dialect) } return std::nullopt; } + +auto base_dialect_to_known(const sourcemeta::core::SchemaBaseDialect dialect) + -> sourcemeta::core::Vocabularies::Known { + using sourcemeta::core::SchemaBaseDialect; + using sourcemeta::core::Vocabularies; + switch (dialect) { + case SchemaBaseDialect::JSON_Schema_Draft_7: + return 
Vocabularies::Known::JSON_Schema_Draft_7; + case SchemaBaseDialect::JSON_Schema_Draft_7_Hyper: + return Vocabularies::Known::JSON_Schema_Draft_7_Hyper; + case SchemaBaseDialect::JSON_Schema_Draft_6: + return Vocabularies::Known::JSON_Schema_Draft_6; + case SchemaBaseDialect::JSON_Schema_Draft_6_Hyper: + return Vocabularies::Known::JSON_Schema_Draft_6_Hyper; + case SchemaBaseDialect::JSON_Schema_Draft_4: + return Vocabularies::Known::JSON_Schema_Draft_4; + case SchemaBaseDialect::JSON_Schema_Draft_4_Hyper: + return Vocabularies::Known::JSON_Schema_Draft_4_Hyper; + case SchemaBaseDialect::JSON_Schema_Draft_3: + return Vocabularies::Known::JSON_Schema_Draft_3; + case SchemaBaseDialect::JSON_Schema_Draft_3_Hyper: + return Vocabularies::Known::JSON_Schema_Draft_3_Hyper; + case SchemaBaseDialect::JSON_Schema_Draft_2_Hyper: + return Vocabularies::Known::JSON_Schema_Draft_2_Hyper; + case SchemaBaseDialect::JSON_Schema_Draft_1_Hyper: + return Vocabularies::Known::JSON_Schema_Draft_1_Hyper; + case SchemaBaseDialect::JSON_Schema_Draft_0_Hyper: + return Vocabularies::Known::JSON_Schema_Draft_0_Hyper; + default: + assert(false); + return Vocabularies::Known::JSON_Schema_Draft_7; + } +} + +auto is_pre_vocabulary_base_dialect( + const sourcemeta::core::SchemaBaseDialect base_dialect) -> bool { + using sourcemeta::core::SchemaBaseDialect; + switch (base_dialect) { + case SchemaBaseDialect::JSON_Schema_Draft_7: + case SchemaBaseDialect::JSON_Schema_Draft_7_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_6: + case SchemaBaseDialect::JSON_Schema_Draft_6_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_4: + case SchemaBaseDialect::JSON_Schema_Draft_4_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_3: + case SchemaBaseDialect::JSON_Schema_Draft_3_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_2_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_1_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_0_Hyper: + return true; + default: + return false; + } +} } // namespace auto 
sourcemeta::core::vocabularies( const sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect) - -> sourcemeta::core::Vocabularies { - const std::optional maybe_base_dialect{ + std::string_view default_dialect) -> sourcemeta::core::Vocabularies { + const auto resolved_base_dialect{ sourcemeta::core::base_dialect(schema, resolver, default_dialect)}; - if (!maybe_base_dialect.has_value()) { + if (!resolved_base_dialect.has_value()) { throw sourcemeta::core::SchemaUnknownBaseDialectError(); } - const std::optional maybe_dialect{ + const std::string_view resolved_dialect{ sourcemeta::core::dialect(schema, default_dialect)}; - if (!maybe_dialect.has_value()) { + if (resolved_dialect.empty()) { // If the schema has no declared metaschema and the user didn't // provide a explicit default, then we cannot do anything. // Better to abort instead of trying to guess. throw sourcemeta::core::SchemaUnknownDialectError(); } - return vocabularies(resolver, maybe_base_dialect.value(), - maybe_dialect.value()); + return vocabularies(resolver, resolved_base_dialect.value(), + resolved_dialect); } auto sourcemeta::core::vocabularies(const SchemaResolver &resolver, - const std::string &base_dialect, - const std::string &dialect) + const SchemaBaseDialect base_dialect, + std::string_view dialect) -> sourcemeta::core::Vocabularies { + const auto base_dialect_string{to_string(base_dialect)}; + // As a performance optimization shortcut - if (base_dialect == dialect) { - if (dialect == "https://json-schema.org/draft/2020-12/schema") { + if (base_dialect_string == dialect) { + if (base_dialect == SchemaBaseDialect::JSON_Schema_2020_12) { return Vocabularies{ {Vocabularies::Known::JSON_Schema_2020_12_Core, true}, {Vocabularies::Known::JSON_Schema_2020_12_Applicator, true}, @@ -361,7 +471,7 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver, {Vocabularies::Known::JSON_Schema_2020_12_Meta_Data, true}, 
{Vocabularies::Known::JSON_Schema_2020_12_Format_Annotation, true}, {Vocabularies::Known::JSON_Schema_2020_12_Content, true}}; - } else if (dialect == "https://json-schema.org/draft/2019-09/schema") { + } else if (base_dialect == SchemaBaseDialect::JSON_Schema_2019_09) { return Vocabularies{ {Vocabularies::Known::JSON_Schema_2019_09_Core, true}, {Vocabularies::Known::JSON_Schema_2019_09_Applicator, true}, @@ -390,7 +500,7 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver, if (known.has_value()) { return Vocabularies{{known.value(), true}}; } - return Vocabularies{{dialect, true}}; + return Vocabularies{{std::string{dialect}, true}}; } /* @@ -398,23 +508,8 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver, * base dialect itself is conceptually the only vocabulary */ - // This is an exhaustive list of all base dialects in the pre-vocabulary world - if (base_dialect == "http://json-schema.org/draft-07/schema#" || - base_dialect == "http://json-schema.org/draft-07/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-06/schema#" || - base_dialect == "http://json-schema.org/draft-06/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-04/schema#" || - base_dialect == "http://json-schema.org/draft-04/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-03/schema#" || - base_dialect == "http://json-schema.org/draft-03/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-02/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-01/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-00/hyper-schema#") { - const auto known = dialect_to_known(base_dialect); - if (known.has_value()) { - return Vocabularies{{known.value(), true}}; - } - return Vocabularies{{base_dialect, true}}; + if (is_pre_vocabulary_base_dialect(base_dialect)) { + return Vocabularies{{base_dialect_to_known(base_dialect), true}}; } /* @@ -425,7 +520,7 @@ auto sourcemeta::core::vocabularies(const 
SchemaResolver &resolver, resolver(dialect)}; if (!maybe_schema_dialect.has_value()) { throw sourcemeta::core::SchemaResolutionError( - dialect, "Could not resolve the metaschema of the schema"); + std::string{dialect}, "Could not resolve the metaschema of the schema"); } const sourcemeta::core::JSON &schema_dialect{maybe_schema_dialect.value()}; // At this point we are sure that the dialect is vocabulary aware and the @@ -493,7 +588,7 @@ auto sourcemeta::core::schema_keyword_priority( return std::max(priority_from_dependencies, priority_from_order_dependencies); } -auto sourcemeta::core::wrap(const sourcemeta::core::JSON::String &identifier) +auto sourcemeta::core::wrap(const std::string_view identifier) -> sourcemeta::core::JSON { auto result{JSON::make_object()}; // JSON Schema 2020-12 is the first dialect that truly supports cross-dialect @@ -507,7 +602,7 @@ auto sourcemeta::core::wrap(const sourcemeta::core::JSON::String &identifier) auto sourcemeta::core::wrap(const sourcemeta::core::JSON &schema, const sourcemeta::core::Pointer &pointer, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect) + std::string_view default_dialect) -> sourcemeta::core::JSON { assert(try_get(schema, pointer)); if (pointer.empty()) { @@ -516,8 +611,8 @@ auto sourcemeta::core::wrap(const sourcemeta::core::JSON &schema, auto copy = schema; const auto effective_dialect{dialect(copy, default_dialect)}; - if (effective_dialect.has_value()) { - copy.assign("$schema", JSON{effective_dialect.value()}); + if (!effective_dialect.empty()) { + copy.assign("$schema", JSON{effective_dialect}); } else { throw SchemaUnknownBaseDialectError(); } @@ -535,9 +630,11 @@ auto sourcemeta::core::wrap(const sourcemeta::core::JSON &schema, // However, note that we use a relative URI so that references to // other schemas whose top-level identifiers are relative URIs don't // get affected. Otherwise, we would cause unintended base resolution. 
- constexpr auto WRAPPER_IDENTIFIER{"__sourcemeta-core-wrap__"}; - const auto id{ - identify(copy, resolver, default_dialect).value_or(WRAPPER_IDENTIFIER)}; + constexpr std::string_view WRAPPER_IDENTIFIER{"__sourcemeta-core-wrap__"}; + const auto maybe_id{identify(copy, resolver, default_dialect)}; + const auto id{maybe_id.empty() ? WRAPPER_IDENTIFIER : maybe_id}; + + URI uri{id}; try { reidentify(copy, id, resolver, default_dialect); @@ -555,7 +652,6 @@ auto sourcemeta::core::wrap(const sourcemeta::core::JSON &schema, result.at("$defs").assign_assume_new("schema", std::move(copy)); // Add a reference to the schema - URI uri{id}; if (!uri.fragment().has_value() || uri.fragment().value().empty()) { uri.fragment(to_string(pointer)); result.assign_assume_new("$ref", JSON{uri.recompose()}); diff --git a/vendor/core/src/core/jsonschema/known_resolver.in.cc b/vendor/core/src/core/jsonschema/known_resolver.in.cc index 0c6bdcfd..3cf290cc 100644 --- a/vendor/core/src/core/jsonschema/known_resolver.in.cc +++ b/vendor/core/src/core/jsonschema/known_resolver.in.cc @@ -1,6 +1,6 @@ #include -auto sourcemeta::core::schema_resolver(std::string_view identifier) +auto sourcemeta::core::schema_resolver(const std::string_view identifier) -> std::optional { // JSON Schema 2020-12 if (identifier == "https://json-schema.org/draft/2020-12/schema" || diff --git a/vendor/core/src/core/jsonschema/known_walker.cc b/vendor/core/src/core/jsonschema/known_walker.cc index 88e19770..a0157806 100644 --- a/vendor/core/src/core/jsonschema/known_walker.cc +++ b/vendor/core/src/core/jsonschema/known_walker.cc @@ -2194,7 +2194,8 @@ auto handle_example(const Vocabularies &vocabularies) } // anonymous namespace -auto schema_walker(std::string_view keyword, const Vocabularies &vocabularies) +auto schema_walker(const std::string_view keyword, + const Vocabularies &vocabularies) -> const SchemaWalkerResult & { // TODO: Make use of JSON key's perfect hashes, as we mostly run the walker by // checking JSON 
property names diff --git a/vendor/core/src/core/jsonschema/transformer.cc b/vendor/core/src/core/jsonschema/transformer.cc index 32287b52..a46e5e84 100644 --- a/vendor/core/src/core/jsonschema/transformer.cc +++ b/vendor/core/src/core/jsonschema/transformer.cc @@ -3,14 +3,26 @@ #include // std::erase_if #include // assert -#include // std::set +#include // std::hash #include // std::ostringstream #include // std::tuple #include // std::unordered_set #include // std::move, std::pair +#include // std::vector namespace { +struct ProcessedRuleHasher { + auto + operator()(const std::tuple &value) const noexcept + -> std::size_t { + return std::hash{}(std::get<0>(value)) ^ + (std::hash{}(std::get<1>(value)) << 1) ^ + (std::hash{}(std::get<2>(value)) << 2); + } +}; + auto calculate_health_percentage(const std::size_t subschemas, const std::size_t failed_subschemas) -> std::uint8_t { @@ -28,20 +40,20 @@ auto calculate_health_percentage(const std::size_t subschemas, namespace sourcemeta::core { -SchemaTransformRule::SchemaTransformRule(std::string &&name, - std::string &&message) - : name_{std::move(name)}, message_{std::move(message)} {} +SchemaTransformRule::SchemaTransformRule(const std::string_view name, + const std::string_view message) + : name_{name}, message_{message} {} auto SchemaTransformRule::operator==(const SchemaTransformRule &other) const -> bool { return this->name() == other.name(); } -auto SchemaTransformRule::name() const -> const std::string & { +auto SchemaTransformRule::name() const noexcept -> std::string_view { return this->name_; } -auto SchemaTransformRule::message() const -> const std::string & { +auto SchemaTransformRule::message() const noexcept -> std::string_view { return this->message_; } @@ -49,47 +61,13 @@ auto SchemaTransformRule::transform(JSON &, const Result &) const -> void { throw SchemaAbortError("This rule cannot be automatically transformed"); } -auto SchemaTransformRule::rereference(const std::string &reference, +auto 
SchemaTransformRule::rereference(const std::string_view reference, const Pointer &origin, const Pointer &, const Pointer &) const -> Pointer { throw SchemaBrokenReferenceError(reference, origin, "The reference broke after transformation"); } -auto SchemaTransformRule::apply(JSON &schema, const JSON &root, - const Vocabularies &vocabularies, - const SchemaWalker &walker, - const SchemaResolver &resolver, - const SchemaFrame &frame, - const SchemaFrame::Location &location) const - -> std::pair { - auto outcome{this->condition(schema, root, vocabularies, frame, location, - walker, resolver)}; - if (!outcome.applies) { - return {true, std::move(outcome)}; - } - - try { - this->transform(schema, outcome); - } catch (const SchemaAbortError &) { - return {false, std::move(outcome)}; - } - - // The condition must always be false after applying the - // transformation in order to avoid infinite loops - if (this->condition(schema, root, vocabularies, frame, location, walker, - resolver) - .applies) { - // TODO: Throw a better custom error that also highlights the schema - // location - std::ostringstream error; - error << "Rule condition holds after application: " << this->name(); - throw std::runtime_error(error.str()); - } - - return {true, std::move(outcome)}; -} - auto SchemaTransformRule::check(const JSON &schema, const JSON &root, const Vocabularies &vocabularies, const SchemaWalker &walker, @@ -101,18 +79,17 @@ auto SchemaTransformRule::check(const JSON &schema, const JSON &root, resolver); } -auto SchemaTransformer::check( - const JSON &schema, const SchemaWalker &walker, - const SchemaResolver &resolver, const SchemaTransformer::Callback &callback, - const std::optional &default_dialect, - const std::optional &default_id) const +auto SchemaTransformer::check(const JSON &schema, const SchemaWalker &walker, + const SchemaResolver &resolver, + const SchemaTransformer::Callback &callback, + std::string_view default_dialect, + std::string_view default_id) const -> 
std::pair { - SchemaFrame frame{SchemaFrame::Mode::Instances}; + SchemaFrame frame{SchemaFrame::Mode::References}; // If we use the default id when there is already one, framing will duplicate // the locations leading to duplicate check reports - if (sourcemeta::core::identify(schema, resolver, default_dialect) - .has_value()) { + if (!sourcemeta::core::identify(schema, resolver, default_dialect).empty()) { frame.analyse(schema, walker, resolver, default_dialect); } else { frame.analyse(schema, walker, resolver, default_dialect, default_id); @@ -158,28 +135,43 @@ auto SchemaTransformer::check( calculate_health_percentage(subschema_count, subschema_failures)}; } -auto SchemaTransformer::apply( - JSON &schema, const SchemaWalker &walker, const SchemaResolver &resolver, - const SchemaTransformer::Callback &callback, - const std::optional &default_dialect, - const std::optional &default_id) const +auto SchemaTransformer::apply(JSON &schema, const SchemaWalker &walker, + const SchemaResolver &resolver, + const SchemaTransformer::Callback &callback, + std::string_view default_dialect, + std::string_view default_id) const -> std::pair { - // There is no point in applying an empty bundle assert(!this->rules.empty()); - std::set> + std::unordered_set, + ProcessedRuleHasher> processed_rules; bool result{true}; std::size_t subschema_count{0}; std::size_t subschema_failures{0}; + + SchemaFrame frame{SchemaFrame::Mode::References}; + + struct PotentiallyBrokenReference { + Pointer origin; + JSON::String original; + JSON::String destination; + Pointer target_pointer; + std::size_t target_relative_pointer; + }; + + std::vector potentially_broken_references; + while (true) { - SchemaFrame frame{SchemaFrame::Mode::Instances}; - frame.analyse(schema, walker, resolver, default_dialect, default_id); - std::unordered_set visited; + if (frame.empty()) { + frame.analyse(schema, walker, resolver, default_dialect, default_id); + } + std::unordered_set visited; bool applied{false}; 
subschema_count = 0; subschema_failures = 0; + for (const auto &entry : frame.locations()) { if (entry.second.type != SchemaFrame::LocationType::Resource && entry.second.type != SchemaFrame::LocationType::Subschema) { @@ -200,70 +192,116 @@ auto SchemaTransformer::apply( bool subschema_failed{false}; for (const auto &rule : this->rules) { - const auto subresult{rule->apply(current, schema, current_vocabularies, - walker, resolver, frame, - entry.second)}; - // This means the rule is fixable - if (subresult.first) { - applied = subresult.second.applies || applied; - } else { - result = false; - subschema_failed = true; - callback(entry.second.pointer, rule->name(), rule->message(), - subresult.second); - } + auto outcome{rule->condition(current, schema, current_vocabularies, + frame, entry.second, walker, resolver)}; - if (!applied) { + if (!outcome.applies) { continue; } - std::tuple mark{ - ¤t, &rule->name(), - // Allow applying the same rule to the same location if the schema - // has changed, which means we are still "making progress". The - // hashing is not perfect, but its enough - current.fast_hash()}; - if (processed_rules.contains(mark)) { - throw SchemaTransformRuleProcessedTwiceError(rule->name(), - entry.second.pointer); - } + // Store data we need before invalidating the frame + const auto transformed_pointer{entry.second.pointer}; + const auto transformed_relative_pointer{entry.second.relative_pointer}; - // Identify and try to address broken references, if any + // Collect reference information BEFORE invalidating the frame. + // We need to save this data because after the transform, the old + // frame's views may point to invalid memory, and a new frame won't + // have location entries for paths that no longer exist. 
+ potentially_broken_references.clear(); for (const auto &reference : frame.references()) { const auto destination{frame.traverse(reference.second.destination)}; if (!destination.has_value() || - // We only care about references with JSON Pointer fragments, - // as these are the only cases, by definition, where the target - // is location-dependent. !reference.second.fragment.has_value() || !reference.second.fragment.value().starts_with('/')) { continue; } const auto &target{destination.value().get()}; + potentially_broken_references.push_back( + {reference.first.second, JSON::String{reference.second.original}, + reference.second.destination, target.pointer, + target.relative_pointer}); + } + + try { + rule->transform(current, outcome); + } catch (const SchemaAbortError &) { + result = false; + subschema_failed = true; + callback(transformed_pointer, rule->name(), rule->message(), outcome); + continue; + } + + applied = true; + + frame.analyse(schema, walker, resolver, default_dialect, default_id); + + const auto new_location{frame.traverse(transformed_pointer)}; + // The location should still exist after transform + assert(new_location.has_value()); + + // Get vocabularies from the new frame + const auto new_vocabularies{ + frame.vocabularies(new_location.value().get(), resolver)}; + + // The condition must always be false after applying the + // transformation in order to avoid infinite loops + if (rule->condition(current, schema, new_vocabularies, frame, + new_location.value().get(), walker, resolver) + .applies) { + std::ostringstream error; + error << "Rule condition holds after application: " << rule->name(); + throw std::runtime_error(error.str()); + } + + // Identify and fix broken references using the saved data + bool references_fixed{false}; + for (const auto &saved_reference : potentially_broken_references) { // The destination still exists, so we don't have to do anything - if (try_get(schema, target.pointer)) { + if (try_get(schema, 
saved_reference.target_pointer)) { continue; } // If the source no longer exists, we don't need to fix the reference - if (!try_get(schema, reference.first.second.initial())) { + if (!try_get(schema, saved_reference.origin.initial())) { continue; } const auto new_fragment{rule->rereference( - reference.second.destination, reference.first.second, - target.relative_pointer, entry.second.relative_pointer)}; + saved_reference.destination, saved_reference.origin, + saved_reference.target_pointer.slice( + saved_reference.target_relative_pointer), + transformed_pointer.slice(transformed_relative_pointer))}; // Note we use the base from the original reference before any // canonicalisation takes place so that we don't overly change // user's references when only fixing up their pointer fragments - URI original{reference.second.original}; + URI original{saved_reference.original}; original.fragment(to_string(new_fragment)); - set(schema, reference.first.second, JSON{original.recompose()}); + set(schema, saved_reference.origin, JSON{original.recompose()}); + references_fixed = true; + } + + std::tuple mark{ + ¤t, rule->name(), + // Allow applying the same rule to the same location if the schema + // has changed, which means we are still "making progress". The + // hashing is not perfect, but its enough + current.fast_hash()}; + if (processed_rules.contains(mark)) { + throw SchemaTransformRuleProcessedTwiceError(rule->name(), + transformed_pointer); } processed_rules.emplace(std::move(mark)); + + // If we fixed references, the schema changed again, so we need to + // invalidate the frame. Otherwise, we can reuse it for the next + // iteration. 
+ if (references_fixed) { + frame.reset(); + } + goto core_transformer_start_again; } @@ -282,7 +320,7 @@ auto SchemaTransformer::apply( calculate_health_percentage(subschema_count, subschema_failures)}; } -auto SchemaTransformer::remove(const std::string &name) -> bool { +auto SchemaTransformer::remove(const std::string_view name) -> bool { return std::erase_if(this->rules, [&name](const auto &rule) { return rule->name() == name; }) > 0; diff --git a/vendor/core/src/core/jsonschema/vocabularies.cc b/vendor/core/src/core/jsonschema/vocabularies.cc index 4bc2e2f2..1759ecb7 100644 --- a/vendor/core/src/core/jsonschema/vocabularies.cc +++ b/vendor/core/src/core/jsonschema/vocabularies.cc @@ -71,7 +71,7 @@ X(OpenAPI_3_2_Base, "https://spec.openapis.org/oas/3.2/vocab/base") namespace { -auto uri_to_known_vocabulary(std::string_view uri) +auto uri_to_known_vocabulary(const std::string_view uri) -> std::optional { using sourcemeta::core::Vocabularies; diff --git a/vendor/core/src/core/jsonschema/walker.cc b/vendor/core/src/core/jsonschema/walker.cc index 67353224..1ae0b372 100644 --- a/vendor/core/src/core/jsonschema/walker.cc +++ b/vendor/core/src/core/jsonschema/walker.cc @@ -6,30 +6,36 @@ namespace { enum class SchemaWalkerType_t : std::uint8_t { Deep, Flat }; -auto ref_overrides_adjacent_keywords(const std::string &base_dialect) -> bool { +auto ref_overrides_adjacent_keywords( + const sourcemeta::core::SchemaBaseDialect base_dialect) -> bool { + using sourcemeta::core::SchemaBaseDialect; // In older drafts, the presence of `$ref` would override any sibling // keywords // See // https://json-schema.org/draft-07/draft-handrews-json-schema-01#rfc.section.8.3 - return base_dialect == "http://json-schema.org/draft-07/schema#" || - base_dialect == "http://json-schema.org/draft-07/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-06/schema#" || - base_dialect == "http://json-schema.org/draft-06/hyper-schema#" || - base_dialect == 
"http://json-schema.org/draft-04/schema#" || - base_dialect == "http://json-schema.org/draft-04/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-03/schema#" || - base_dialect == "http://json-schema.org/draft-03/hyper-schema#"; + switch (base_dialect) { + case SchemaBaseDialect::JSON_Schema_Draft_7: + case SchemaBaseDialect::JSON_Schema_Draft_7_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_6: + case SchemaBaseDialect::JSON_Schema_Draft_6_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_4: + case SchemaBaseDialect::JSON_Schema_Draft_4_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_3: + case SchemaBaseDialect::JSON_Schema_Draft_3_Hyper: + return true; + default: + return false; + } } -auto walk(const std::optional &parent, - const sourcemeta::core::Pointer &pointer, - const sourcemeta::core::PointerTemplate &instance_location, - const sourcemeta::core::PointerTemplate &relative_instance_location, +auto walk(const std::optional &parent, + const sourcemeta::core::WeakPointer &pointer, std::vector &subschemas, const sourcemeta::core::JSON &subschema, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, - const std::string &dialect, const std::string &base_dialect, + const std::string_view dialect, + const sourcemeta::core::SchemaBaseDialect base_dialect, const SchemaWalkerType_t type, const std::size_t level, const bool orphan) -> void { if (!is_schema(subschema)) { @@ -50,43 +56,43 @@ auto walk(const std::optional &parent, // enough information to detect those cases and throw an error if they desire // to be more strict. auto maybe_current_dialect{sourcemeta::core::dialect(subschema, dialect)}; - assert(maybe_current_dialect.has_value()); + assert(!maybe_current_dialect.empty()); // TODO: Note that we determine the identifier here, but the framing does it // all over again. Maybe we should be storing this instead? 
auto id{ sourcemeta::core::identify(subschema, resolver, maybe_current_dialect)}; - const auto different_parent_dialect{maybe_current_dialect.value() != dialect}; - if (!id.has_value() && different_parent_dialect) { + const auto different_parent_dialect{maybe_current_dialect != dialect}; + if (id.empty() && different_parent_dialect) { id = sourcemeta::core::identify(subschema, base_dialect); - if (id.has_value()) { + if (!id.empty()) { maybe_current_dialect = dialect; } } - const auto is_schema_resource{level == 0 || id.has_value()}; - const auto ¤t_dialect{is_schema_resource ? maybe_current_dialect.value() - : dialect}; - auto current_base_dialect{ + const auto is_schema_resource{level == 0 || !id.empty()}; + const std::string_view current_dialect{ + is_schema_resource ? maybe_current_dialect : dialect}; + const auto maybe_resolved_base_dialect{ is_schema_resource && current_dialect != dialect ? sourcemeta::core::base_dialect(subschema, resolver, current_dialect) - .value_or(base_dialect) - : base_dialect}; + : std::nullopt}; + const auto current_base_dialect{maybe_resolved_base_dialect.has_value() + ? 
maybe_resolved_base_dialect.value() + : base_dialect}; const auto vocabularies{sourcemeta::core::vocabularies( resolver, current_base_dialect, current_dialect)}; if (type == SchemaWalkerType_t::Deep || level > 0) { - sourcemeta::core::SchemaIteratorEntry entry{ - .parent = parent, - .pointer = pointer, - .dialect = current_dialect, - .vocabularies = vocabularies, - .base_dialect = current_base_dialect, - .subschema = subschema, - .instance_location = instance_location, - .relative_instance_location = relative_instance_location, - .orphan = orphan}; + sourcemeta::core::SchemaIteratorEntry entry{.parent = parent, + .pointer = pointer, + .dialect = current_dialect, + .vocabularies = vocabularies, + .base_dialect = + current_base_dialect, + .subschema = subschema, + .orphan = orphan}; subschemas.push_back(std::move(entry)); } @@ -114,114 +120,71 @@ auto walk(const std::optional &parent, switch (keyword_info.type) { case sourcemeta::core::SchemaKeywordType:: ApplicatorValueTraverseSomeProperty: { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); - auto new_instance_location{instance_location}; - new_instance_location.emplace_back( - sourcemeta::core::PointerTemplate::Condition{pair.first}); - new_instance_location.emplace_back( - sourcemeta::core::PointerTemplate::Wildcard::Property); - walk(pointer, new_pointer, new_instance_location, - {sourcemeta::core::PointerTemplate::Condition{pair.first}, - sourcemeta::core::PointerTemplate::Wildcard::Property}, - subschemas, pair.second, walker, resolver, current_dialect, - current_base_dialect, type, level + 1, orphan); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); + walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, + current_dialect, current_base_dialect, type, level + 1, orphan); } break; case sourcemeta::core::SchemaKeywordType:: ApplicatorValueTraverseAnyPropertyKey: { - sourcemeta::core::Pointer 
new_pointer{pointer}; - new_pointer.emplace_back(pair.first); - auto new_instance_location{instance_location}; - new_instance_location.emplace_back( - sourcemeta::core::PointerTemplate::Wildcard::Key); - walk(pointer, new_pointer, new_instance_location, - {sourcemeta::core::PointerTemplate::Wildcard::Key}, subschemas, - pair.second, walker, resolver, current_dialect, - current_base_dialect, type, level + 1, orphan); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); + walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, + current_dialect, current_base_dialect, type, level + 1, orphan); } break; case sourcemeta::core::SchemaKeywordType:: ApplicatorValueTraverseAnyItem: { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); - auto new_instance_location{instance_location}; - new_instance_location.emplace_back( - sourcemeta::core::PointerTemplate::Wildcard::Item); - walk(pointer, new_pointer, new_instance_location, - {sourcemeta::core::PointerTemplate::Wildcard::Item}, subschemas, - pair.second, walker, resolver, current_dialect, - current_base_dialect, type, level + 1, orphan); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); + walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, + current_dialect, current_base_dialect, type, level + 1, orphan); } break; case sourcemeta::core::SchemaKeywordType:: ApplicatorValueTraverseSomeItem: { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); - auto new_instance_location{instance_location}; - new_instance_location.emplace_back( - sourcemeta::core::PointerTemplate::Condition{pair.first}); - new_instance_location.emplace_back( - sourcemeta::core::PointerTemplate::Wildcard::Item); - walk(pointer, new_pointer, new_instance_location, - {sourcemeta::core::PointerTemplate::Condition{pair.first}, - 
sourcemeta::core::PointerTemplate::Wildcard::Item}, - subschemas, pair.second, walker, resolver, current_dialect, - current_base_dialect, type, level + 1, orphan); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); + walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, + current_dialect, current_base_dialect, type, level + 1, orphan); } break; case sourcemeta::core::SchemaKeywordType::ApplicatorValueTraverseParent: { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); - auto new_instance_location{instance_location}; - new_instance_location.pop_back(); - walk(pointer, new_pointer, new_instance_location, {}, subschemas, - pair.second, walker, resolver, current_dialect, - current_base_dialect, type, level + 1, orphan); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); + walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, + current_dialect, current_base_dialect, type, level + 1, orphan); } break; case sourcemeta::core::SchemaKeywordType::ApplicatorValueInPlaceOther: { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); - walk(pointer, new_pointer, instance_location, {}, subschemas, - pair.second, walker, resolver, current_dialect, - current_base_dialect, type, level + 1, orphan); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); + walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, + current_dialect, current_base_dialect, type, level + 1, orphan); } break; case sourcemeta::core::SchemaKeywordType::ApplicatorValueInPlaceNegate: { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); - auto new_instance_location{instance_location}; - new_instance_location.emplace_back( - sourcemeta::core::PointerTemplate::Negation{}); - walk(pointer, new_pointer, 
new_instance_location, - {sourcemeta::core::PointerTemplate::Negation{}}, subschemas, - pair.second, walker, resolver, current_dialect, - current_base_dialect, type, level + 1, orphan); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); + walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, + current_dialect, current_base_dialect, type, level + 1, orphan); } break; case sourcemeta::core::SchemaKeywordType::ApplicatorValueInPlaceMaybe: { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); - auto new_instance_location{instance_location}; - new_instance_location.emplace_back( - sourcemeta::core::PointerTemplate::Condition{pair.first}); - walk(pointer, new_pointer, new_instance_location, - {sourcemeta::core::PointerTemplate::Condition{pair.first}}, - subschemas, pair.second, walker, resolver, current_dialect, - current_base_dialect, type, level + 1, orphan); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); + walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, + current_dialect, current_base_dialect, type, level + 1, orphan); } break; case sourcemeta::core::SchemaKeywordType::ApplicatorElementsTraverseItem: if (pair.second.is_array()) { for (std::size_t index = 0; index < pair.second.size(); index++) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); new_pointer.emplace_back(index); - auto new_instance_location{instance_location}; - new_instance_location.emplace_back(new_pointer.back()); - walk(pointer, new_pointer, new_instance_location, - {new_pointer.back()}, subschemas, pair.second.at(index), + walk(pointer, new_pointer, subschemas, pair.second.at(index), walker, resolver, current_dialect, current_base_dialect, type, level + 1, orphan); } @@ -232,12 
+195,12 @@ auto walk(const std::optional &parent, case sourcemeta::core::SchemaKeywordType::ApplicatorElementsInPlace: if (pair.second.is_array()) { for (std::size_t index = 0; index < pair.second.size(); index++) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); new_pointer.emplace_back(index); - walk(pointer, new_pointer, instance_location, {}, subschemas, - pair.second.at(index), walker, resolver, current_dialect, - current_base_dialect, type, level + 1, orphan); + walk(pointer, new_pointer, subschemas, pair.second.at(index), + walker, resolver, current_dialect, current_base_dialect, type, + level + 1, orphan); } } @@ -246,22 +209,12 @@ auto walk(const std::optional &parent, case sourcemeta::core::SchemaKeywordType::ApplicatorElementsInPlaceSome: if (pair.second.is_array()) { for (std::size_t index = 0; index < pair.second.size(); index++) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); new_pointer.emplace_back(index); - auto new_instance_location{instance_location}; - new_instance_location.emplace_back( - sourcemeta::core::PointerTemplate::Condition{pair.first}); - new_instance_location.emplace_back( - sourcemeta::core::PointerTemplate::Condition{ - std::to_string(index)}); - walk(pointer, new_pointer, new_instance_location, - {sourcemeta::core::PointerTemplate::Condition{pair.first}, - sourcemeta::core::PointerTemplate::Condition{ - std::to_string(index)}}, - subschemas, pair.second.at(index), walker, resolver, - current_dialect, current_base_dialect, type, level + 1, - orphan); + walk(pointer, new_pointer, subschemas, pair.second.at(index), + walker, resolver, current_dialect, current_base_dialect, type, + level + 1, orphan); } } @@ -271,25 +224,12 @@ auto walk(const 
std::optional &parent, ApplicatorElementsInPlaceSomeNegate: if (pair.second.is_array()) { for (std::size_t index = 0; index < pair.second.size(); index++) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); new_pointer.emplace_back(index); - auto new_instance_location{instance_location}; - new_instance_location.emplace_back( - sourcemeta::core::PointerTemplate::Condition{pair.first}); - new_instance_location.emplace_back( - sourcemeta::core::PointerTemplate::Condition{ - std::to_string(index)}); - new_instance_location.emplace_back( - sourcemeta::core::PointerTemplate::Negation{}); - walk(pointer, new_pointer, new_instance_location, - {sourcemeta::core::PointerTemplate::Condition{pair.first}, - sourcemeta::core::PointerTemplate::Condition{ - std::to_string(index)}, - sourcemeta::core::PointerTemplate::Negation{}}, - subschemas, pair.second.at(index), walker, resolver, - current_dialect, current_base_dialect, type, level + 1, - orphan); + walk(pointer, new_pointer, subschemas, pair.second.at(index), + walker, resolver, current_dialect, current_base_dialect, type, + level + 1, orphan); } } @@ -299,13 +239,10 @@ auto walk(const std::optional &parent, ApplicatorMembersTraversePropertyStatic: if (pair.second.is_object()) { for (auto &subpair : pair.second.as_object()) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); - new_pointer.emplace_back(subpair.first); - auto new_instance_location{instance_location}; - new_instance_location.emplace_back(new_pointer.back()); - walk(pointer, new_pointer, new_instance_location, - {new_pointer.back()}, subschemas, subpair.second, walker, + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); + new_pointer.push_back(std::cref(subpair.first)); + walk(pointer, new_pointer, subschemas, subpair.second, walker, 
resolver, current_dialect, current_base_dialect, type, level + 1, orphan); } @@ -317,14 +254,12 @@ auto walk(const std::optional &parent, ApplicatorMembersTraversePropertyRegex: if (pair.second.is_object()) { for (auto &subpair : pair.second.as_object()) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); - new_pointer.emplace_back(subpair.first); - auto new_instance_location{instance_location}; - new_instance_location.emplace_back(subpair.first); - walk(pointer, new_pointer, new_instance_location, {subpair.first}, - subschemas, subpair.second, walker, resolver, current_dialect, - current_base_dialect, type, level + 1, orphan); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); + new_pointer.push_back(std::cref(subpair.first)); + walk(pointer, new_pointer, subschemas, subpair.second, walker, + resolver, current_dialect, current_base_dialect, type, + level + 1, orphan); } } @@ -333,19 +268,12 @@ auto walk(const std::optional &parent, case sourcemeta::core::SchemaKeywordType::ApplicatorMembersInPlaceSome: if (pair.second.is_object()) { for (auto &subpair : pair.second.as_object()) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); - new_pointer.emplace_back(subpair.first); - auto new_instance_location{instance_location}; - new_instance_location.emplace_back( - sourcemeta::core::PointerTemplate::Condition{pair.first}); - new_instance_location.emplace_back( - sourcemeta::core::PointerTemplate::Condition{subpair.first}); - walk(pointer, new_pointer, new_instance_location, - {sourcemeta::core::PointerTemplate::Condition{pair.first}, - sourcemeta::core::PointerTemplate::Condition{subpair.first}}, - subschemas, subpair.second, walker, resolver, current_dialect, - current_base_dialect, type, level + 1, orphan); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); + 
new_pointer.push_back(std::cref(subpair.first)); + walk(pointer, new_pointer, subschemas, subpair.second, walker, + resolver, current_dialect, current_base_dialect, type, + level + 1, orphan); } } @@ -354,12 +282,12 @@ auto walk(const std::optional &parent, case sourcemeta::core::SchemaKeywordType::LocationMembers: if (pair.second.is_object()) { for (auto &subpair : pair.second.as_object()) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); - new_pointer.emplace_back(subpair.first); - walk(pointer, new_pointer, instance_location, {}, subschemas, - subpair.second, walker, resolver, current_dialect, - current_base_dialect, type, level + 1, true); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); + new_pointer.push_back(std::cref(subpair.first)); + walk(pointer, new_pointer, subschemas, subpair.second, walker, + resolver, current_dialect, current_base_dialect, type, + level + 1, true); } } @@ -369,26 +297,18 @@ auto walk(const std::optional &parent, ApplicatorValueOrElementsTraverseAnyItemOrItem: if (pair.second.is_array()) { for (std::size_t index = 0; index < pair.second.size(); index++) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); new_pointer.emplace_back(index); - auto new_instance_location{instance_location}; - new_instance_location.emplace_back(new_pointer.back()); - walk(pointer, new_pointer, new_instance_location, - {new_pointer.back()}, subschemas, pair.second.at(index), + walk(pointer, new_pointer, subschemas, pair.second.at(index), walker, resolver, current_dialect, current_base_dialect, type, level + 1, orphan); } } else { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); - auto new_instance_location{instance_location}; - new_instance_location.emplace_back( - 
sourcemeta::core::PointerTemplate::Wildcard::Item); - walk(pointer, new_pointer, new_instance_location, - {sourcemeta::core::PointerTemplate::Wildcard::Item}, subschemas, - pair.second, walker, resolver, current_dialect, - current_base_dialect, type, level + 1, orphan); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); + walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, + current_dialect, current_base_dialect, type, level + 1, orphan); } break; @@ -397,19 +317,18 @@ auto walk(const std::optional &parent, ApplicatorValueOrElementsInPlace: if (pair.second.is_array()) { for (std::size_t index = 0; index < pair.second.size(); index++) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); new_pointer.emplace_back(index); - walk(pointer, new_pointer, instance_location, {}, subschemas, - pair.second.at(index), walker, resolver, current_dialect, - current_base_dialect, type, level + 1, orphan); + walk(pointer, new_pointer, subschemas, pair.second.at(index), + walker, resolver, current_dialect, current_base_dialect, type, + level + 1, orphan); } } else { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); - walk(pointer, new_pointer, instance_location, {}, subschemas, - pair.second, walker, resolver, current_dialect, - current_base_dialect, type, level + 1, orphan); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); + walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, + current_dialect, current_base_dialect, type, level + 1, orphan); } break; @@ -432,36 +351,30 @@ sourcemeta::core::SchemaIterator::SchemaIterator( const sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, - const 
std::optional &default_dialect) { - const std::optional dialect{ + std::string_view default_dialect) { + const std::string_view resolved_dialect{ sourcemeta::core::dialect(schema, default_dialect)}; - sourcemeta::core::Pointer pointer; - sourcemeta::core::PointerTemplate instance_location; + sourcemeta::core::WeakPointer pointer; // If the given schema declares no dialect and the user didn't // not pass a default, then there is nothing we can do. We know // the current schema is a subschema, but cannot walk any further. - if (!dialect.has_value()) { - sourcemeta::core::SchemaIteratorEntry entry{ - .parent = std::nullopt, - .pointer = pointer, - .dialect = std::nullopt, - .vocabularies = {}, - .base_dialect = std::nullopt, - .subschema = schema, - // TODO: Only compute these if needed, i.e. when framing with instance - // locations - .instance_location = instance_location, - .relative_instance_location = instance_location, - .orphan = false}; + if (resolved_dialect.empty()) { + sourcemeta::core::SchemaIteratorEntry entry{.parent = std::nullopt, + .pointer = pointer, + .dialect = "", + .vocabularies = {}, + .base_dialect = std::nullopt, + .subschema = schema, + .orphan = false}; this->subschemas.push_back(std::move(entry)); } else { - const auto base_dialect{ - sourcemeta::core::base_dialect(schema, resolver, dialect)}; - assert(base_dialect.has_value()); - walk(std::nullopt, pointer, instance_location, instance_location, - this->subschemas, schema, walker, resolver, dialect.value(), - base_dialect.value(), SchemaWalkerType_t::Deep, 0, false); + const auto resolved_base_dialect{ + sourcemeta::core::base_dialect(schema, resolver, resolved_dialect)}; + assert(resolved_base_dialect.has_value()); + walk(std::nullopt, pointer, this->subschemas, schema, walker, resolver, + resolved_dialect, resolved_base_dialect.value(), + SchemaWalkerType_t::Deep, 0, false); } } @@ -469,18 +382,17 @@ sourcemeta::core::SchemaIteratorFlat::SchemaIteratorFlat( const sourcemeta::core::JSON 
&schema, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect) { - const std::optional dialect{ + const std::string_view default_dialect) { + const std::string_view resolved_dialect{ sourcemeta::core::dialect(schema, default_dialect)}; - if (dialect.has_value()) { - sourcemeta::core::Pointer pointer; - sourcemeta::core::PointerTemplate instance_location; - const auto base_dialect{ - sourcemeta::core::base_dialect(schema, resolver, dialect)}; - assert(base_dialect.has_value()); - walk(std::nullopt, pointer, instance_location, instance_location, - this->subschemas, schema, walker, resolver, dialect.value(), - base_dialect.value(), SchemaWalkerType_t::Flat, 0, false); + if (!resolved_dialect.empty()) { + sourcemeta::core::WeakPointer pointer; + const auto resolved_base_dialect{ + sourcemeta::core::base_dialect(schema, resolver, resolved_dialect)}; + assert(resolved_base_dialect.has_value()); + walk(std::nullopt, pointer, this->subschemas, schema, walker, resolver, + resolved_dialect, resolved_base_dialect.value(), + SchemaWalkerType_t::Flat, 0, false); } } @@ -488,35 +400,33 @@ sourcemeta::core::SchemaKeywordIterator::SchemaKeywordIterator( const sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect) { + const std::string_view default_dialect) { assert(is_schema(schema)); if (schema.is_boolean()) { return; } - const std::optional dialect{ + const std::string_view resolved_dialect{ sourcemeta::core::dialect(schema, default_dialect)}; - const std::optional base_dialect{ - sourcemeta::core::base_dialect(schema, resolver, dialect)}; + const auto maybe_base_dialect{ + sourcemeta::core::base_dialect(schema, resolver, resolved_dialect)}; Vocabularies vocabularies{ - base_dialect.has_value() && dialect.has_value() - ? 
sourcemeta::core::vocabularies(resolver, base_dialect.value(), - dialect.value()) + maybe_base_dialect.has_value() && !resolved_dialect.empty() + ? sourcemeta::core::vocabularies(resolver, maybe_base_dialect.value(), + resolved_dialect) : Vocabularies{}}; for (const auto &entry : schema.as_object()) { + sourcemeta::core::WeakPointer entry_pointer; + entry_pointer.push_back(std::cref(entry.first)); sourcemeta::core::SchemaIteratorEntry subschema_entry{ .parent = std::nullopt, - .pointer = {entry.first}, - .dialect = dialect, + .pointer = std::move(entry_pointer), + .dialect = resolved_dialect, .vocabularies = vocabularies, - .base_dialect = base_dialect, + .base_dialect = maybe_base_dialect, .subschema = entry.second, - // TODO: Only compute these if needed, i.e. when framing with instance - // locations - .instance_location = {}, - .relative_instance_location = {}, .orphan = false}; this->entries.push_back(std::move(subschema_entry)); } diff --git a/vendor/core/src/core/md5/include/sourcemeta/core/md5.h b/vendor/core/src/core/md5/include/sourcemeta/core/md5.h index c6705c7f..e39796fa 100644 --- a/vendor/core/src/core/md5/include/sourcemeta/core/md5.h +++ b/vendor/core/src/core/md5/include/sourcemeta/core/md5.h @@ -31,7 +31,7 @@ namespace sourcemeta::core { /// sourcemeta::hydra::md5("foo bar", result); /// std::cout << result.str() << "\n"; /// ``` -auto SOURCEMETA_CORE_MD5_EXPORT md5(std::string_view input, +auto SOURCEMETA_CORE_MD5_EXPORT md5(const std::string_view input, std::ostream &output) -> void; } // namespace sourcemeta::core diff --git a/vendor/core/src/core/md5/md5.cc b/vendor/core/src/core/md5/md5.cc index a637dad8..92431c9a 100644 --- a/vendor/core/src/core/md5/md5.cc +++ b/vendor/core/src/core/md5/md5.cc @@ -91,7 +91,7 @@ inline auto md5_process_block(const unsigned char *block, namespace sourcemeta::core { -auto md5(std::string_view input, std::ostream &output) -> void { +auto md5(const std::string_view input, std::ostream &output) -> void { // 
Initial state as per RFC 1321 std::array state{}; state[0] = 0x67452301U; diff --git a/vendor/core/src/core/punycode/include/sourcemeta/core/punycode.h b/vendor/core/src/core/punycode/include/sourcemeta/core/punycode.h index 3e41fdb6..b996e60e 100644 --- a/vendor/core/src/core/punycode/include/sourcemeta/core/punycode.h +++ b/vendor/core/src/core/punycode/include/sourcemeta/core/punycode.h @@ -75,7 +75,7 @@ auto utf8_to_punycode(std::istream &input, std::ostream &output) -> void; /// "Mnchen-3ya"); /// ``` SOURCEMETA_CORE_PUNYCODE_EXPORT -auto utf8_to_punycode(std::string_view input) -> std::string; +auto utf8_to_punycode(const std::string_view input) -> std::string; /// @ingroup punycode /// Decode Punycode to Unicode code points (UTF-32). For example: @@ -97,7 +97,7 @@ auto utf8_to_punycode(std::string_view input) -> std::string; /// (`std::ctype`) for `std::basic_istream` and /// `std::basic_ostream` to function properly. SOURCEMETA_CORE_PUNYCODE_EXPORT -auto punycode_to_utf32(std::string_view input) -> std::u32string; +auto punycode_to_utf32(const std::string_view input) -> std::u32string; /// @ingroup punycode /// Decode Punycode to UTF-8 using streams. 
For example: @@ -126,7 +126,7 @@ auto punycode_to_utf8(std::istream &input, std::ostream &output) -> void; /// "M\xC3\xBCnchen"); /// ``` SOURCEMETA_CORE_PUNYCODE_EXPORT -auto punycode_to_utf8(std::string_view input) -> std::string; +auto punycode_to_utf8(const std::string_view input) -> std::string; } // namespace sourcemeta::core diff --git a/vendor/core/src/core/punycode/punycode.cc b/vendor/core/src/core/punycode/punycode.cc index f114577a..835d93b9 100644 --- a/vendor/core/src/core/punycode/punycode.cc +++ b/vendor/core/src/core/punycode/punycode.cc @@ -259,7 +259,7 @@ auto utf32_to_punycode(std::u32string_view input) -> std::string { return result; } -auto punycode_to_utf32(std::string_view input) -> std::u32string { +auto punycode_to_utf32(const std::string_view input) -> std::u32string { std::u32string result; punycode_decode(input, result); return result; @@ -288,7 +288,7 @@ auto punycode_to_utf8(std::istream &input, std::ostream &output) -> void { utf32_to_utf8(decoded, output); } -auto utf8_to_punycode(std::string_view input) -> std::string { +auto utf8_to_punycode(const std::string_view input) -> std::string { std::istringstream input_stream{std::string{input}}; const auto codepoints = utf8_to_utf32(input_stream); if (!codepoints.has_value()) { @@ -300,7 +300,7 @@ auto utf8_to_punycode(std::string_view input) -> std::string { return result; } -auto punycode_to_utf8(std::string_view input) -> std::string { +auto punycode_to_utf8(const std::string_view input) -> std::string { std::u32string decoded; punycode_decode(input, decoded); std::ostringstream output_stream; diff --git a/vendor/core/src/core/regex/preprocess.h b/vendor/core/src/core/regex/preprocess.h index 485aeec7..227b41c4 100644 --- a/vendor/core/src/core/regex/preprocess.h +++ b/vendor/core/src/core/regex/preprocess.h @@ -538,7 +538,7 @@ inline auto expand_char_class(const std::string &content) return result.none() ? 
"(?!)" : bitset_to_class(result); } -inline auto translate_property(std::string_view name, bool negated) +inline auto translate_property(const std::string_view name, const bool negated) -> std::optional { for (const auto &[prop_name, pcre_name] : unicode_property_map) { if (name == prop_name) { diff --git a/vendor/core/src/core/uri/canonicalize.cc b/vendor/core/src/core/uri/canonicalize.cc index be961d68..8ec0d39c 100644 --- a/vendor/core/src/core/uri/canonicalize.cc +++ b/vendor/core/src/core/uri/canonicalize.cc @@ -9,7 +9,7 @@ namespace { -auto to_lowercase(std::string_view input) -> std::string { +auto to_lowercase(const std::string_view input) -> std::string { std::string result; result.reserve(input.size()); for (const auto character : input) { @@ -62,7 +62,7 @@ auto URI::canonicalize() -> URI & { return *this; } -auto URI::canonicalize(const std::string &input) -> std::string { +auto URI::canonicalize(const std::string_view input) -> std::string { return URI{input}.canonicalize().recompose(); } diff --git a/vendor/core/src/core/uri/escaping.h b/vendor/core/src/core/uri/escaping.h index 697bb1f8..f9d7b554 100644 --- a/vendor/core/src/core/uri/escaping.h +++ b/vendor/core/src/core/uri/escaping.h @@ -177,7 +177,8 @@ inline auto uri_unescape_selective_inplace(std::string &str) -> void { // Full unescaping for URI normalization (copy version for compatibility) // Decodes all percent-encoded sequences -inline auto uri_unescape_selective(std::string_view input) -> std::string { +inline auto uri_unescape_selective(const std::string_view input) + -> std::string { std::string result{input}; uri_unescape_selective_inplace(result); return result; diff --git a/vendor/core/src/core/uri/include/sourcemeta/core/uri.h b/vendor/core/src/core/uri/include/sourcemeta/core/uri.h index 13956038..a4bae290 100644 --- a/vendor/core/src/core/uri/include/sourcemeta/core/uri.h +++ b/vendor/core/src/core/uri/include/sourcemeta/core/uri.h @@ -9,6 +9,7 @@ #include // 
NOLINTEND(misc-include-cleaner) +#include // std::convertible_to #include // std::uint32_t #include // std::filesystem #include // std::istream @@ -17,6 +18,7 @@ #include // std::span #include // std::string #include // std::string_view +#include // std::is_same_v #include // std::vector /// @defgroup uri URI @@ -48,14 +50,19 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// Move assignment operator auto operator=(URI &&) noexcept -> URI & = default; - /// This constructor creates a URI from a string type. For example: + /// This constructor creates a URI from a string. For example: /// /// ```cpp /// #include /// /// const sourcemeta::core::URI uri{"https://www.sourcemeta.com"}; /// ``` - URI(const std::string &input); + template + requires std::convertible_to && + (!std::is_same_v, URI>) + URI(T &&input) { + this->parse(std::string_view{std::forward(input)}); + } /// This constructor creates a URI from a C++ input stream. For example: /// @@ -291,7 +298,7 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// assert(uri.fragment().has_value()); /// assert(uri.fragment().value() == "foo"); /// ``` - auto fragment(std::string_view fragment) -> URI &; + auto fragment(const std::string_view fragment) -> URI &; /// Get the non-dissected query part of the URI, if any. For example: /// @@ -434,7 +441,7 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// sourcemeta::core::URI::from_fragment("foo")}; /// assert(uri.recompose() == "#foo"); /// ``` - static auto from_fragment(std::string_view fragment) -> URI; + static auto from_fragment(const std::string_view fragment) -> URI; /// Create a URI from a file system path. 
For example: /// @@ -460,10 +467,10 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// sourcemeta::core::URI::canonicalize("hTtP://exAmpLe.com:80/TEST")}; /// assert(result == "http://example.com/TEST"); /// ``` - static auto canonicalize(const std::string &input) -> std::string; + static auto canonicalize(std::string_view input) -> std::string; private: - auto parse(const std::string &input) -> void; + auto parse(std::string_view input) -> void; // Exporting symbols that depends on the standard C++ library is considered // safe. diff --git a/vendor/core/src/core/uri/parse.cc b/vendor/core/src/core/uri/parse.cc index 2bbf4cb9..fa95685b 100644 --- a/vendor/core/src/core/uri/parse.cc +++ b/vendor/core/src/core/uri/parse.cc @@ -8,14 +8,15 @@ #include // std::uint64_t #include // std::optional #include // std::string, std::stoul +#include // std::string_view namespace { using namespace sourcemeta::core; -auto validate_percent_encoded_utf8(const std::string &input, - std::string::size_type position) - -> std::string::size_type { +auto validate_percent_encoded_utf8(const std::string_view input, + std::string_view::size_type position) + -> std::string_view::size_type { if (input[position] != URI_PERCENT) { return 3; } @@ -85,7 +86,8 @@ auto validate_percent_encoded_utf8(const std::string &input, return 3 * (1 + continuation_count); } -auto parse_scheme(const std::string &input, std::string::size_type &position) +auto parse_scheme(const std::string_view input, + std::string_view::size_type &position) -> std::optional { if (position >= input.size() || !std::isalpha(static_cast(input[position]))) { @@ -100,7 +102,7 @@ auto parse_scheme(const std::string &input, std::string::size_type &position) } if (position < input.size() && input[position] == URI_COLON) { - auto scheme = input.substr(start, position - start); + std::string scheme{input.substr(start, position - start)}; position += 1; return scheme; } @@ -109,7 +111,8 @@ auto parse_scheme(const std::string &input, 
std::string::size_type &position) return std::nullopt; } -auto parse_port(const std::string &input, std::string::size_type &position) +auto parse_port(const std::string_view input, + std::string_view::size_type &position) -> std::optional { if (position >= input.size() || !std::isdigit(static_cast(input[position]))) { @@ -122,12 +125,12 @@ auto parse_port(const std::string &input, std::string::size_type &position) position += 1; } - const auto port_string = input.substr(start, position - start); + const std::string port_string{input.substr(start, position - start)}; return std::stoul(port_string); } -auto parse_ipv6(const std::string &input, std::string::size_type &position) - -> std::string { +auto parse_ipv6(const std::string_view input, + std::string_view::size_type &position) -> std::string { assert(input[position] == URI_OPEN_BRACKET); const auto start = position; @@ -142,13 +145,13 @@ auto parse_ipv6(const std::string &input, std::string::size_type &position) static_cast(start + 1)}; } - auto ipv6 = input.substr(start + 1, position - start - 1); + std::string ipv6{input.substr(start + 1, position - start - 1)}; position += 1; return ipv6; } -auto parse_host(const std::string &input, std::string::size_type &position) - -> std::string { +auto parse_host(const std::string_view input, + std::string_view::size_type &position) -> std::string { if (position >= input.size()) { return std::string{}; } @@ -180,16 +183,17 @@ auto parse_host(const std::string &input, std::string::size_type &position) return std::string{}; } - return input.substr(start, position - start); + return std::string{input.substr(start, position - start)}; } -auto parse_userinfo(const std::string &input, std::string::size_type &position) +auto parse_userinfo(const std::string_view input, + std::string_view::size_type &position) -> std::optional { const auto start = position; while (position < input.size()) { const auto current = input[position]; if (current == URI_AT) { - auto userinfo = 
input.substr(start, position - start); + std::string userinfo{input.substr(start, position - start)}; position += 1; return userinfo; } @@ -209,7 +213,8 @@ auto parse_userinfo(const std::string &input, std::string::size_type &position) return std::nullopt; } -auto parse_path(const std::string &input, std::string::size_type &position) +auto parse_path(const std::string_view input, + std::string_view::size_type &position) -> std::optional { if (position >= input.size()) { return std::nullopt; @@ -238,10 +243,11 @@ auto parse_path(const std::string &input, std::string::size_type &position) } } - return input.substr(start, position - start); + return std::string{input.substr(start, position - start)}; } -auto parse_query(const std::string &input, std::string::size_type &position) +auto parse_query(const std::string_view input, + std::string_view::size_type &position) -> std::optional { if (position >= input.size() || input[position] != URI_QUESTION) { return std::nullopt; @@ -268,10 +274,11 @@ auto parse_query(const std::string &input, std::string::size_type &position) } } - return input.substr(start, position - start); + return std::string{input.substr(start, position - start)}; } -auto parse_fragment(const std::string &input, std::string::size_type &position) +auto parse_fragment(const std::string_view input, + std::string_view::size_type &position) -> std::optional { if (position >= input.size() || input[position] != URI_HASH) { return std::nullopt; @@ -295,14 +302,15 @@ auto parse_fragment(const std::string &input, std::string::size_type &position) } } - return input.substr(start, position - start); + return std::string{input.substr(start, position - start)}; } } // namespace namespace sourcemeta::core { -auto parse_authority(const std::string &input, std::string::size_type &position, +auto parse_authority(const std::string_view input, + std::string_view::size_type &position, std::optional &userinfo, std::optional &host, std::optional &port) -> void { @@ -332,7 
+340,7 @@ auto parse_authority(const std::string &input, std::string::size_type &position, } } -auto URI::parse(const std::string &input) -> void { +auto URI::parse(const std::string_view input) -> void { assert(!this->scheme_.has_value()); assert(!this->userinfo_.has_value()); assert(!this->host_.has_value()); @@ -345,7 +353,7 @@ auto URI::parse(const std::string &input) -> void { return; } - auto position = std::string::size_type{0}; + std::string_view::size_type position{0}; this->scheme_ = parse_scheme(input, position); diff --git a/vendor/core/src/core/uri/setters.cc b/vendor/core/src/core/uri/setters.cc index 637ab360..6ba2984a 100644 --- a/vendor/core/src/core/uri/setters.cc +++ b/vendor/core/src/core/uri/setters.cc @@ -25,7 +25,7 @@ auto apply_leading_slash_transform(std::optional parsed_path, return parsed_path; } -auto normalize_fragment(std::string_view input) -> std::string { +auto normalize_fragment(const std::string_view input) -> std::string { if (input.empty()) { return ""; } @@ -149,7 +149,7 @@ auto URI::extension(std::string &&extension) -> URI & { return *this; } -auto URI::fragment(std::string_view fragment) -> URI & { +auto URI::fragment(const std::string_view fragment) -> URI & { this->fragment_ = normalize_fragment(std::string{fragment}); return *this; } diff --git a/vendor/core/src/core/uri/uri.cc b/vendor/core/src/core/uri/uri.cc index 3c7ec98e..ef61941c 100644 --- a/vendor/core/src/core/uri/uri.cc +++ b/vendor/core/src/core/uri/uri.cc @@ -6,15 +6,13 @@ namespace sourcemeta::core { -URI::URI(const std::string &input) { this->parse(input); } - URI::URI(std::istream &input) { std::ostringstream output; output << input.rdbuf(); this->parse(output.str()); } -auto URI::from_fragment(std::string_view fragment) -> URI { +auto URI::from_fragment(const std::string_view fragment) -> URI { URI result; result.fragment(fragment); return result; diff --git a/vendor/core/src/core/uritemplate/CMakeLists.txt b/vendor/core/src/core/uritemplate/CMakeLists.txt 
new file mode 100644 index 00000000..427d7955 --- /dev/null +++ b/vendor/core/src/core/uritemplate/CMakeLists.txt @@ -0,0 +1,9 @@ +sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME uritemplate + PRIVATE_HEADERS error.h token.h router.h + SOURCES helpers.h uritemplate.cc uritemplate_router.cc uritemplate_router_view.cc) + +if(SOURCEMETA_CORE_INSTALL) + sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME uritemplate) +endif() + +target_link_libraries(sourcemeta_core_uritemplate PUBLIC sourcemeta::core::io) diff --git a/vendor/core/src/core/uritemplate/helpers.h b/vendor/core/src/core/uritemplate/helpers.h new file mode 100644 index 00000000..5b84b56b --- /dev/null +++ b/vendor/core/src/core/uritemplate/helpers.h @@ -0,0 +1,402 @@ +#ifndef SOURCEMETA_CORE_URITEMPLATE_HELPERS_H_ +#define SOURCEMETA_CORE_URITEMPLATE_HELPERS_H_ + +#include + +#include // std::array +#include // std::size_t +#include // std::string +#include // std::string_view +#include // std::void_t + +namespace sourcemeta::core { + +// Type traits to detect optional static members +template struct has_op : std::false_type {}; +template +struct has_op> : std::true_type {}; + +template struct has_prefix : std::false_type {}; +template +struct has_prefix> : std::true_type {}; + +template +struct has_empty_suffix : std::false_type {}; +template +struct has_empty_suffix> + : std::true_type {}; + +inline auto is_unreserved(const char character) -> bool { + return (character >= 'A' && character <= 'Z') || + (character >= 'a' && character <= 'z') || + (character >= '0' && character <= '9') || character == '-' || + character == '.' || character == '_' || character == '~'; +} + +inline auto is_reserved(const char character) -> bool { + return character == ':' || character == '/' || character == '?' || + character == '#' || character == '[' || character == ']' || + character == '@' || character == '!' 
|| character == '$' || + character == '&' || character == '\'' || character == '(' || + character == ')' || character == '*' || character == '+' || + character == ',' || character == ';' || character == '='; +} + +inline auto is_hex(const char character) -> bool { + return (character >= '0' && character <= '9') || + (character >= 'A' && character <= 'F') || + (character >= 'a' && character <= 'f'); +} + +static constexpr std::array HEX_DIGITS = { + {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', + 'F'}}; + +inline auto append_percent_encoded(std::string &output, const char character) + -> void { + const auto byte = static_cast(character); + output += '%'; + output += HEX_DIGITS[byte >> 4]; + output += HEX_DIGITS[byte & 0x0F]; +} + +inline auto percent_encode(std::string &output, const std::string_view input) + -> void { + output.reserve(output.size() + input.size() * 3); + for (const char character : input) { + if (is_unreserved(character)) { + output += character; + } else { + append_percent_encoded(output, character); + } + } +} + +inline auto percent_encode_reserved(std::string &output, + const std::string_view input) -> void { + output.reserve(output.size() + input.size() * 3); + for (std::size_t index = 0; index < input.size(); ++index) { + const char character = input[index]; + if (is_unreserved(character) || is_reserved(character) || + (character == '%' && index + 2 < input.size() && + is_hex(input[index + 1]) && is_hex(input[index + 2]))) { + output += character; + } else { + append_percent_encoded(output, character); + } + } +} + +template +inline auto encode(std::string &output, const std::string_view input) -> void { + if constexpr (T::allow_reserved) { + percent_encode_reserved(output, input); + } else { + percent_encode(output, input); + } +} + +template +inline auto append_name(std::string &result, const std::string_view name, + const bool value_empty, const bool has_more) -> void { + if constexpr (T::named) { + result += 
name; + if (value_empty && !has_more) { + if constexpr (has_empty_suffix::value) { + result += T::empty_suffix; + } + } else { + result += '='; + } + } +} + +// RFC 6570 Section 2.3: varchar = ALPHA / DIGIT / "_" +inline auto is_varchar(const char character) noexcept -> bool { + return (character >= 'A' && character <= 'Z') || + (character >= 'a' && character <= 'z') || + (character >= '0' && character <= '9') || character == '_'; +} + +// Variable name character including dot for dotted names like "foo.bar" +inline auto is_varname_char(const char character) noexcept -> bool { + return is_varchar(character) || character == '.'; +} + +// RFC 6570 Section 2.2: operator = op-level2 / op-level3 / op-reserve +inline auto is_operator(const char character) noexcept -> bool { + return character == '+' || character == '#' || character == '.' || + character == '/' || character == ';' || character == '?' || + character == '&'; +} + +// RFC 6570 Section 2.2: op-reserve = "=" / "," / "!" / "@" / "|" +inline auto is_reserved_operator(const char character) noexcept -> bool { + return character == '=' || character == ',' || character == '!' 
|| + character == '@' || character == '|'; +} + +// RFC 6570 Section 2.4: modifier = prefix / explode +inline auto is_modifier(const char character) noexcept -> bool { + return character == ':' || character == '*'; +} + +inline auto parse_varname(const std::string_view input, std::size_t position) + -> std::size_t { + if (position >= input.size() || + (!is_varchar(input[position]) && input[position] != '%')) { + throw URITemplateParseError(position + 1); + } + + while (position < input.size() && input[position] != '}' && + input[position] != ',' && input[position] != ':' && + input[position] != '*') { + const char character = input[position]; + + if (is_varchar(character)) { + position++; + } else if (character == '.') { + position++; + if (position >= input.size() || + (!is_varchar(input[position]) && input[position] != '%')) { + throw URITemplateParseError(position + 1); + } + } else if (character == '%') { + if (position + 2 >= input.size()) { + throw URITemplateParseError(position + 1); + } + if (!is_hex(input[position + 1]) || !is_hex(input[position + 2])) { + throw URITemplateParseError(position + 1); + } + position += 3; + } else { + throw URITemplateParseError(position + 1); + } + } + + return position; +} + +// Parse the comma-separated variable specifications of an expression, +// starting at `position` and appending each one to `variables`. Returns the +// index of the closing '}' and throws URITemplateParseError on invalid input +inline auto +parse_variable_list(const std::string_view input, std::size_t position, + std::vector &variables) + -> std::size_t { + while (true) { + const auto start = position; + position = parse_varname(input, position); + + if (position == start) { + throw URITemplateParseError(position + 1); + } + + const auto name = input.substr(start, position - start); + std::uint16_t length = 0; + bool explode = false; + + if (position >= input.size()) { + throw URITemplateParseError(1); + } + + if (input[position] == ':') { + position++; + if (position >= input.size() || input[position] < '1' || + input[position] > '9') { + throw URITemplateParseError(position + 1); + } + + const auto prefix_start = position; + while (position < input.size() && input[position] 
>= '0' && + input[position] <= '9') { + position++; + if (position - prefix_start > 4) { + throw URITemplateParseError(position); + } + } + + const auto prefix_str = + input.substr(prefix_start, position - prefix_start); + std::uint16_t value = 0; + for (const char character : prefix_str) { + value = static_cast( + value * 10 + static_cast(character - '0')); + } + + if (value > 9999 || value == 0) { + throw URITemplateParseError(prefix_start + 1); + } + + length = value; + } else if (input[position] == '*') { + explode = true; + position++; + } + + variables.push_back(URITemplateVariableSpecification{ + .name = name, .length = length, .explode = explode}); + + if (position >= input.size()) { + throw URITemplateParseError(1); + } + + if (input[position] == '}') { + break; + } + + // A variable specification must be followed by ',' or '}', so reject + // anything else (e.g. "{var:3x}") instead of reading it as a new name + if (input[position] != ',') { + throw URITemplateParseError(position + 1); + } + + position++; + } + + return position; +} + +template +auto parse_expression(const std::string_view input) -> URITemplateParseResult { + if constexpr (std::is_same_v) { + if (input.empty() || input[0] == '{') { + return std::nullopt; + } + + if (input[0] == '}') { + throw URITemplateParseError(1); + } + + std::size_t position = 1; + while (position < input.size()) { + if (input[position] == '{') { + break; + } + if (input[position] == '}') { + throw URITemplateParseError(position + 1); + } + position++; + } + + return std::make_pair( + URITemplateToken{URITemplateTokenLiteral{input.substr(0, position)}}, + position); + } else { + if (input.empty() || input[0] != '{') { + return std::nullopt; + } + + std::size_t var_start; + if constexpr (has_op::value) { + if (input.size() < 3 || input[1] != T::op) { + return std::nullopt; + } + var_start = 2; + } else { + if (input.size() < 2) { + throw URITemplateParseError(1); + } + // Not a simple variable if it has an operator + if (is_operator(input[1])) { + return std::nullopt; + } + var_start = 1; + } + + std::vector variables; + const auto end_position = parse_variable_list(input, var_start, variables); + return 
std::make_pair(URITemplateToken{T{std::move(variables)}}, + end_position + 1); + } +} + +template +auto expand_expression( + std::string &result, + const std::vector &variables, + const std::function &callback) -> void { + bool first_var = true; + + for (const auto &variable : variables) { + auto response = callback(variable.name); + if (!response.has_value()) { + continue; + } + + bool first_value = true; + + while (true) { + const auto &[value, object_key, has_more] = response.value(); + + if (variable.length > 0 && + (has_more || object_key.has_value() || !first_value)) { + throw URITemplateExpansionError{ + "Prefix modifier cannot be applied to composite values"}; + } + + auto actual_value = value; + if (variable.length > 0) { + actual_value = actual_value.substr(0, variable.length); + } + + if (variable.explode) { + if (first_var && first_value) { + if constexpr (has_prefix::value) { + result += T::prefix; + } + first_var = false; + } else { + result += T::separator; + } + + if (object_key.has_value()) { + encode(result, object_key.value()); + result += '='; + encode(result, actual_value); + } else if constexpr (T::named) { + result += variable.name; + if (actual_value.empty()) { + if constexpr (has_empty_suffix::value) { + result += T::empty_suffix; + } + } else { + result += '='; + encode(result, actual_value); + } + } else { + encode(result, actual_value); + } + } else { + if (first_var && first_value) { + if constexpr (has_prefix::value) { + result += T::prefix; + } + first_var = false; + append_name(result, variable.name, actual_value.empty(), has_more); + } else if (first_value) { + result += T::separator; + append_name(result, variable.name, actual_value.empty(), has_more); + } else { + result += ','; + } + + if (!first_value || !actual_value.empty() || has_more) { + if (object_key.has_value()) { + encode(result, object_key.value()); + result += ','; + } + encode(result, actual_value); + } + } + + first_value = false; + + if (!has_more) { + break; + } + 
+ response = callback(variable.name); + if (!response.has_value()) { + break; + } + } + } +} + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate.h b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate.h new file mode 100644 index 00000000..4f5540ff --- /dev/null +++ b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate.h @@ -0,0 +1,118 @@ +#ifndef SOURCEMETA_CORE_URITEMPLATE_H_ +#define SOURCEMETA_CORE_URITEMPLATE_H_ + +#ifndef SOURCEMETA_CORE_URITEMPLATE_EXPORT +#include +#endif + +// NOLINTBEGIN(misc-include-cleaner) +#include +#include +#include +// NOLINTEND(misc-include-cleaner) + +#include // std::size_t +#include // std::uint64_t +#include // std::function +#include // std::optional +#include // std::string +#include // std::string_view +#include // std::tuple +#include // std::void_t +#include // std::vector + +/// @defgroup uritemplate URI Template +/// @brief A strict RFC 6570 URI Template implementation. +/// +/// This functionality is included as follows: +/// +/// ```cpp +/// #include +/// ``` + +namespace sourcemeta::core { + +/// @ingroup uritemplate +/// The return type for URI Template variable callbacks (value, key?, has_more) +using URITemplateValue = std::optional< + std::tuple, bool>>; + +/// @ingroup uritemplate +/// The result of parsing a token: the token and how many characters were +/// consumed +using URITemplateParseResult = + std::optional>; + +/// @ingroup uritemplate +/// A parsed URI Template per RFC 6570. This class behaves like a view. The +/// source string must outlive the template +class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplate { +public: + /// Parse a URI Template from a string view. 
For example: + /// + /// ```cpp + /// #include + /// + /// const std::string source{"http://example.com/~{username}/"}; + /// const sourcemeta::core::URITemplate uri_template{source}; + /// ``` + URITemplate(const std::string_view source); + + /// Get the number of tokens in the template + [[nodiscard]] auto size() const noexcept -> std::uint64_t; + + /// Check if the template is empty + [[nodiscard]] auto empty() const noexcept -> bool; + + /// Get the token at the given index + [[nodiscard]] auto at(std::size_t index) const & -> const URITemplateToken &; + + /// Get the token at the given index (move overload) + [[nodiscard]] auto at(std::size_t index) && -> URITemplateToken; + + /// Iterator to the beginning of the tokens + [[nodiscard]] auto begin() const noexcept + -> std::vector::const_iterator; + + /// Iterator to the end of the tokens + [[nodiscard]] auto end() const noexcept + -> std::vector::const_iterator; + + /// Expand the template by looking up variable values via a callback. + /// The callback is called repeatedly for composite values + [[nodiscard]] auto expand( + const std::function &callback) const + -> std::string; + + /// Expand the template using an associative container (string values only) + template > + [[nodiscard]] auto expand(const Container &variables) const -> std::string { + return this->expand([&variables]( + const std::string_view name) -> URITemplateValue { + const auto iterator{variables.find(typename Container::key_type{name})}; + if (iterator == variables.end()) { + return std::nullopt; + } else { + return std::make_tuple(std::string_view{iterator->second}, std::nullopt, + false); + } + }); + } + +private: +// Exporting symbols that depends on the standard C++ library is considered +// safe. 
+// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN +#if defined(_MSC_VER) +#pragma warning(disable : 4251) +#endif + std::vector tokens_; +#if defined(_MSC_VER) +#pragma warning(default : 4251) +#endif +}; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_error.h b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_error.h new file mode 100644 index 00000000..0898a39c --- /dev/null +++ b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_error.h @@ -0,0 +1,132 @@ +#ifndef SOURCEMETA_CORE_URITEMPLATE_ERROR_H_ +#define SOURCEMETA_CORE_URITEMPLATE_ERROR_H_ + +#ifndef SOURCEMETA_CORE_URITEMPLATE_EXPORT +#include +#endif + +#include // std::uint64_t +#include // std::exception +#include // std::filesystem::path +#include // std::runtime_error +#include // std::string +#include // std::string_view +#include // std::move + +namespace sourcemeta::core { + +// Exporting symbols that depends on the standard C++ library is considered +// safe. 
+// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN +#if defined(_MSC_VER) +#pragma warning(disable : 4251 4275) +#endif + +/// @ingroup uritemplate +/// An error that represents a URI Template parsing failure +class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateParseError + : public std::exception { +public: + URITemplateParseError(const std::uint64_t column) : column_{column} {} + + [[nodiscard]] auto what() const noexcept -> const char * override { + return "The input is not a valid URI Template"; + } + + /// Get the column number of the error + [[nodiscard]] auto column() const noexcept -> std::uint64_t { + return this->column_; + } + +private: + std::uint64_t column_; +}; + +/// @ingroup uritemplate +/// An error that represents a URI Template expansion failure +class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateExpansionError + : public std::runtime_error { +public: + URITemplateExpansionError(const std::string &message) + : std::runtime_error{message} {} +}; + +/// @ingroup uritemplate +/// An error that represents a variable name mismatch when adding routes +class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateRouterVariableMismatchError + : public std::exception { +public: + URITemplateRouterVariableMismatchError(const std::string_view left, + const std::string_view right) + : left_{left}, right_{right} {} + + [[nodiscard]] auto what() const noexcept -> const char * override { + return "Variable name mismatch when adding route"; + } + + /// Get the existing variable name + [[nodiscard]] auto left() const noexcept -> const std::string & { + return this->left_; + } + + /// Get the conflicting variable name + [[nodiscard]] auto right() const noexcept -> const std::string & { + return this->right_; + } + +private: + std::string left_; + std::string right_; +}; + +/// @ingroup uritemplate +/// An error for invalid segments when adding routes +class 
SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateRouterInvalidSegmentError + : public std::exception { +public: + URITemplateRouterInvalidSegmentError(const char *message, + const std::string_view segment) + : message_{message}, segment_{segment} {} + + [[nodiscard]] auto what() const noexcept -> const char * override { + return this->message_; + } + + /// Get the offending segment + [[nodiscard]] auto segment() const noexcept -> const std::string & { + return this->segment_; + } + +private: + const char *message_; + std::string segment_; +}; + +/// @ingroup uritemplate +/// An error that represents a failure to save the router to disk +class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateRouterSaveError + : public std::exception { +public: + URITemplateRouterSaveError(std::filesystem::path path, const char *message) + : path_{std::move(path)}, message_{message} {} + + [[nodiscard]] auto what() const noexcept -> const char * override { + return this->message_; + } + + [[nodiscard]] auto path() const noexcept -> const std::filesystem::path & { + return this->path_; + } + +private: + std::filesystem::path path_; + const char *message_; +}; + +#if defined(_MSC_VER) +#pragma warning(default : 4251 4275) +#endif + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_router.h b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_router.h new file mode 100644 index 00000000..81d80103 --- /dev/null +++ b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_router.h @@ -0,0 +1,139 @@ +#ifndef SOURCEMETA_CORE_URITEMPLATE_ROUTER_H_ +#define SOURCEMETA_CORE_URITEMPLATE_ROUTER_H_ + +#ifndef SOURCEMETA_CORE_URITEMPLATE_EXPORT +#include +#endif + +#include + +#include // std::uint16_t, std::uint32_t, std::uint8_t +#include // std::filesystem::path +#include // std::function +#include // std::unique_ptr +#include // std::string_view +#include // std::vector + +namespace 
sourcemeta::core { + +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4251) +#endif + +/// @ingroup uritemplate +/// A URI Template path router. Keep in mind that the URI Template specification +/// DOES NOT define expansion. So this is an opinionated non-standard adaptation +/// of URI Template for path routing purposes +class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateRouter { +public: + /// A handler identifier 0 means "no handler" + using Identifier = std::uint16_t; + + /// The variable index type + using Index = std::uint8_t; + + /// The match callback (index, name, value) + using Callback = + std::function; + + /// The type of a node in the router trie + enum class NodeType : std::uint8_t { + Root = 0, + Literal = 1, + Variable = 2, + Expansion = 3 + }; + + /// A node in the router trie + struct Node { + Identifier identifier{0}; + NodeType type{NodeType::Root}; + std::string_view value; + + // This children distinction enforces that there can only be one non-literal + // child at the type level. Also allows us to more efficiently search on + // literals + std::vector> literals; + std::unique_ptr variable; + }; + + /// Construct an empty router + URITemplateRouter() = default; + + // To avoid mistakes + URITemplateRouter(const URITemplateRouter &) = delete; + URITemplateRouter(URITemplateRouter &&) = delete; + auto operator=(const URITemplateRouter &) -> URITemplateRouter & = delete; + auto operator=(URITemplateRouter &&) -> URITemplateRouter & = delete; + + /// Add a route to the router. Make sure the string lifetime survives the + /// router + auto add(const std::string_view uri_template, const Identifier identifier) + -> void; + + /// Match a path against the router. 
Note the callback might fire for + /// initial matches even though the entire match might still fail + [[nodiscard]] auto match(const std::string_view path, + const Callback &callback) const -> Identifier; + + /// Access the root node of the trie + [[nodiscard]] auto root() const noexcept -> const Node &; + +private: + Node root_; +}; + +/// @ingroup uritemplate +/// A read-only memory-mapped view of a serialized URI Template router +class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateRouterView { +public: + /// A serialized node in the binary format +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4324) +#endif + struct alignas(8) Node { + std::uint32_t string_offset; + std::uint32_t string_length; + std::uint32_t first_literal_child; + std::uint32_t literal_child_count; + std::uint32_t variable_child; + URITemplateRouter::NodeType type; + std::uint8_t padding; + URITemplateRouter::Identifier identifier; + }; +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + + /// Save a router to a binary file + static auto save(const URITemplateRouter &router, + const std::filesystem::path &path) -> void; + + URITemplateRouterView(const std::filesystem::path &path); + + // To avoid mistakes + URITemplateRouterView(const URITemplateRouterView &) = delete; + URITemplateRouterView(URITemplateRouterView &&) = delete; + auto operator=(const URITemplateRouterView &) + -> URITemplateRouterView & = delete; + auto operator=(URITemplateRouterView &&) -> URITemplateRouterView & = delete; + + /// Match a path against the router. 
Note the callback might fire for + /// initial matches even though the entire match might still fail + [[nodiscard]] auto match(const std::string_view path, + const URITemplateRouter::Callback &callback) const + -> URITemplateRouter::Identifier; + +private: + FileView file_view_; +}; + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_token.h b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_token.h new file mode 100644 index 00000000..5b7f19bc --- /dev/null +++ b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_token.h @@ -0,0 +1,142 @@ +#ifndef SOURCEMETA_CORE_URITEMPLATE_TOKEN_H_ +#define SOURCEMETA_CORE_URITEMPLATE_TOKEN_H_ + +#ifndef SOURCEMETA_CORE_URITEMPLATE_EXPORT +#include +#endif + +#include // std::uint16_t +#include // std::string_view +#include // std::variant +#include // std::vector + +namespace sourcemeta::core { + +// Exporting symbols that depends on the standard C++ library is considered +// safe. +// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4251) +#endif + +/// @ingroup uritemplate +/// A literal string segment in a URI Template +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateTokenLiteral { + std::string_view value; +}; + +/// @ingroup uritemplate +/// A variable specification within a URI Template expression +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateVariableSpecification { + std::string_view name; + // As per the RFC, the range is 1-9999. 
0 means "no prefix length" + std::uint16_t length{0}; + bool explode{false}; +}; + +/// @ingroup uritemplate +/// A simple string variable expansion {var} in a URI Template (Level 1) +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateTokenVariable { + std::vector variables; + static constexpr char separator = ','; + static constexpr bool named = false; + static constexpr bool allow_reserved = false; +}; + +/// @ingroup uritemplate +/// A reserved expansion {+var} in a URI Template (Level 2) +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateTokenReservedExpansion { + std::vector variables; + static constexpr char op = '+'; + static constexpr char separator = ','; + static constexpr bool named = false; + static constexpr bool allow_reserved = true; +}; + +/// @ingroup uritemplate +/// A fragment expansion {#var} in a URI Template (Level 2) +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateTokenFragmentExpansion { + std::vector variables; + static constexpr char op = '#'; + static constexpr char separator = ','; + static constexpr char prefix = '#'; + static constexpr bool named = false; + static constexpr bool allow_reserved = true; +}; + +/// @ingroup uritemplate +/// A label expansion {.var} in a URI Template (Level 3) +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateTokenLabelExpansion { + std::vector variables; + static constexpr char op = '.'; + static constexpr char separator = '.'; + static constexpr char prefix = '.'; + static constexpr bool named = false; + static constexpr bool allow_reserved = false; +}; + +/// @ingroup uritemplate +/// A path expansion {/var} in a URI Template (Level 3) +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateTokenPathExpansion { + std::vector variables; + static constexpr char op = '/'; + static constexpr char separator = '/'; + static constexpr char prefix = '/'; + static constexpr bool named = false; + static constexpr bool allow_reserved = false; +}; + +/// @ingroup uritemplate +/// A path parameter 
expansion {;var} in a URI Template (Level 3) +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT + URITemplateTokenPathParameterExpansion { + std::vector variables; + static constexpr char op = ';'; + static constexpr char separator = ';'; + static constexpr char prefix = ';'; + static constexpr bool named = true; + static constexpr bool allow_reserved = false; +}; + +/// @ingroup uritemplate +/// A query expansion {?var} in a URI Template (Level 3) +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateTokenQueryExpansion { + std::vector variables; + static constexpr char op = '?'; + static constexpr char separator = '&'; + static constexpr char prefix = '?'; + static constexpr bool named = true; + static constexpr bool allow_reserved = false; + static constexpr char empty_suffix = '='; +}; + +/// @ingroup uritemplate +/// A query continuation expansion {&var} in a URI Template (Level 3) +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT + URITemplateTokenQueryContinuationExpansion { + std::vector variables; + static constexpr char op = '&'; + static constexpr char separator = '&'; + static constexpr char prefix = '&'; + static constexpr bool named = true; + static constexpr bool allow_reserved = false; + static constexpr char empty_suffix = '='; +}; + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +/// @ingroup uritemplate +/// A token in a parsed URI Template +using URITemplateToken = std::variant< + URITemplateTokenLiteral, URITemplateTokenVariable, + URITemplateTokenReservedExpansion, URITemplateTokenFragmentExpansion, + URITemplateTokenLabelExpansion, URITemplateTokenPathExpansion, + URITemplateTokenPathParameterExpansion, URITemplateTokenQueryExpansion, + URITemplateTokenQueryContinuationExpansion>; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/uritemplate/uritemplate.cc b/vendor/core/src/core/uritemplate/uritemplate.cc new file mode 100644 index 00000000..16ab6763 --- /dev/null +++ b/vendor/core/src/core/uritemplate/uritemplate.cc @@ 
-0,0 +1,103 @@ +#include + +#include "helpers.h" + +#include // assert +#include // std::pair +#include // std::vector + +namespace sourcemeta::core { + +template +static auto try_parse(std::string_view &remaining, std::size_t &offset, + std::vector &tokens) -> bool { + if (auto result = parse_expression(remaining)) { + tokens.emplace_back(std::move(result->first)); + remaining.remove_prefix(result->second); + offset += result->second; + return true; + } + + return false; +} + +template +static auto try_parse_any(std::string_view &remaining, std::size_t &offset, + std::vector &tokens) -> bool { + return (try_parse(remaining, offset, tokens) || ...); +} + +// Tokenize `source` from left to right, storing views into it. Throws +// URITemplateParseError with a 1-based column relative to the whole source +URITemplate::URITemplate(const std::string_view source) { + std::string_view remaining{source}; + std::size_t offset = 0; + + while (!remaining.empty()) { + bool parsed{false}; + try { + parsed = try_parse_any( + remaining, offset, this->tokens_); + } catch (const URITemplateParseError &error) { + // Parsers report columns relative to `remaining`, so rebase them onto + // the full source before rethrowing + throw URITemplateParseError(offset + error.column()); + } + + // No parser accepted a non-empty remainder (e.g. a trailing "{+"), so + // fail loudly instead of silently truncating the template. This is thrown + // outside of the try block to avoid rebasing the column twice + if (!parsed) { + throw URITemplateParseError(offset + 1); + } + } +} + +auto URITemplate::size() const noexcept -> std::uint64_t { + return static_cast(this->tokens_.size()); +} + +auto URITemplate::empty() const noexcept -> bool { + return this->tokens_.empty(); +} + +auto URITemplate::at(const std::size_t index) const & -> const + URITemplateToken & { + assert(index < this->tokens_.size()); + return this->tokens_[index]; +} + +auto URITemplate::at(const std::size_t index) && -> URITemplateToken { + assert(index < this->tokens_.size()); + return std::move(this->tokens_[index]); +} + +auto URITemplate::begin() const noexcept + -> std::vector::const_iterator { + return this->tokens_.cbegin(); +} + +auto URITemplate::end() const noexcept + -> std::vector::const_iterator { + return this->tokens_.cend(); +} + +auto URITemplate::expand( + const std::function &callback) + const -> std::string { + std::string result; + + for (const auto &token : this->tokens_) { + std::visit( + [&result, &callback](const auto &expansion) { + using T = std::decay_t; 
+ if constexpr (std::is_same_v) { + result += expansion.value; + } else { + expand_expression(result, expansion.variables, callback); + } + }, + token); + } + + return result; +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/uritemplate/uritemplate_router.cc b/vendor/core/src/core/uritemplate/uritemplate_router.cc new file mode 100644 index 00000000..2fedf142 --- /dev/null +++ b/vendor/core/src/core/uritemplate/uritemplate_router.cc @@ -0,0 +1,343 @@ +#include + +#include "helpers.h" + +#include // std::ranges::lower_bound +#include // assert +#include // std::numeric_limits + +namespace sourcemeta::core { + +namespace { + +using Node = URITemplateRouter::Node; +using NodeType = URITemplateRouter::NodeType; + +constexpr auto node_value = + [](const std::unique_ptr &child) -> decltype(auto) { + return child->value; +}; + +auto find_literal_child(const std::vector> &literals, + const std::string_view segment) -> Node * { + const auto iterator = + std::ranges::lower_bound(literals, segment, {}, node_value); + if (iterator != literals.end() && (*iterator)->value == segment) { + return iterator->get(); + } + return nullptr; +} + +auto find_or_create_literal_child(std::vector> &literals, + const std::string_view value) -> Node & { + auto iterator = std::ranges::lower_bound(literals, value, {}, node_value); + if (iterator != literals.end() && (*iterator)->value == value) { + return **iterator; + } + + auto child = std::make_unique(); + child->type = NodeType::Literal; + child->value = value; + auto &result = *child; + literals.insert(iterator, std::move(child)); + return result; +} + +auto find_or_create_variable_child(std::unique_ptr &variable, + const std::string_view name, + const NodeType type) -> Node * { + if (!variable) { + variable = std::make_unique(); + variable->type = type; + variable->value = name; + return variable.get(); + } + + if (variable->value != name) { + throw URITemplateRouterVariableMismatchError{variable->value, name}; + } + 
+ if (type == NodeType::Expansion) { + if (variable->type == NodeType::Variable) { + variable->type = NodeType::Expansion; + return variable.get(); + } + } else if (variable->type == NodeType::Expansion) { + return nullptr; + } + + return variable.get(); +} + +// Find the end of a brace expression (including the closing brace) +inline auto find_expression_end(const char *start, const char *end) -> const + char * { + const char *position = start + 1; + while (position < end && *position != '}') { + ++position; + } + if (position < end) { + ++position; // include the '}' + } + return position; +} + +// Extract the current segment (from segment start to next / or end) +inline auto extract_segment(const char *start, const char *end) + -> std::string_view { + const char *position = start; + while (position < end && *position != '/') { + ++position; + } + return {start, static_cast(position - start)}; +} + +} // namespace + +auto URITemplateRouter::add(const std::string_view uri_template, + const Identifier identifier) -> void { + assert(identifier > 0); + + if (uri_template.empty()) { + this->root_.identifier = identifier; + return; + } + + Node *current = nullptr; + bool absorbed = false; + const char *position = uri_template.data(); + const char *const end = position + uri_template.size(); + + while (position < end && !absorbed) { + while (position < end && *position == '/') { + ++position; + } + + if (position >= end) { + break; + } + + const char *segment_start = position; + + if (*position == '}') { + throw URITemplateRouterInvalidSegmentError{ + "Unmatched closing brace", extract_segment(segment_start, end)}; + } + + if (*position == '{') { + const char *expression_start = position; + const char *expression_end = find_expression_end(position, end); + std::string_view expression{ + expression_start, + static_cast(expression_end - expression_start)}; + + ++position; + + if (position >= end) { + throw URITemplateRouterInvalidSegmentError{"Unclosed brace", + 
expression}; + } + + NodeType type = NodeType::Variable; + if (*position == '+') { + type = NodeType::Expansion; + ++position; + if (position >= end || *position == '}') { + throw URITemplateRouterInvalidSegmentError{"Empty variable name", + expression}; + } + } else if (is_operator(*position) && *position != '+') { + throw URITemplateRouterInvalidSegmentError{ + "Unsupported URI Template operator", expression}; + } else if (is_reserved_operator(*position)) { + throw URITemplateRouterInvalidSegmentError{ + "Reserved URI Template operator", expression}; + } else if (*position == '{') { + throw URITemplateRouterInvalidSegmentError{ + "Nested opening brace", extract_segment(expression_start, end)}; + } else if (*position == ' ') { + throw URITemplateRouterInvalidSegmentError{"Space before variable name", + expression}; + } else if (*position == '}') { + throw URITemplateRouterInvalidSegmentError{"Empty variable name", + expression}; + } + + const char *varname_start = position; + while (position < end && *position != '}' && *position != ' ' && + !is_modifier(*position) && *position != ',') { + if (!is_varname_char(*position)) { + throw URITemplateRouterInvalidSegmentError{ + "Invalid character in variable name", expression}; + } + ++position; + } + + if (position >= end) { + throw URITemplateRouterInvalidSegmentError{"Unclosed brace", + expression}; + } + + if (*position == ' ') { + throw URITemplateRouterInvalidSegmentError{ + "Space in variable expression", expression}; + } + + if (*position == ':') { + throw URITemplateRouterInvalidSegmentError{ + "Prefix modifier not supported", expression}; + } + + if (*position == '*') { + throw URITemplateRouterInvalidSegmentError{ + "Explode modifier not supported", expression}; + } + + if (*position == ',') { + throw URITemplateRouterInvalidSegmentError{ + "Multiple variables not supported", expression}; + } + + const std::string_view varname{ + varname_start, static_cast(position - varname_start)}; + + ++position; // skip 
'}' + + if (position < end && *position != '/') { + throw URITemplateRouterInvalidSegmentError{ + "Path segment cannot mix literals and variables", + extract_segment(expression_start, end)}; + } + + if (type == NodeType::Expansion && position < end) { + throw URITemplateRouterInvalidSegmentError{ + "Reserved expansion must be the last segment", expression}; + } + + auto &variable = current ? current->variable : this->root_.variable; + auto *result = find_or_create_variable_child(variable, varname, type); + if (result == nullptr) { + absorbed = true; + } else { + current = result; + } + } else { + while (position < end && *position != '/' && *position != '{') { + if (*position == '}') { + throw URITemplateRouterInvalidSegmentError{ + "Unmatched closing brace", extract_segment(segment_start, end)}; + } + ++position; + } + + if (position < end && *position == '{') { + const char *expr_end = find_expression_end(position, end); + const char *seg_end = expr_end; + while (seg_end < end && *seg_end != '/') { + ++seg_end; + } + throw URITemplateRouterInvalidSegmentError{ + "Path segment cannot mix literals and variables", + std::string_view{segment_start, static_cast( + seg_end - segment_start)}}; + } + + const std::string_view segment{ + segment_start, static_cast(position - segment_start)}; + + auto &literals = current ? 
current->literals : this->root_.literals; + current = &find_or_create_literal_child(literals, segment); + } + } + + if (current == nullptr && uri_template.size() == 1 && + uri_template[0] == '/') { + current = &find_or_create_literal_child(this->root_.literals, ""); + } + + if (!absorbed && current != nullptr) { + current->identifier = identifier; + } +} + +auto URITemplateRouter::root() const noexcept -> const Node & { + return this->root_; +} + +auto URITemplateRouter::match(const std::string_view path, + const Callback &callback) const -> Identifier { + if (path.empty()) { + return this->root_.identifier; + } + + if (path.size() == 1 && path[0] == '/') { + if (auto *child = find_literal_child(this->root_.literals, "")) { + return child->identifier; + } + return 0; + } + + const Node *current = nullptr; + const char *position = path.data(); + const char *const path_end = position + path.size(); + + const std::vector> *literal_children = + &this->root_.literals; + const std::unique_ptr *variable_child = &this->root_.variable; + + std::size_t variable_index = 0; + + // Skip leading slash + if (position < path_end && *position == '/') { + ++position; + } + + while (true) { + const char *segment_start = position; + while (position < path_end && *position != '/') { + ++position; + } + const std::string_view segment{ + segment_start, static_cast(position - segment_start)}; + + // Empty segment (from double slash or trailing slash) doesn't match + if (segment.empty()) { + return 0; + } + + if (auto *literal_match = find_literal_child(*literal_children, segment)) { + current = literal_match; + } else if (*variable_child) { + assert(variable_index <= + std::numeric_limits::max()); + if ((*variable_child)->type == NodeType::Expansion) { + const std::string_view remaining{ + segment_start, static_cast(path_end - segment_start)}; + callback(static_cast(variable_index), + (*variable_child)->value, remaining); + return (*variable_child)->identifier; + } + 
callback(static_cast(variable_index), + (*variable_child)->value, segment); + ++variable_index; + current = variable_child->get(); + } else { + return 0; + } + + literal_children = ¤t->literals; + variable_child = ¤t->variable; + + // Check if there's more path + if (position >= path_end) { + break; + } + + // Skip the slash and continue to next segment + ++position; + } + + return current ? current->identifier : this->root_.identifier; +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/uritemplate/uritemplate_router_view.cc b/vendor/core/src/core/uritemplate/uritemplate_router_view.cc new file mode 100644 index 00000000..425e0f34 --- /dev/null +++ b/vendor/core/src/core/uritemplate/uritemplate_router_view.cc @@ -0,0 +1,289 @@ +#include + +#include // assert +#include // std::memcmp +#include // std::ofstream +#include // std::numeric_limits +#include // std::queue +#include // std::string +#include // std::unordered_map +#include // std::vector + +namespace sourcemeta::core { + +namespace { + +constexpr std::uint32_t ROUTER_MAGIC = 0x52544552; // "RTER" +constexpr std::uint32_t ROUTER_VERSION = 1; +constexpr std::uint32_t NO_CHILD = std::numeric_limits::max(); + +struct RouterHeader { + std::uint32_t magic; + std::uint32_t version; + std::uint32_t node_count; + std::uint32_t string_table_offset; +}; + +// Binary search for a literal child matching the given segment +inline auto binary_search_literal_children( + const URITemplateRouterView::Node *nodes, const char *string_table, + const std::uint32_t first_child, const std::uint32_t child_count, + const char *segment, const std::uint32_t segment_length) noexcept + -> std::uint32_t { + std::uint32_t low = 0; + std::uint32_t high = child_count; + + while (low < high) { + const auto middle = low + (high - low) / 2; + const auto child_index = first_child + middle; + const auto &child = nodes[child_index]; + + // Compare segments lexicographically (content first, then length) + const auto min_length 
= segment_length < child.string_length + ? segment_length + : child.string_length; + const auto content_comparison = + std::memcmp(segment, string_table + child.string_offset, min_length); + const auto comparison = content_comparison != 0 + ? content_comparison + : static_cast(segment_length) - + static_cast(child.string_length); + + if (comparison < 0) { + high = middle; + } else if (comparison > 0) { + low = middle + 1; + } else { + return child_index; + } + } + + return NO_CHILD; +} + +} // namespace + +auto URITemplateRouterView::save(const URITemplateRouter &router, + const std::filesystem::path &path) -> void { + std::vector nodes; + std::string string_table; + std::queue queue; + std::unordered_map + node_indices; + + const auto &root = router.root(); + + Node root_serialized{}; + root_serialized.string_offset = 0; + root_serialized.string_length = 0; + root_serialized.type = URITemplateRouter::NodeType::Root; + root_serialized.padding = 0; + root_serialized.identifier = root.identifier; + + if (root.literals.empty()) { + root_serialized.first_literal_child = NO_CHILD; + root_serialized.literal_child_count = 0; + } else { + root_serialized.first_literal_child = 1; + root_serialized.literal_child_count = + static_cast(root.literals.size()); + for (const auto &child : root.literals) { + node_indices[child.get()] = static_cast(queue.size() + 1); + queue.push(child.get()); + } + } + + if (root.variable) { + root_serialized.variable_child = + static_cast(queue.size() + 1); + node_indices[root.variable.get()] = root_serialized.variable_child; + queue.push(root.variable.get()); + } else { + root_serialized.variable_child = NO_CHILD; + } + + nodes.push_back(root_serialized); + + while (!queue.empty()) { + const auto *node = queue.front(); + queue.pop(); + + Node serialized{}; + serialized.string_offset = static_cast(string_table.size()); + serialized.type = node->type; + serialized.string_length = static_cast(node->value.size()); + string_table += node->value; + + 
serialized.padding = 0; + serialized.identifier = node->identifier; + + const auto first_child_index = + static_cast(nodes.size() + queue.size() + 1); + + if (!node->literals.empty()) { + serialized.first_literal_child = first_child_index; + serialized.literal_child_count = + static_cast(node->literals.size()); + for (const auto &child : node->literals) { + node_indices[child.get()] = + static_cast(nodes.size() + queue.size() + 1); + queue.push(child.get()); + } + } else { + serialized.first_literal_child = NO_CHILD; + serialized.literal_child_count = 0; + } + + if (node->variable) { + serialized.variable_child = + static_cast(nodes.size() + queue.size() + 1); + node_indices[node->variable.get()] = serialized.variable_child; + queue.push(node->variable.get()); + } else { + serialized.variable_child = NO_CHILD; + } + + nodes.push_back(serialized); + } + + RouterHeader header{}; + header.magic = ROUTER_MAGIC; + header.version = ROUTER_VERSION; + header.node_count = static_cast(nodes.size()); + header.string_table_offset = static_cast( + sizeof(RouterHeader) + nodes.size() * sizeof(Node)); + + std::ofstream file(path, std::ios::binary); + if (!file) { + throw URITemplateRouterSaveError{path, "Failed to open file for writing"}; + } + + file.write(reinterpret_cast(&header), sizeof(header)); + file.write(reinterpret_cast(nodes.data()), + static_cast(nodes.size() * sizeof(Node))); + file.write(string_table.data(), + static_cast(string_table.size())); + + if (!file) { + throw URITemplateRouterSaveError{path, + "Failed to write router data to file"}; + } +} + +URITemplateRouterView::URITemplateRouterView(const std::filesystem::path &path) + : file_view_{path} {} + +auto URITemplateRouterView::match(const std::string_view path, + const URITemplateRouter::Callback &callback) + const -> URITemplateRouter::Identifier { + const auto *header = this->file_view_.as(); + assert(header->magic == ROUTER_MAGIC); + assert(header->version == ROUTER_VERSION); + + const auto *nodes = 
this->file_view_.as(sizeof(RouterHeader)); + const auto *string_table = + header->string_table_offset < this->file_view_.size() + ? this->file_view_.as(header->string_table_offset) + : nullptr; + + // Empty path matches empty template + if (path.empty()) { + return nodes[0].identifier; + } + + // Root path "/" is stored as an empty literal segment + if (path.size() == 1 && path[0] == '/') { + const auto &root = nodes[0]; + if (root.first_literal_child == NO_CHILD) { + return 0; + } + + const auto match = binary_search_literal_children( + nodes, string_table, root.first_literal_child, root.literal_child_count, + "", 0); + return match != NO_CHILD ? nodes[match].identifier : 0; + } + + // Walk the trie, matching each path segment + std::uint32_t current_node = 0; + const char *position = path.data(); + const char *const path_end = position + path.size(); + + std::size_t variable_index = 0; + + // Skip leading slash + if (position < path_end && *position == '/') { + ++position; + } + + while (true) { + // Extract segment + const char *segment_start = position; + while (position < path_end && *position != '/') { + ++position; + } + + const auto segment_length = + static_cast(position - segment_start); + + // Empty segment (from double slash or trailing slash) doesn't match + if (segment_length == 0) { + return 0; + } + + const auto &node = nodes[current_node]; + + // Try literal children first + if (node.first_literal_child != NO_CHILD) { + const auto literal_match = binary_search_literal_children( + nodes, string_table, node.first_literal_child, + node.literal_child_count, segment_start, segment_length); + if (literal_match != NO_CHILD) { + current_node = literal_match; + if (position >= path_end) { + break; + } + ++position; + continue; + } + } + + // Fall back to variable child + if (node.variable_child != NO_CHILD) { + assert(variable_index <= + std::numeric_limits::max()); + const auto &variable_node = nodes[node.variable_child]; + + // Check if this is an 
expansion (catch-all) + if (variable_node.type == URITemplateRouter::NodeType::Expansion) { + const auto remaining_length = + static_cast(path_end - segment_start); + callback(static_cast(variable_index), + {string_table + variable_node.string_offset, + variable_node.string_length}, + {segment_start, remaining_length}); + return variable_node.identifier; + } + + // Regular variable - match single segment + callback(static_cast(variable_index), + {string_table + variable_node.string_offset, + variable_node.string_length}, + {segment_start, segment_length}); + ++variable_index; + current_node = node.variable_child; + if (position >= path_end) { + break; + } + ++position; + continue; + } + + // No match + return 0; + } + + return nodes[current_node].identifier; +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_error.h b/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_error.h index f630fa02..5ac5ff46 100644 --- a/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_error.h +++ b/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_error.h @@ -58,11 +58,11 @@ class SOURCEMETA_CORE_YAML_EXPORT YAMLParseError : public std::exception { class SOURCEMETA_CORE_YAML_EXPORT YAMLUnknownAnchorError : public YAMLParseError { public: - YAMLUnknownAnchorError(std::string anchor_name) + YAMLUnknownAnchorError(const std::string_view anchor_name) : YAMLParseError{"YAML alias references undefined anchor"}, - anchor_name_{std::move(anchor_name)} {} + anchor_name_{anchor_name} {} - [[nodiscard]] auto anchor() const noexcept -> const std::string & { + [[nodiscard]] auto anchor() const noexcept -> std::string_view { return this->anchor_name_; } diff --git a/vendor/core/src/extension/alterschema/CMakeLists.txt b/vendor/core/src/extension/alterschema/CMakeLists.txt index 11ca6a60..506936b5 100644 --- a/vendor/core/src/extension/alterschema/CMakeLists.txt +++ b/vendor/core/src/extension/alterschema/CMakeLists.txt @@ -59,6 +59,7 
@@ sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME alterschema common/unnecessary_allof_ref_wrapper_draft.h common/unnecessary_allof_ref_wrapper_modern.h common/unnecessary_allof_wrapper.h + common/unsatisfiable_in_place_applicator_type.h # Linter linter/additional_properties_default.h diff --git a/vendor/core/src/extension/alterschema/alterschema.cc b/vendor/core/src/extension/alterschema/alterschema.cc index 4e87f439..bd2af602 100644 --- a/vendor/core/src/extension/alterschema/alterschema.cc +++ b/vendor/core/src/extension/alterschema/alterschema.cc @@ -87,6 +87,7 @@ inline auto APPLIES_TO_POINTERS(std::vector &&keywords) #include "common/unnecessary_allof_ref_wrapper_draft.h" #include "common/unnecessary_allof_ref_wrapper_modern.h" #include "common/unnecessary_allof_wrapper.h" +#include "common/unsatisfiable_in_place_applicator_type.h" // Linter #include "linter/additional_properties_default.h" @@ -140,6 +141,7 @@ auto add(SchemaTransformer &bundle, const AlterSchemaMode mode) -> void { bundle.add(); bundle.add(); bundle.add(); + bundle.add(); bundle.add(); bundle.add(); bundle.add(); diff --git a/vendor/core/src/extension/alterschema/common/content_schema_without_media_type.h b/vendor/core/src/extension/alterschema/common/content_schema_without_media_type.h index f6d627b2..dc26b817 100644 --- a/vendor/core/src/extension/alterschema/common/content_schema_without_media_type.h +++ b/vendor/core/src/extension/alterschema/common/content_schema_without_media_type.h @@ -10,8 +10,8 @@ class ContentSchemaWithoutMediaType final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) 
const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -20,6 +20,8 @@ class ContentSchemaWithoutMediaType final : public SchemaTransformRule { Vocabularies::Known::JSON_Schema_2019_09_Content}) && schema.is_object() && schema.defines("contentSchema") && !schema.defines("contentMediaType")); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer.concat({"contentSchema"}))); return APPLIES_TO_KEYWORDS("contentSchema"); } diff --git a/vendor/core/src/extension/alterschema/common/duplicate_allof_branches.h b/vendor/core/src/extension/alterschema/common/duplicate_allof_branches.h index 0747e016..cf39f146 100644 --- a/vendor/core/src/extension/alterschema/common/duplicate_allof_branches.h +++ b/vendor/core/src/extension/alterschema/common/duplicate_allof_branches.h @@ -31,11 +31,44 @@ class DuplicateAllOfBranches final : public SchemaTransformRule { } auto transform(JSON &schema, const Result &) const -> void override { - auto collection = schema.at("allOf"); - std::sort(collection.as_array().begin(), collection.as_array().end()); - auto last = - std::unique(collection.as_array().begin(), collection.as_array().end()); - collection.erase(last, collection.as_array().end()); - schema.at("allOf").into(std::move(collection)); + this->index_mapping_.clear(); + const auto &original{schema.at("allOf")}; + + std::unordered_map, std::size_t, + HashJSON>, + EqualJSON>> + seen; + auto result{JSON::make_array()}; + + for (std::size_t index = 0; index < original.size(); ++index) { + const auto &value{original.at(index)}; + const auto match{seen.find(std::cref(value))}; + + if (match == seen.end()) { + this->index_mapping_[index] = seen.size(); + seen.emplace(std::cref(value), seen.size()); + result.push_back(value); + } else { + this->index_mapping_[index] = match->second; + } + } + + schema.assign("allOf", std::move(result)); } + + [[nodiscard]] auto rereference(const std::string_view, const Pointer &, + const Pointer &target, + const Pointer ¤t) const + 
-> Pointer override { + const auto allof_prefix{current.concat({"allOf"})}; + const auto relative{target.resolve_from(allof_prefix)}; + const auto old_index{relative.at(0).to_index()}; + const auto new_index{this->index_mapping_.at(old_index)}; + const Pointer old_prefix{allof_prefix.concat({old_index})}; + const Pointer new_prefix{allof_prefix.concat({new_index})}; + return target.rebase(old_prefix, new_prefix); + } + +private: + mutable std::unordered_map index_mapping_; }; diff --git a/vendor/core/src/extension/alterschema/common/duplicate_anyof_branches.h b/vendor/core/src/extension/alterschema/common/duplicate_anyof_branches.h index 512f6f96..1465628b 100644 --- a/vendor/core/src/extension/alterschema/common/duplicate_anyof_branches.h +++ b/vendor/core/src/extension/alterschema/common/duplicate_anyof_branches.h @@ -31,11 +31,44 @@ class DuplicateAnyOfBranches final : public SchemaTransformRule { } auto transform(JSON &schema, const Result &) const -> void override { - auto collection = schema.at("anyOf"); - std::sort(collection.as_array().begin(), collection.as_array().end()); - auto last = - std::unique(collection.as_array().begin(), collection.as_array().end()); - collection.erase(last, collection.as_array().end()); - schema.at("anyOf").into(std::move(collection)); + this->index_mapping_.clear(); + const auto &original{schema.at("anyOf")}; + + std::unordered_map, std::size_t, + HashJSON>, + EqualJSON>> + seen; + auto result{JSON::make_array()}; + + for (std::size_t index = 0; index < original.size(); ++index) { + const auto &value{original.at(index)}; + const auto match{seen.find(std::cref(value))}; + + if (match == seen.end()) { + this->index_mapping_[index] = seen.size(); + seen.emplace(std::cref(value), seen.size()); + result.push_back(value); + } else { + this->index_mapping_[index] = match->second; + } + } + + schema.assign("anyOf", std::move(result)); } + + [[nodiscard]] auto rereference(const std::string_view, const Pointer &, + const Pointer &target, 
+ const Pointer ¤t) const + -> Pointer override { + const auto anyof_prefix{current.concat({"anyOf"})}; + const auto relative{target.resolve_from(anyof_prefix)}; + const auto old_index{relative.at(0).to_index()}; + const auto new_index{this->index_mapping_.at(old_index)}; + const Pointer old_prefix{anyof_prefix.concat({old_index})}; + const Pointer new_prefix{anyof_prefix.concat({new_index})}; + return target.rebase(old_prefix, new_prefix); + } + +private: + mutable std::unordered_map index_mapping_; }; diff --git a/vendor/core/src/extension/alterschema/common/else_empty.h b/vendor/core/src/extension/alterschema/common/else_empty.h index 1dfb6e7b..4b7f406b 100644 --- a/vendor/core/src/extension/alterschema/common/else_empty.h +++ b/vendor/core/src/extension/alterschema/common/else_empty.h @@ -7,7 +7,7 @@ class ElseEmpty final : public SchemaTransformRule { [[nodiscard]] auto condition(const JSON &schema, const JSON &, const Vocabularies &vocabularies, - const SchemaFrame &, const SchemaFrame::Location &, + const SchemaFrame &frame, const SchemaFrame::Location &location, const SchemaWalker &, const SchemaResolver &) const -> SchemaTransformRule::Result override { ONLY_CONTINUE_IF( @@ -20,6 +20,8 @@ class ElseEmpty final : public SchemaTransformRule { (schema.at("else").is_object() || (!schema.defines("if") || !(schema.at("if").is_boolean() && schema.at("if").to_boolean())))); + ONLY_CONTINUE_IF( + !frame.has_references_through(location.pointer.concat({"else"}))); return APPLIES_TO_KEYWORDS("else"); } diff --git a/vendor/core/src/extension/alterschema/common/else_without_if.h b/vendor/core/src/extension/alterschema/common/else_without_if.h index da3349c5..5039ec10 100644 --- a/vendor/core/src/extension/alterschema/common/else_without_if.h +++ b/vendor/core/src/extension/alterschema/common/else_without_if.h @@ -9,8 +9,8 @@ class ElseWithoutIf final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const 
sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -20,6 +20,8 @@ class ElseWithoutIf final : public SchemaTransformRule { Vocabularies::Known::JSON_Schema_Draft_7}) && schema.is_object() && schema.defines("else") && !schema.defines("if")); + ONLY_CONTINUE_IF( + !frame.has_references_through(location.pointer.concat({"else"}))); return APPLIES_TO_KEYWORDS("else"); } diff --git a/vendor/core/src/extension/alterschema/common/if_without_then_else.h b/vendor/core/src/extension/alterschema/common/if_without_then_else.h index a767054e..923fe117 100644 --- a/vendor/core/src/extension/alterschema/common/if_without_then_else.h +++ b/vendor/core/src/extension/alterschema/common/if_without_then_else.h @@ -10,8 +10,8 @@ class IfWithoutThenElse final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -21,6 +21,8 @@ class IfWithoutThenElse final : public SchemaTransformRule { Vocabularies::Known::JSON_Schema_Draft_7}) && schema.is_object() && schema.defines("if") && !schema.defines("then") && !schema.defines("else")); + ONLY_CONTINUE_IF( + !frame.has_references_through(location.pointer.concat({"if"}))); return APPLIES_TO_KEYWORDS("if"); } diff --git 
a/vendor/core/src/extension/alterschema/common/ignored_metaschema.h b/vendor/core/src/extension/alterschema/common/ignored_metaschema.h index 8831cff1..4fa07a18 100644 --- a/vendor/core/src/extension/alterschema/common/ignored_metaschema.h +++ b/vendor/core/src/extension/alterschema/common/ignored_metaschema.h @@ -18,8 +18,8 @@ class IgnoredMetaschema final : public SchemaTransformRule { ONLY_CONTINUE_IF(schema.is_object() && schema.defines("$schema") && schema.at("$schema").is_string()); const auto dialect{sourcemeta::core::dialect(schema)}; - ONLY_CONTINUE_IF(dialect.has_value()); - ONLY_CONTINUE_IF(dialect.value() != location.dialect); + ONLY_CONTINUE_IF(!dialect.empty()); + ONLY_CONTINUE_IF(dialect != location.dialect); return APPLIES_TO_KEYWORDS("$schema"); } diff --git a/vendor/core/src/extension/alterschema/common/non_applicable_additional_items.h b/vendor/core/src/extension/alterschema/common/non_applicable_additional_items.h index 84317b21..d1a9b2b2 100644 --- a/vendor/core/src/extension/alterschema/common/non_applicable_additional_items.h +++ b/vendor/core/src/extension/alterschema/common/non_applicable_additional_items.h @@ -10,8 +10,8 @@ class NonApplicableAdditionalItems final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -23,6 +23,9 @@ class NonApplicableAdditionalItems final : public SchemaTransformRule { Vocabularies::Known::JSON_Schema_Draft_3}) && schema.is_object() && schema.defines("additionalItems")); + ONLY_CONTINUE_IF(!frame.has_references_through( + 
location.pointer.concat({"additionalItems"}))); + if (schema.defines("items") && is_schema(schema.at("items"))) { return APPLIES_TO_KEYWORDS("additionalItems", "items"); } else if (!schema.defines("items")) { diff --git a/vendor/core/src/extension/alterschema/common/non_applicable_type_specific_keywords.h b/vendor/core/src/extension/alterschema/common/non_applicable_type_specific_keywords.h index 4d1445ac..0c90c8ce 100644 --- a/vendor/core/src/extension/alterschema/common/non_applicable_type_specific_keywords.h +++ b/vendor/core/src/extension/alterschema/common/non_applicable_type_specific_keywords.h @@ -9,8 +9,8 @@ class NonApplicableTypeSpecificKeywords final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -73,6 +73,12 @@ class NonApplicableTypeSpecificKeywords final : public SchemaTransformRule { // If none of the types that the keyword applies to is a valid // type for the current schema, then by definition we can remove it if ((metadata.instances & current_types).none()) { + // Skip keywords that have references pointing to them + if (frame.has_references_through( + location.pointer.concat({entry.first}))) { + continue; + } + positions.push_back(Pointer{entry.first}); } } diff --git a/vendor/core/src/extension/alterschema/common/not_false.h b/vendor/core/src/extension/alterschema/common/not_false.h index 5c021943..65764398 100644 --- a/vendor/core/src/extension/alterschema/common/not_false.h +++ b/vendor/core/src/extension/alterschema/common/not_false.h @@ -8,7 +8,7 @@ class NotFalse final : public 
SchemaTransformRule { [[nodiscard]] auto condition(const JSON &schema, const JSON &, const Vocabularies &vocabularies, - const SchemaFrame &, const SchemaFrame::Location &, + const SchemaFrame &frame, const SchemaFrame::Location &location, const SchemaWalker &, const SchemaResolver &) const -> SchemaTransformRule::Result override { ONLY_CONTINUE_IF(vocabularies.contains_any( @@ -20,6 +20,8 @@ class NotFalse final : public SchemaTransformRule { schema.is_object() && schema.defines("not") && schema.at("not").is_boolean() && !schema.at("not").to_boolean()); + ONLY_CONTINUE_IF( + !frame.has_references_through(location.pointer.concat({"not"}))); return APPLIES_TO_KEYWORDS("not"); } diff --git a/vendor/core/src/extension/alterschema/common/orphan_definitions.h b/vendor/core/src/extension/alterschema/common/orphan_definitions.h index 812a7dc9..0ff2e3e6 100644 --- a/vendor/core/src/extension/alterschema/common/orphan_definitions.h +++ b/vendor/core/src/extension/alterschema/common/orphan_definitions.h @@ -15,6 +15,7 @@ class OrphanDefinitions final : public SchemaTransformRule { const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { + ONLY_CONTINUE_IF(schema.is_object()); const bool has_modern_core{ vocabularies.contains(Vocabularies::Known::JSON_Schema_2020_12_Core) || vocabularies.contains(Vocabularies::Known::JSON_Schema_2019_09_Core)}; @@ -22,19 +23,41 @@ class OrphanDefinitions final : public SchemaTransformRule { vocabularies.contains(Vocabularies::Known::JSON_Schema_Draft_7) || vocabularies.contains(Vocabularies::Known::JSON_Schema_Draft_6) || vocabularies.contains(Vocabularies::Known::JSON_Schema_Draft_4)}; + const bool has_defs{has_modern_core && schema.defines("$defs")}; + const bool has_definitions{(has_modern_core || has_draft_definitions) && + schema.defines("definitions")}; + ONLY_CONTINUE_IF(has_defs || has_definitions); - ONLY_CONTINUE_IF(has_modern_core || 
has_draft_definitions); - ONLY_CONTINUE_IF(schema.is_object()); + const auto prefix_size{location.pointer.size()}; + bool has_external_to_defs{false}; + bool has_external_to_definitions{false}; + std::unordered_set outside_referenced_defs; + std::unordered_set outside_referenced_definitions; - std::vector orphans; + for (const auto &[key, reference] : frame.references()) { + const auto destination_location{frame.traverse(reference.destination)}; + if (destination_location.has_value()) { + if (has_defs) { + process_reference(key.second, destination_location->get().pointer, + location.pointer, prefix_size, "$defs", + has_external_to_defs, outside_referenced_defs); + } - if (has_modern_core) { - collect_orphans(frame, location, schema, "$defs", orphans); + if (has_definitions) { + process_reference(key.second, destination_location->get().pointer, + location.pointer, prefix_size, "definitions", + has_external_to_definitions, + outside_referenced_definitions); + } + } } - if (has_modern_core || has_draft_definitions) { - collect_orphans(frame, location, schema, "definitions", orphans); - } + std::vector orphans; + collect_orphans(schema, "$defs", has_defs, has_external_to_defs, + outside_referenced_defs, orphans); + collect_orphans(schema, "definitions", has_definitions, + has_external_to_definitions, outside_referenced_definitions, + orphans); ONLY_CONTINUE_IF(!orphans.empty()); return APPLIES_TO_POINTERS(std::move(orphans)); @@ -55,19 +78,43 @@ class OrphanDefinitions final : public SchemaTransformRule { private: static auto - collect_orphans(const sourcemeta::core::SchemaFrame &frame, - const sourcemeta::core::SchemaFrame::Location &root, - const JSON &schema, const JSON::String &container, - std::vector &orphans) -> void { - if (!schema.defines(container) || !schema.at(container).is_object()) { + process_reference(const Pointer &source_pointer, + const Pointer &destination_pointer, const Pointer &prefix, + const std::size_t prefix_size, std::string_view container, + 
bool &has_external, + std::unordered_set &referenced) -> void { + if (!destination_pointer.starts_with(prefix, container) || + destination_pointer.size() <= prefix_size + 1) { return; } - for (const auto &entry : schema.at(container).as_object()) { - auto entry_pointer{Pointer{container, entry.first}}; - const auto &entry_location{frame.traverse(root, entry_pointer)}; - if (frame.instance_locations(entry_location).empty()) { - orphans.push_back(std::move(entry_pointer)); + const auto &entry_token{destination_pointer.at(prefix_size + 1)}; + if (entry_token.is_property()) { + const auto &entry_name{entry_token.to_property()}; + if (!source_pointer.starts_with(prefix, container)) { + has_external = true; + referenced.insert(entry_name); + } else if (!source_pointer.starts_with(prefix, container, entry_name)) { + referenced.insert(entry_name); + } + } + } + + static auto + collect_orphans(const JSON &schema, const JSON::String &container, + const bool has_container, const bool has_external_reference, + const std::unordered_set &referenced, + std::vector &orphans) -> void { + if (has_container) { + const auto &maybe_object{schema.at(container)}; + if (maybe_object.is_object()) { + // If no external references to container, all definitions are orphans + // Otherwise, only unreferenced definitions are orphans + for (const auto &entry : maybe_object.as_object()) { + if (!has_external_reference || !referenced.contains(entry.first)) { + orphans.push_back(Pointer{container, entry.first}); + } + } } } } diff --git a/vendor/core/src/extension/alterschema/common/required_properties_in_properties.h b/vendor/core/src/extension/alterschema/common/required_properties_in_properties.h index 7be8acca..53e8baeb 100644 --- a/vendor/core/src/extension/alterschema/common/required_properties_in_properties.h +++ b/vendor/core/src/extension/alterschema/common/required_properties_in_properties.h @@ -77,11 +77,12 @@ class RequiredPropertiesInProperties final : public SchemaTransformRule { const 
SchemaResolver &resolver, const JSON::String &property) const -> bool { if (location.parent.has_value()) { + const auto parent_pointer{to_pointer(location.parent.value())}; const auto relative_pointer{ - location.pointer.resolve_from(location.parent.value())}; + location.pointer.resolve_from(parent_pointer)}; assert(!relative_pointer.empty() && relative_pointer.at(0).is_property()); const auto parent{ - frame.traverse(frame.uri(location.parent.value()).value().get())}; + frame.traverse(frame.uri(parent_pointer).value().get())}; assert(parent.has_value()); const auto type{walker(relative_pointer.at(0).to_property(), frame.vocabularies(parent.value().get(), resolver)) diff --git a/vendor/core/src/extension/alterschema/common/then_empty.h b/vendor/core/src/extension/alterschema/common/then_empty.h index 77baf134..832c9f78 100644 --- a/vendor/core/src/extension/alterschema/common/then_empty.h +++ b/vendor/core/src/extension/alterschema/common/then_empty.h @@ -7,7 +7,7 @@ class ThenEmpty final : public SchemaTransformRule { [[nodiscard]] auto condition(const JSON &schema, const JSON &, const Vocabularies &vocabularies, - const SchemaFrame &, const SchemaFrame::Location &, + const SchemaFrame &frame, const SchemaFrame::Location &location, const SchemaWalker &, const SchemaResolver &) const -> SchemaTransformRule::Result override { ONLY_CONTINUE_IF( @@ -20,6 +20,8 @@ class ThenEmpty final : public SchemaTransformRule { (schema.at("then").is_object() || (!schema.defines("if") || !(schema.at("if").is_boolean() && schema.at("if").to_boolean())))); + ONLY_CONTINUE_IF( + !frame.has_references_through(location.pointer.concat({"then"}))); return APPLIES_TO_KEYWORDS("then"); } diff --git a/vendor/core/src/extension/alterschema/common/then_without_if.h b/vendor/core/src/extension/alterschema/common/then_without_if.h index b1e3c5ab..86354ad6 100644 --- a/vendor/core/src/extension/alterschema/common/then_without_if.h +++ 
b/vendor/core/src/extension/alterschema/common/then_without_if.h @@ -9,8 +9,8 @@ class ThenWithoutIf final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -20,6 +20,8 @@ class ThenWithoutIf final : public SchemaTransformRule { Vocabularies::Known::JSON_Schema_Draft_7}) && schema.is_object() && schema.defines("then") && !schema.defines("if")); + ONLY_CONTINUE_IF( + !frame.has_references_through(location.pointer.concat({"then"}))); return APPLIES_TO_KEYWORDS("then"); } diff --git a/vendor/core/src/extension/alterschema/common/unknown_local_ref.h b/vendor/core/src/extension/alterschema/common/unknown_local_ref.h index fea9ab97..8bb1b6b1 100644 --- a/vendor/core/src/extension/alterschema/common/unknown_local_ref.h +++ b/vendor/core/src/extension/alterschema/common/unknown_local_ref.h @@ -27,26 +27,23 @@ class UnknownLocalRef final : public SchemaTransformRule { // Find the keyword location entry const auto absolute_ref_pointer{location.pointer.concat({"$ref"})}; - const auto reference_entry{frame.references().find( - {SchemaReferenceType::Static, absolute_ref_pointer})}; - ONLY_CONTINUE_IF(reference_entry != frame.references().end()); + const auto reference_entry{ + frame.reference(SchemaReferenceType::Static, absolute_ref_pointer)}; + ONLY_CONTINUE_IF(reference_entry.has_value()); // If the keyword has no fragment, continue - const auto &reference_fragment{reference_entry->second.fragment}; + const auto &reference_fragment{reference_entry->get().fragment}; ONLY_CONTINUE_IF(reference_fragment.has_value()); // Only 
continue if the reference target does not exist - const auto target_location{frame.locations().find( - {SchemaReferenceType::Static, reference_entry->second.destination})}; - ONLY_CONTINUE_IF(target_location == frame.locations().end()); + ONLY_CONTINUE_IF( + !frame.traverse(reference_entry->get().destination).has_value()); // If there is a base beyond the fragment, the base must exist. // Otherwise it is likely an external reference? - const auto &reference_base{reference_entry->second.base}; - if (reference_base.has_value()) { - const auto base_location{frame.locations().find( - {SchemaReferenceType::Static, reference_base.value()})}; - ONLY_CONTINUE_IF(base_location != frame.locations().end()); + const auto &reference_base{reference_entry->get().base}; + if (!reference_base.empty()) { + ONLY_CONTINUE_IF(frame.traverse(reference_base).has_value()); } return APPLIES_TO_KEYWORDS("$ref"); diff --git a/vendor/core/src/extension/alterschema/common/unnecessary_allof_wrapper.h b/vendor/core/src/extension/alterschema/common/unnecessary_allof_wrapper.h index 6f9b668e..4a02137b 100644 --- a/vendor/core/src/extension/alterschema/common/unnecessary_allof_wrapper.h +++ b/vendor/core/src/extension/alterschema/common/unnecessary_allof_wrapper.h @@ -7,7 +7,7 @@ class UnnecessaryAllOfWrapper final : public SchemaTransformRule { [[nodiscard]] auto condition(const JSON &schema, const JSON &, const Vocabularies &vocabularies, - const SchemaFrame &, const SchemaFrame::Location &, + const SchemaFrame &frame, const SchemaFrame::Location &location, const SchemaWalker &walker, const SchemaResolver &) const -> SchemaTransformRule::Result override { ONLY_CONTINUE_IF(vocabularies.contains_any( @@ -51,6 +51,12 @@ class UnnecessaryAllOfWrapper final : public SchemaTransformRule { continue; } + // Skip entries that have direct references pointing to them + const auto entry_pointer{location.pointer.concat({"allOf", index - 1})}; + if (frame.has_references_to(entry_pointer)) { + continue; + } + 
// Skip entries that define their own identity, as elevating keywords // from them could break references that target those anchors if (!this->is_anonymous(entry, vocabularies)) { @@ -111,6 +117,19 @@ class UnnecessaryAllOfWrapper final : public SchemaTransformRule { } } + [[nodiscard]] auto rereference(const std::string_view, const Pointer &, + const Pointer &target, + const Pointer &current) const + -> Pointer override { + // The rule moves keywords from /allOf// to / + const auto allof_prefix{current.concat({"allOf"})}; + const auto relative{target.resolve_from(allof_prefix)}; + const auto &keyword{relative.at(1).to_property()}; + const Pointer old_prefix{allof_prefix.concat({relative.at(0), keyword})}; + const Pointer new_prefix{current.concat({keyword})}; + return target.rebase(old_prefix, new_prefix); + } + private: // TODO: Ideally we this information from the frame out of the box [[nodiscard]] auto is_anonymous(const JSON &entry, diff --git a/vendor/core/src/extension/alterschema/common/unsatisfiable_in_place_applicator_type.h b/vendor/core/src/extension/alterschema/common/unsatisfiable_in_place_applicator_type.h new file mode 100644 index 00000000..7c677d39 --- /dev/null +++ b/vendor/core/src/extension/alterschema/common/unsatisfiable_in_place_applicator_type.h @@ -0,0 +1,85 @@ +class UnsatisfiableInPlaceApplicatorType final : public SchemaTransformRule { +public: + UnsatisfiableInPlaceApplicatorType() + : SchemaTransformRule{ + "unsatisfiable_in_place_applicator_type", + "An in-place applicator branch that defines a `type` with no " + "overlap with the parent `type` can never be satisfied"} {}; + + [[nodiscard]] auto + condition(const sourcemeta::core::JSON &schema, + const sourcemeta::core::JSON &, + const sourcemeta::core::Vocabularies &vocabularies, + const sourcemeta::core::SchemaFrame &, + const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaWalker &walker, + const sourcemeta::core::SchemaResolver &) const + ->
sourcemeta::core::SchemaTransformRule::Result override { + ONLY_CONTINUE_IF(schema.is_object() && schema.defines("type")); + ONLY_CONTINUE_IF(vocabularies.contains_any( + {Vocabularies::Known::JSON_Schema_2020_12_Validation, + Vocabularies::Known::JSON_Schema_2019_09_Validation, + Vocabularies::Known::JSON_Schema_Draft_7, + Vocabularies::Known::JSON_Schema_Draft_6, + Vocabularies::Known::JSON_Schema_Draft_4, + Vocabularies::Known::JSON_Schema_Draft_3, + Vocabularies::Known::JSON_Schema_Draft_2, + Vocabularies::Known::JSON_Schema_Draft_1, + Vocabularies::Known::JSON_Schema_Draft_0})); + const auto parent_types{parse_schema_type(schema.at("type"))}; + + std::vector locations; + + for (const auto &entry : schema.as_object()) { + const auto &keyword{entry.first}; + const auto &keyword_type{walker(keyword, vocabularies).type}; + + if (keyword_type == SchemaKeywordType::ApplicatorElementsInPlace || + keyword_type == SchemaKeywordType::ApplicatorElementsInPlaceSome) { + if (!entry.second.is_array()) { + continue; + } + + const auto &branches{entry.second}; + for (std::size_t index = 0; index < branches.size(); ++index) { + const auto &branch{branches.at(index)}; + if (!branch.is_object() || !branch.defines("type")) { + continue; + } + + const auto branch_types{parse_schema_type(branch.at("type"))}; + if ((parent_types & branch_types).none()) { + locations.push_back(Pointer{keyword, index}); + } + } + } else if (keyword_type == + SchemaKeywordType::ApplicatorValueInPlaceMaybe) { + if (!entry.second.is_object() || !entry.second.defines("type")) { + continue; + } + + const auto branch_types{parse_schema_type(entry.second.at("type"))}; + if ((parent_types & branch_types).none()) { + locations.push_back(Pointer{keyword}); + } + } + } + + ONLY_CONTINUE_IF(!locations.empty()); + return APPLIES_TO_POINTERS(std::move(locations)); + } + + auto transform(JSON &schema, const Result &result) const -> void override { + for (const auto &location : result.locations) { + if 
(location.size() == 2) { + const auto &keyword{location.at(0).to_property()}; + const auto index{location.at(1).to_index()}; + schema.at(keyword).at(index).into(JSON{false}); + } else { + assert(location.size() == 1); + const auto &keyword{location.at(0).to_property()}; + schema.at(keyword).into(JSON{false}); + } + } + } +}; diff --git a/vendor/core/src/extension/alterschema/linter/additional_properties_default.h b/vendor/core/src/extension/alterschema/linter/additional_properties_default.h index ce81205f..1a54886d 100644 --- a/vendor/core/src/extension/alterschema/linter/additional_properties_default.h +++ b/vendor/core/src/extension/alterschema/linter/additional_properties_default.h @@ -10,8 +10,8 @@ class AdditionalPropertiesDefault final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -32,6 +32,8 @@ class AdditionalPropertiesDefault final : public SchemaTransformRule { schema.at("additionalProperties").to_boolean()) || (schema.at("additionalProperties").is_object() && schema.at("additionalProperties").empty()))); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer.concat({"additionalProperties"}))); return APPLIES_TO_KEYWORDS("additionalProperties"); } diff --git a/vendor/core/src/extension/alterschema/linter/content_schema_default.h b/vendor/core/src/extension/alterschema/linter/content_schema_default.h index 536acbf7..b9795f3b 100644 --- a/vendor/core/src/extension/alterschema/linter/content_schema_default.h +++ b/vendor/core/src/extension/alterschema/linter/content_schema_default.h @@ 
-10,8 +10,8 @@ class ContentSchemaDefault final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -23,6 +23,8 @@ class ContentSchemaDefault final : public SchemaTransformRule { schema.at("contentSchema").to_boolean()) || (schema.at("contentSchema").is_object() && schema.at("contentSchema").empty()))); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer.concat({"contentSchema"}))); return APPLIES_TO_KEYWORDS("contentSchema"); } diff --git a/vendor/core/src/extension/alterschema/linter/definitions_to_defs.h b/vendor/core/src/extension/alterschema/linter/definitions_to_defs.h index 4571b7ea..9efe13e3 100644 --- a/vendor/core/src/extension/alterschema/linter/definitions_to_defs.h +++ b/vendor/core/src/extension/alterschema/linter/definitions_to_defs.h @@ -26,7 +26,7 @@ class DefinitionsToDefs final : public SchemaTransformRule { schema.rename("definitions", "$defs"); } - [[nodiscard]] auto rereference(const std::string &, const Pointer &, + [[nodiscard]] auto rereference(const std::string_view, const Pointer &, const Pointer &target, const Pointer &current) const -> Pointer override { diff --git a/vendor/core/src/extension/alterschema/linter/dependencies_default.h b/vendor/core/src/extension/alterschema/linter/dependencies_default.h index ec5cb10b..28dc2b08 100644 --- a/vendor/core/src/extension/alterschema/linter/dependencies_default.h +++ b/vendor/core/src/extension/alterschema/linter/dependencies_default.h @@ -10,8 +10,8 @@ class DependenciesDefault final : public SchemaTransformRule { condition(const
sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -23,6 +23,8 @@ class DependenciesDefault final : public SchemaTransformRule { schema.is_object() && schema.defines("dependencies") && schema.at("dependencies").is_object() && schema.at("dependencies").empty()); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer.concat({"dependencies"}))); return APPLIES_TO_KEYWORDS("dependencies"); } diff --git a/vendor/core/src/extension/alterschema/linter/items_schema_default.h b/vendor/core/src/extension/alterschema/linter/items_schema_default.h index 1425ab79..cceff192 100644 --- a/vendor/core/src/extension/alterschema/linter/items_schema_default.h +++ b/vendor/core/src/extension/alterschema/linter/items_schema_default.h @@ -9,8 +9,8 @@ class ItemsSchemaDefault final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -29,6 +29,8 @@ class ItemsSchemaDefault final : public SchemaTransformRule { schema.is_object() && schema.defines("items") && ((schema.at("items").is_boolean() && schema.at("items").to_boolean()) || (schema.at("items").is_object() && schema.at("items").empty()))); + ONLY_CONTINUE_IF( + 
!frame.has_references_through(location.pointer.concat({"items"}))); return APPLIES_TO_KEYWORDS("items"); } diff --git a/vendor/core/src/extension/alterschema/linter/property_names_default.h b/vendor/core/src/extension/alterschema/linter/property_names_default.h index 0bb4fcc8..17529406 100644 --- a/vendor/core/src/extension/alterschema/linter/property_names_default.h +++ b/vendor/core/src/extension/alterschema/linter/property_names_default.h @@ -10,8 +10,8 @@ class PropertyNamesDefault final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -23,6 +23,8 @@ class PropertyNamesDefault final : public SchemaTransformRule { schema.is_object() && schema.defines("propertyNames") && schema.at("propertyNames").is_object() && schema.at("propertyNames").empty()); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer.concat({"propertyNames"}))); return APPLIES_TO_KEYWORDS("propertyNames"); } diff --git a/vendor/core/src/extension/alterschema/linter/simple_properties_identifiers.h b/vendor/core/src/extension/alterschema/linter/simple_properties_identifiers.h index e4e36519..07cf73ef 100644 --- a/vendor/core/src/extension/alterschema/linter/simple_properties_identifiers.h +++ b/vendor/core/src/extension/alterschema/linter/simple_properties_identifiers.h @@ -46,8 +46,10 @@ class SimplePropertiesIdentifiers final : public SchemaTransformRule { } } else { // Skip pre-vocabulary meta-schemas + JSON::String base_with_hash{location.base}; + base_with_hash += '#'; ONLY_CONTINUE_IF(location.base != location.dialect && - 
(location.base + "#") != location.dialect); + base_with_hash != location.dialect); } std::vector offenders; diff --git a/vendor/core/src/extension/alterschema/linter/unevaluated_items_default.h b/vendor/core/src/extension/alterschema/linter/unevaluated_items_default.h index 23bdebc9..543a6c49 100644 --- a/vendor/core/src/extension/alterschema/linter/unevaluated_items_default.h +++ b/vendor/core/src/extension/alterschema/linter/unevaluated_items_default.h @@ -10,8 +10,8 @@ class UnevaluatedItemsDefault final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -24,6 +24,8 @@ class UnevaluatedItemsDefault final : public SchemaTransformRule { schema.at("unevaluatedItems").to_boolean()) || (schema.at("unevaluatedItems").is_object() && schema.at("unevaluatedItems").empty()))); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer.concat({"unevaluatedItems"}))); return APPLIES_TO_KEYWORDS("unevaluatedItems"); } diff --git a/vendor/core/src/extension/alterschema/linter/unevaluated_properties_default.h b/vendor/core/src/extension/alterschema/linter/unevaluated_properties_default.h index b9dc5bf1..247bbf1c 100644 --- a/vendor/core/src/extension/alterschema/linter/unevaluated_properties_default.h +++ b/vendor/core/src/extension/alterschema/linter/unevaluated_properties_default.h @@ -10,8 +10,8 @@ class UnevaluatedPropertiesDefault final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const 
sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -24,6 +24,8 @@ class UnevaluatedPropertiesDefault final : public SchemaTransformRule { schema.at("unevaluatedProperties").to_boolean()) || (schema.at("unevaluatedProperties").is_object() && schema.at("unevaluatedProperties").empty()))); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer.concat({"unevaluatedProperties"}))); return APPLIES_TO_KEYWORDS("unevaluatedProperties"); } diff --git a/vendor/core/src/extension/editorschema/editorschema.cc b/vendor/core/src/extension/editorschema/editorschema.cc index 81ac6894..43d6f57d 100644 --- a/vendor/core/src/extension/editorschema/editorschema.cc +++ b/vendor/core/src/extension/editorschema/editorschema.cc @@ -10,8 +10,7 @@ namespace { // See https://arxiv.org/abs/2503.11288 for an academic study of this topic auto top_dynamic_anchor_location( const sourcemeta::core::SchemaFrame &frame, - const sourcemeta::core::Pointer &current, - const sourcemeta::core::JSON::String &fragment, + const sourcemeta::core::Pointer &current, const std::string_view fragment, const sourcemeta::core::JSON::String &default_uri) -> std::optional { // Get the location object of where we are at the moment @@ -23,15 +22,17 @@ auto top_dynamic_anchor_location( // Try to locate an anchor with the given name on the current base assert(!fragment.starts_with('#')); - const auto anchor_uri{location.base + "#" + fragment}; + sourcemeta::core::JSON::String anchor_uri{location.base}; + anchor_uri += '#'; + anchor_uri += fragment; const auto anchor{frame.traverse(anchor_uri)}; if (location.parent.has_value()) { // If there is a parent resource, keep looking there, but update the default // if the current resource has
the dynamic anchor we want - return top_dynamic_anchor_location(frame, location.parent.value(), fragment, - anchor.has_value() ? anchor_uri - : default_uri); + return top_dynamic_anchor_location( + frame, to_pointer(location.parent.value()), fragment, + anchor.has_value() ? anchor_uri : default_uri); // If we are at the top of the schema and it declares the dynamic anchor, we // should use that @@ -51,94 +52,134 @@ auto top_dynamic_anchor_location( namespace sourcemeta::core { +// Collected information about a reference to modify +struct ReferenceChange { + Pointer pointer; + JSON::String new_value; + JSON::String keyword; + bool rename_to_ref; +}; + +// Collected information about a subschema to modify +struct SubschemaChange { + Pointer pointer; + SchemaBaseDialect base_dialect; + bool add_schema_declaration; + bool erase_2020_12_keywords; + bool erase_2019_09_keywords; +}; + auto for_editor(JSON &schema, const SchemaWalker &walker, const SchemaResolver &resolver, - const std::optional &default_dialect) -> void { + std::string_view default_dialect) -> void { // (1) Bring in all of the references bundle(schema, walker, resolver, default_dialect); - // (2) Re-frame before changing anything - SchemaFrame frame{SchemaFrame::Mode::References}; - frame.analyse(schema, walker, resolver, default_dialect); + // (2) Frame the schema and collect all changes we need to make + std::vector reference_changes; + std::vector subschema_changes; + + { + SchemaFrame frame{SchemaFrame::Mode::References}; + frame.analyse(schema, walker, resolver, default_dialect); + + // Collect reference changes + for (const auto &[key, reference] : frame.references()) { + assert(!key.second.empty()); + assert(key.second.back().is_property()); + const auto &keyword{key.second.back().to_property()}; + + if (key.first == SchemaReferenceType::Dynamic) { + if (reference.fragment.has_value()) { + auto destination{top_dynamic_anchor_location( + frame, key.second, reference.fragment.value(), + 
reference.destination)}; + if (!destination.has_value()) { + continue; + } + + reference_changes.push_back( + {key.second, to_uri(std::move(destination).value()).recompose(), + keyword, true}); + } else { + reference_changes.push_back({key.second, "", keyword, true}); + } + } else { + if (keyword == "$schema") { + const auto uri{frame.uri(key.second)}; + assert(uri.has_value()); + const auto origin{frame.traverse(uri.value().get())}; + assert(origin.has_value()); + reference_changes.push_back( + {key.second, + JSON::String{to_string(origin.value().get().base_dialect)}, + keyword, false}); + continue; + } - // (3) Pre-process all subschemas - for (const auto &entry : frame.locations()) { - if (entry.second.type != SchemaFrame::LocationType::Resource && - entry.second.type != SchemaFrame::LocationType::Subschema) { - continue; + const auto result{frame.traverse(reference.destination)}; + if (result.has_value()) { + const bool should_rename = + keyword == "$dynamicRef" || keyword == "$recursiveRef"; + reference_changes.push_back( + {key.second, to_uri(result.value().get().pointer).recompose(), + keyword, should_rename}); + } else { + reference_changes.push_back( + {key.second, reference.destination, keyword, false}); + } + } } - auto &subschema{get(schema, entry.second.pointer)}; - if (subschema.is_boolean()) { - continue; - } + // Collect subschema changes + for (const auto &entry : frame.locations()) { + if (entry.second.type != SchemaFrame::LocationType::Resource && + entry.second.type != SchemaFrame::LocationType::Subschema) { + continue; + } - // Make sure that the top-level schema ALWAYS has a `$schema` declaration - if (entry.second.pointer.empty() && !subschema.defines("$schema")) { - subschema.assign_assume_new("$schema", JSON{entry.second.base_dialect}); - } + const auto &subschema{get(schema, entry.second.pointer)}; + if (subschema.is_boolean()) { + continue; + } - // Get rid of the keywords we don't want anymore - anonymize(subschema, 
entry.second.base_dialect); - const auto vocabularies{frame.vocabularies(entry.second, resolver)}; - if (vocabularies.contains(Vocabularies::Known::JSON_Schema_2020_12_Core)) { - subschema.erase_keys({"$vocabulary", "$anchor", "$dynamicAnchor"}); - } else if (vocabularies.contains( - Vocabularies::Known::JSON_Schema_2019_09_Core)) { - subschema.erase_keys({"$vocabulary", "$anchor", "$recursiveAnchor"}); + const bool add_schema = + entry.second.pointer.empty() && !subschema.defines("$schema"); + const auto vocabularies{frame.vocabularies(entry.second, resolver)}; + + subschema_changes.push_back( + {entry.second.pointer, entry.second.base_dialect, add_schema, + vocabularies.contains(Vocabularies::Known::JSON_Schema_2020_12_Core), + vocabularies.contains( + Vocabularies::Known::JSON_Schema_2019_09_Core)}); } } - // (4) Fix-up static and dynamic references - for (const auto &[key, reference] : frame.references()) { - assert(!key.second.empty()); - assert(key.second.back().is_property()); - const auto &keyword{key.second.back().to_property()}; - - if (key.first == SchemaReferenceType::Dynamic) { - if (reference.fragment.has_value()) { - auto destination{top_dynamic_anchor_location(frame, key.second, - reference.fragment.value(), - reference.destination)}; - if (!destination.has_value()) { - continue; - } + // (3) Apply reference changes + for (const auto &change : reference_changes) { + if (!change.new_value.empty()) { + set(schema, change.pointer, JSON{change.new_value}); + } + if (change.rename_to_ref) { + get(schema, change.pointer.initial()).rename(change.keyword, "$ref"); + } + } - set(schema, key.second, - JSON{to_uri(std::move(destination).value()).recompose()}); - } + // (4) Apply subschema changes + for (const auto &change : subschema_changes) { + auto &subschema{get(schema, change.pointer)}; - get(schema, key.second.initial()).rename(keyword, "$ref"); - } else { - // The `$schema` keyword is not allowed to take relative URIs (for - // example, pointers going 
from the root). Because we remove identifiers, - // the only sane thing we can do here is default it to the base dialect, - // which editors will likely understand - if (keyword == "$schema") { - const auto uri{frame.uri(key.second)}; - assert(uri.has_value()); - const auto origin{frame.traverse(uri.value().get())}; - assert(origin.has_value()); - set(schema, key.second, JSON{origin.value().get().base_dialect}); - continue; - } + if (change.add_schema_declaration) { + subschema.assign_assume_new( + "$schema", JSON{JSON::String{to_string(change.base_dialect)}}); + } - // As we get rid of identifiers, we rephrase every reference to be the URI - // representation of the JSON Pointer to the destination from the root - const auto result{frame.traverse(reference.destination)}; - if (result.has_value()) { - set(schema, key.second, - JSON{to_uri(result.value().get().pointer).recompose()}); - - // If we have a dynamic reference to a static location, - // we can just rename the keyword - if (keyword == "$dynamicRef" || keyword == "$recursiveRef") { - get(schema, key.second.initial()).rename(keyword, "$ref"); - } + anonymize(subschema, change.base_dialect); - } else { - set(schema, key.second, JSON{reference.destination}); - } + if (change.erase_2020_12_keywords) { + subschema.erase_keys({"$vocabulary", "$anchor", "$dynamicAnchor"}); + } else if (change.erase_2019_09_keywords) { + subschema.erase_keys({"$vocabulary", "$anchor", "$recursiveAnchor"}); } } } diff --git a/vendor/core/src/extension/editorschema/include/sourcemeta/core/editorschema.h b/vendor/core/src/extension/editorschema/include/sourcemeta/core/editorschema.h index 21de72f6..716c17a1 100644 --- a/vendor/core/src/extension/editorschema/include/sourcemeta/core/editorschema.h +++ b/vendor/core/src/extension/editorschema/include/sourcemeta/core/editorschema.h @@ -17,8 +17,7 @@ #include #include -#include // std::optional, std::nullopt -#include // std::string +#include // std::string_view namespace sourcemeta::core 
{ @@ -47,9 +46,9 @@ namespace sourcemeta::core { /// sourcemeta::core::schema_resolver); /// ``` SOURCEMETA_CORE_EDITORSCHEMA_EXPORT -auto for_editor( - JSON &schema, const SchemaWalker &walker, const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt) -> void; +auto for_editor(JSON &schema, const SchemaWalker &walker, + const SchemaResolver &resolver, + std::string_view default_dialect = "") -> void; } // namespace sourcemeta::core diff --git a/vendor/core/src/extension/options/include/sourcemeta/core/options.h b/vendor/core/src/extension/options/include/sourcemeta/core/options.h index 76aeb9c1..5880f75e 100644 --- a/vendor/core/src/extension/options/include/sourcemeta/core/options.h +++ b/vendor/core/src/extension/options/include/sourcemeta/core/options.h @@ -85,11 +85,11 @@ class SOURCEMETA_CORE_OPTIONS_EXPORT Options { -> void; /// Access the values (if any) set for an option or flag, by its main name - [[nodiscard]] auto at(std::string_view name) const + [[nodiscard]] auto at(const std::string_view name) const -> const std::vector &; /// Check if an option or flag was set, by its main name - [[nodiscard]] auto contains(std::string_view name) const -> bool; + [[nodiscard]] auto contains(const std::string_view name) const -> bool; /// Access the positional arguments, if any [[nodiscard]] auto positional() const diff --git a/vendor/core/src/extension/options/include/sourcemeta/core/options_error.h b/vendor/core/src/extension/options/include/sourcemeta/core/options_error.h index 65367a94..fd8f1a8c 100644 --- a/vendor/core/src/extension/options/include/sourcemeta/core/options_error.h +++ b/vendor/core/src/extension/options/include/sourcemeta/core/options_error.h @@ -8,7 +8,6 @@ #include // std::exception #include // std::string #include // std::string_view -#include // std::move namespace sourcemeta::core { @@ -40,9 +39,11 @@ class SOURCEMETA_CORE_OPTIONS_EXPORT OptionsError : public std::exception { /// This class represents a unknown 
option error struct SOURCEMETA_CORE_OPTIONS_EXPORT OptionsUnknownOptionError : public OptionsError { - explicit OptionsUnknownOptionError(std::string option) - : OptionsError{"Unknown option"}, option_{std::move(option)} {} - [[nodiscard]] auto option() const -> const auto & { return this->option_; } + explicit OptionsUnknownOptionError(const std::string_view option) + : OptionsError{"Unknown option"}, option_{option} {} + [[nodiscard]] auto option() const noexcept -> std::string_view { + return this->option_; + } private: std::string option_; @@ -52,10 +53,11 @@ struct SOURCEMETA_CORE_OPTIONS_EXPORT OptionsUnknownOptionError /// This class represents a value being passed to a flag struct SOURCEMETA_CORE_OPTIONS_EXPORT OptionsUnexpectedValueFlagError : public OptionsError { - explicit OptionsUnexpectedValueFlagError(std::string option) - : OptionsError{"This flag cannot take a value"}, - option_{std::move(option)} {} - [[nodiscard]] auto option() const -> const auto & { return this->option_; } + explicit OptionsUnexpectedValueFlagError(const std::string_view option) + : OptionsError{"This flag cannot take a value"}, option_{option} {} + [[nodiscard]] auto option() const noexcept -> std::string_view { + return this->option_; + } private: std::string option_; @@ -65,10 +67,11 @@ struct SOURCEMETA_CORE_OPTIONS_EXPORT OptionsUnexpectedValueFlagError /// This class represents a missing value from an option struct SOURCEMETA_CORE_OPTIONS_EXPORT OptionsMissingOptionValueError : public OptionsError { - explicit OptionsMissingOptionValueError(std::string option) - : OptionsError{"This option must take a value"}, - option_{std::move(option)} {} - [[nodiscard]] auto option() const -> const auto & { return this->option_; } + explicit OptionsMissingOptionValueError(const std::string_view option) + : OptionsError{"This option must take a value"}, option_{option} {} + [[nodiscard]] auto option() const noexcept -> std::string_view { + return this->option_; + } private: std::string 
option_; diff --git a/vendor/core/src/extension/options/options.cc b/vendor/core/src/extension/options/options.cc index 12c0650e..b754701c 100644 --- a/vendor/core/src/extension/options/options.cc +++ b/vendor/core/src/extension/options/options.cc @@ -17,7 +17,7 @@ auto find_canonical_name(const T &aliases, const typename T::key_type &alias) -> const typename T::mapped_type & { const auto iterator{aliases.find(alias)}; if (iterator == aliases.cend()) { - throw sourcemeta::core::OptionsUnknownOptionError(std::string{alias}); + throw sourcemeta::core::OptionsUnknownOptionError(alias); } else { return iterator->second; } @@ -57,14 +57,14 @@ auto Options::flag(std::string &&name, this->flags.emplace(view); } -auto Options::at(std::string_view name) const +auto Options::at(const std::string_view name) const -> const std::vector & { assert(!name.empty()); const auto iterator{this->options_.find(name)}; return iterator == this->options_.cend() ? Options::EMPTY : iterator->second; } -auto Options::contains(std::string_view name) const -> bool { +auto Options::contains(const std::string_view name) const -> bool { return this->options_.contains(name); } @@ -104,7 +104,7 @@ auto Options::parse(const int argc, if (eq == std::string_view::npos) { this->options_[canonical].push_back(token.substr(2)); } else { - throw OptionsUnexpectedValueFlagError(std::string{name}); + throw OptionsUnexpectedValueFlagError(name); } } else if (eq != std::string_view::npos) { this->options_[canonical].push_back(token.substr(eq + 1)); @@ -112,7 +112,7 @@ auto Options::parse(const int argc, this->options_[canonical].emplace_back(next); index += 1; } else { - throw OptionsMissingOptionValueError(std::string{name}); + throw OptionsMissingOptionValueError(name); } // Parse short options @@ -132,7 +132,7 @@ auto Options::parse(const int argc, index += 1; break; } else { - throw OptionsMissingOptionValueError(std::string{name}); + throw OptionsMissingOptionValueError(name); } } diff --git 
a/vendor/core/src/lang/io/CMakeLists.txt b/vendor/core/src/lang/io/CMakeLists.txt index 1911c620..a890265e 100644 --- a/vendor/core/src/lang/io/CMakeLists.txt +++ b/vendor/core/src/lang/io/CMakeLists.txt @@ -1,4 +1,6 @@ -sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME io SOURCES io.cc) +sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME io + PRIVATE_HEADERS error.h fileview.h + SOURCES io.cc io_fileview.cc) if(SOURCEMETA_CORE_INSTALL) sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME io) diff --git a/vendor/core/src/lang/io/include/sourcemeta/core/io.h b/vendor/core/src/lang/io/include/sourcemeta/core/io.h index 821a09f3..ae03a233 100644 --- a/vendor/core/src/lang/io/include/sourcemeta/core/io.h +++ b/vendor/core/src/lang/io/include/sourcemeta/core/io.h @@ -5,6 +5,11 @@ #include #endif +// NOLINTBEGIN(misc-include-cleaner) +#include +#include +// NOLINTEND(misc-include-cleaner) + #include // assert #include // std::filesystem #include // std::basic_ifstream diff --git a/vendor/core/src/lang/io/include/sourcemeta/core/io_error.h b/vendor/core/src/lang/io/include/sourcemeta/core/io_error.h new file mode 100644 index 00000000..2d4fe5d1 --- /dev/null +++ b/vendor/core/src/lang/io/include/sourcemeta/core/io_error.h @@ -0,0 +1,52 @@ +#ifndef SOURCEMETA_CORE_IO_ERROR_H_ +#define SOURCEMETA_CORE_IO_ERROR_H_ + +#ifndef SOURCEMETA_CORE_IO_EXPORT +#include +#endif + +#include // std::exception +#include // std::filesystem::path +#include // std::string +#include // std::string_view +#include // std::move + +namespace sourcemeta::core { + +// Exporting symbols that depends on the standard C++ library is considered +// safe. 
+// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN +#if defined(_MSC_VER) +#pragma warning(disable : 4251 4275) +#endif + +/// @ingroup io +/// An error that represents a failure to memory-map a file +class SOURCEMETA_CORE_IO_EXPORT FileViewError : public std::exception { +public: + FileViewError(std::filesystem::path path, const char *message) + : path_{std::move(path)}, message_{message} {} + FileViewError(std::filesystem::path path, std::string message) = delete; + FileViewError(std::filesystem::path path, std::string &&message) = delete; + FileViewError(std::filesystem::path path, std::string_view message) = delete; + + [[nodiscard]] auto what() const noexcept -> const char * override { + return this->message_; + } + + [[nodiscard]] auto path() const noexcept -> const std::filesystem::path & { + return this->path_; + } + +private: + std::filesystem::path path_; + const char *message_; +}; + +#if defined(_MSC_VER) +#pragma warning(default : 4251 4275) +#endif + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/lang/io/include/sourcemeta/core/io_fileview.h b/vendor/core/src/lang/io/include/sourcemeta/core/io_fileview.h new file mode 100644 index 00000000..4cddf041 --- /dev/null +++ b/vendor/core/src/lang/io/include/sourcemeta/core/io_fileview.h @@ -0,0 +1,67 @@ +#ifndef SOURCEMETA_CORE_IO_FILEVIEW_H_ +#define SOURCEMETA_CORE_IO_FILEVIEW_H_ + +#ifndef SOURCEMETA_CORE_IO_EXPORT +#include +#endif + +#include // assert +#include // std::size_t +#include // std::uint8_t +#include // std::filesystem::path + +namespace sourcemeta::core { + +/// @ingroup io +/// A read-only memory-mapped file. For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// struct Header { +/// std::uint32_t magic; +/// std::uint32_t version; +/// }; +/// +/// sourcemeta::core::FileView view{"/path/to/file.bin"}; +/// const auto *header = view.as
(); +/// assert(header->magic == 0x12345678); +/// ``` +class SOURCEMETA_CORE_IO_EXPORT FileView { +public: + FileView(const std::filesystem::path &path); + ~FileView(); + + // Disable copying and moving + FileView(const FileView &) = delete; + FileView(FileView &&) = delete; + auto operator=(const FileView &) -> FileView & = delete; + auto operator=(FileView &&) -> FileView & = delete; + + /// The size of the memory-mapped data in bytes + [[nodiscard]] auto size() const noexcept -> std::size_t; + + /// Interpret the memory-mapped data as a pointer to T at the given offset. + template + [[nodiscard]] auto as(const std::size_t offset = 0) const noexcept + -> const T * { + assert(offset + sizeof(T) <= this->size_); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + return reinterpret_cast(this->data_ + offset); + } + +private: + const std::uint8_t *data_{nullptr}; + std::size_t size_{0}; +#if defined(_WIN32) + void *file_handle_{nullptr}; + void *mapping_handle_{nullptr}; +#else + int file_descriptor_{-1}; +#endif +}; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/lang/io/io_fileview.cc b/vendor/core/src/lang/io/io_fileview.cc new file mode 100644 index 00000000..20905721 --- /dev/null +++ b/vendor/core/src/lang/io/io_fileview.cc @@ -0,0 +1,105 @@ +#include +#include + +#if defined(_WIN32) +#define WIN32_LEAN_AND_MEAN +#include +#else +#include // open, O_RDONLY +#include // mmap, munmap +#include // fstat +#include // close +#endif + +namespace sourcemeta::core { + +#if defined(_WIN32) + +FileView::FileView(const std::filesystem::path &path) { + this->file_handle_ = + CreateFileW(path.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr); + if (this->file_handle_ == INVALID_HANDLE_VALUE) { + throw FileViewError(path, "Could not open the file"); + } + + LARGE_INTEGER file_size; + if (GetFileSizeEx(this->file_handle_, &file_size) == 0) { + CloseHandle(this->file_handle_); + throw 
FileViewError(path, "Could not determine the file size"); + } + this->size_ = static_cast(file_size.QuadPart); + + this->mapping_handle_ = CreateFileMappingW(this->file_handle_, nullptr, + PAGE_READONLY, 0, 0, nullptr); + if (this->mapping_handle_ == nullptr) { + CloseHandle(this->file_handle_); + throw FileViewError(path, "Could not create a file mapping"); + } + + this->data_ = static_cast( + MapViewOfFile(this->mapping_handle_, FILE_MAP_READ, 0, 0, 0)); + if (this->data_ == nullptr) { + CloseHandle(this->mapping_handle_); + CloseHandle(this->file_handle_); + throw FileViewError(path, "Could not map the file into memory"); + } +} + +FileView::~FileView() { + if (this->data_ != nullptr) { + UnmapViewOfFile(this->data_); + } + + if (this->mapping_handle_ != nullptr) { + CloseHandle(this->mapping_handle_); + } + + if (this->file_handle_ != nullptr && + this->file_handle_ != INVALID_HANDLE_VALUE) { + CloseHandle(this->file_handle_); + } +} + +#else + +FileView::FileView(const std::filesystem::path &path) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) + this->file_descriptor_ = open(path.c_str(), O_RDONLY); + if (this->file_descriptor_ == -1) { + throw FileViewError(path, "Could not open the file"); + } + + struct stat file_stat; + if (fstat(this->file_descriptor_, &file_stat) != 0) { + close(this->file_descriptor_); + throw FileViewError(path, "Could not determine the file size"); + } + this->size_ = static_cast(file_stat.st_size); + + void *mapped = mmap(nullptr, this->size_, PROT_READ, MAP_PRIVATE, + this->file_descriptor_, 0); + if (mapped == MAP_FAILED) { + close(this->file_descriptor_); + throw FileViewError(path, "Could not map the file into memory"); + } + + this->data_ = static_cast(mapped); +} + +FileView::~FileView() { + if (this->data_ != nullptr && this->size_ > 0) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) + munmap(const_cast(this->data_), this->size_); + } + + if (this->file_descriptor_ != -1) { + 
close(this->file_descriptor_); + } +} + +#endif + +auto FileView::size() const noexcept -> std::size_t { return this->size_; } + +} // namespace sourcemeta::core diff --git a/vendor/core/src/lang/process/include/sourcemeta/core/process_error.h b/vendor/core/src/lang/process/include/sourcemeta/core/process_error.h index c06ba237..5de68d01 100644 --- a/vendor/core/src/lang/process/include/sourcemeta/core/process_error.h +++ b/vendor/core/src/lang/process/include/sourcemeta/core/process_error.h @@ -27,8 +27,8 @@ namespace sourcemeta::core { class SOURCEMETA_CORE_PROCESS_EXPORT ProcessProgramNotNotFoundError : public std::exception { public: - ProcessProgramNotNotFoundError(std::string program) - : program_{std::move(program)} {} + ProcessProgramNotNotFoundError(const std::string_view program) + : program_{program} {} [[nodiscard]] auto what() const noexcept -> const char * override { return "Could not locate the requested program"; @@ -46,15 +46,13 @@ class SOURCEMETA_CORE_PROCESS_EXPORT ProcessProgramNotNotFoundError /// A spawned process terminated abnormally class SOURCEMETA_CORE_PROCESS_EXPORT ProcessSpawnError : public std::exception { public: - ProcessSpawnError(std::string program, + ProcessSpawnError(const std::string_view program, std::initializer_list arguments) - : program_{std::move(program)}, - arguments_{arguments.begin(), arguments.end()} {} + : program_{program}, arguments_{arguments.begin(), arguments.end()} {} - ProcessSpawnError(std::string program, + ProcessSpawnError(const std::string_view program, std::span arguments) - : program_{std::move(program)}, - arguments_{arguments.begin(), arguments.end()} {} + : program_{program}, arguments_{arguments.begin(), arguments.end()} {} [[nodiscard]] auto what() const noexcept -> const char * override { return "Process terminated abnormally";