Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 73 additions & 6 deletions include/fkYAML/detail/output/serializer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,10 +262,10 @@ class basic_serializer {
str += m_tmp_str_buff;
break;
case node_type::STRING: {
bool is_escaped = false;
auto str_val = get_string_node_value(node, is_escaped);
bool needs_quotes = false;
auto str_val = get_string_node_value(node, needs_quotes);

if (is_escaped) {
if (needs_quotes) {
// There's no other token type with escapes than strings.
// Also, escapes must be in double-quoted strings.
str += '\"';
Expand Down Expand Up @@ -366,15 +366,82 @@ class basic_serializer {
return false;
}

/// @brief Scan a string for YAML indicator characters that require quoting.
/// @param[in] str The already escaped string value to be inspected.
/// @return true if the string contains characters that could be interpreted
/// by a YAML parser as structural tokens (e.g., alias, anchor, tag,
/// comment, or flow indicators) or space at the beginning, and therefore
/// must be surrounded with quotes to preserve its semantic meaning;
/// false otherwise.
bool scan_yaml_tokens(const std::string& str) const {
if (str.empty()) {
// Empty plain scalar is technically valid, and escaper already
// decided we don't need quotes for whitespace/escapes.
return false;
}

// The string contains spaces at the beginning or end of the stirng. " data "
if (str.front() == ' ')
{
return true;
}
if (str.back() == ' ')
{
return true;
}

for (char c : str) {
switch (c) {
// Definitely dangerous as plain: will be lexed as alias/anchor/tag/comment
case '*': // alias: *foo
case '&': // anchor: &foo
case '!': // tag
case '#': // comment start
return true;

// Structural indicators; legal in plain scalars in some positions,
// but conservative choice is to quote whenever present.
case '[':
case ']':
case '{':
case '}':
case ',':
case '|':
case '>':
case '@':
case '`':
case '%':
return true;

// “Flow” indicators at beginning of scalar or before space are tricky.
// Being conservative, just quote if they appear anywhere.
case ':':
case '?':
case '-':
return true;

default:
break;
}
}

// No suspicious YAML indicator characters found, can stay plain.
return false;
}

/// @brief Get a string value from the given node and, if necessary, escape its contents.
/// @param[in] node The target string YAML node.
/// @param[out] is_escaped Whether the contents of an output string has been escaped.
/// @param[out] need_quotes Whether the contents of an output string needs to be quoted.
/// @return The (escaped) string node value.
typename BasicNodeType::string_type get_string_node_value(const BasicNodeType& node, bool& is_escaped) {
typename BasicNodeType::string_type get_string_node_value(const BasicNodeType& node, bool& need_quotes) const {
FK_YAML_ASSERT(node.is_string());

const auto& s = node.as_str();
return yaml_escaper::escape(s.c_str(), s.c_str() + s.size(), is_escaped);
auto result = yaml_escaper::escape(s.c_str(), s.c_str() + s.size(), need_quotes);
if (!need_quotes) {
need_quotes = scan_yaml_tokens(result);
}
return result;
} // LCOV_EXCL_LINE

private:
Expand Down
79 changes: 73 additions & 6 deletions single_include/fkYAML/node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11000,10 +11000,10 @@ class basic_serializer {
str += m_tmp_str_buff;
break;
case node_type::STRING: {
bool is_escaped = false;
auto str_val = get_string_node_value(node, is_escaped);
bool needs_quotes = false;
auto str_val = get_string_node_value(node, needs_quotes);

if (is_escaped) {
if (needs_quotes) {
// There's no other token type with escapes than strings.
// Also, escapes must be in double-quoted strings.
str += '\"';
Expand Down Expand Up @@ -11104,15 +11104,82 @@ class basic_serializer {
return false;
}

/// @brief Scan a string for YAML indicator characters that require quoting.
/// @param[in] str The already escaped string value to be inspected.
/// @return true if the string contains characters that could be interpreted
/// by a YAML parser as structural tokens (e.g., alias, anchor, tag,
/// comment, or flow indicators) or space at the beginning, and therefore
/// must be surrounded with quotes to preserve its semantic meaning;
/// false otherwise.
bool scan_yaml_tokens(const std::string& str) const {
if (str.empty()) {
// Empty plain scalar is technically valid, and escaper already
// decided we don't need quotes for whitespace/escapes.
return false;
}

// The string contains spaces at the beginning or end of the stirng. " data "
if (str.front() == ' ')
{
return true;
}
if (str.back() == ' ')
{
return true;
}

for (char c : str) {
switch (c) {
// Definitely dangerous as plain: will be lexed as alias/anchor/tag/comment
case '*': // alias: *foo
case '&': // anchor: &foo
case '!': // tag
case '#': // comment start
return true;

// Structural indicators; legal in plain scalars in some positions,
// but conservative choice is to quote whenever present.
case '[':
case ']':
case '{':
case '}':
case ',':
case '|':
case '>':
case '@':
case '`':
case '%':
return true;

// “Flow” indicators at beginning of scalar or before space are tricky.
// Being conservative, just quote if they appear anywhere.
case ':':
case '?':
case '-':
return true;

default:
break;
}
}

// No suspicious YAML indicator characters found, can stay plain.
return false;
}

/// @brief Get a string value from the given node and, if necessary, escape its contents.
/// @param[in] node The target string YAML node.
/// @param[out] is_escaped Whether the contents of an output string has been escaped.
/// @param[out] need_quotes Whether the contents of an output string needs to be quoted.
/// @return The (escaped) string node value.
typename BasicNodeType::string_type get_string_node_value(const BasicNodeType& node, bool& is_escaped) {
typename BasicNodeType::string_type get_string_node_value(const BasicNodeType& node, bool& need_quotes) const {
FK_YAML_ASSERT(node.is_string());

const auto& s = node.as_str();
return yaml_escaper::escape(s.c_str(), s.c_str() + s.size(), is_escaped);
auto result = yaml_escaper::escape(s.c_str(), s.c_str() + s.size(), need_quotes);
if (!need_quotes) {
need_quotes = scan_yaml_tokens(result);
}
return result;
} // LCOV_EXCL_LINE

private:
Expand Down
1 change: 1 addition & 0 deletions tests/unit_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ add_executable(
test_utf_encodings.cpp
test_yaml_escaper_class.cpp
test_yaml_version_type.cpp
test_string_serialize_deserialize.cpp
main.cpp
)

Expand Down
55 changes: 55 additions & 0 deletions tests/unit_test/test_string_serialize_deserialize.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// String serialize/deserialize round-trip tests

#include <catch2/catch.hpp>

#include <fkYAML/node.hpp>

TEST_CASE("String_Serialize_Deserialize_Roundtrip") {
fkyaml::node source = fkyaml::node::mapping();
source["alias"] = "*anchor";
source["anchor"] = "&anchor";
source["tag"] = "!tag";
source["comment"] = "#not-a-comment";
source["brackets"] = "[1,2]";
source["braces"] = "{a: 1}";
source["comma"] = "a,b";
source["pipe"] = "a|b";
source["gt"] = "a>b";
source["at"] = "user@host";
source["backtick"] = "`code`";
source["percent"] = "100%";
source["colon"] = "key:value";
source["question"] = "?maybe";
source["dash"] = "-value";
source["begin_space"] = " value";
source["end_space"] = "value ";
source["space"] = " value ";

// Values that should *not* trigger scane_yaml_tokens
source["plain"] = "just_text";

std::stringstream buff;
buff << source;

fkyaml::node copy;
REQUIRE_NOTHROW(copy = fkyaml::node::deserialize(buff));
REQUIRE(source["alias"] == copy["alias"]);
REQUIRE(source["anchor"] == copy["anchor"]);
REQUIRE(source["tag"] == copy["tag"]);
REQUIRE(source["comment"] == copy["comment"]);
REQUIRE(source["brackets"] == copy["brackets"]);
REQUIRE(source["braces"] == copy["braces"]);
REQUIRE(source["comma"] == copy["comma"]);
REQUIRE(source["pipe"] == copy["pipe"]);
REQUIRE(source["gt"] == copy["gt"]);
REQUIRE(source["at"] == copy["at"]);
REQUIRE(source["backtick"] == copy["backtick"]);
REQUIRE(source["percent"] == copy["percent"]);
REQUIRE(source["colon"] == copy["colon"]);
REQUIRE(source["question"] == copy["question"]);
REQUIRE(source["dash"] == copy["dash"]);
REQUIRE(source["plain"] == copy["plain"]);
REQUIRE(source["begin_space"] == copy["begin_space"]);
REQUIRE(source["end_space"] == copy["end_space"]);
REQUIRE(source["space"] == copy["space"]);
}
Loading