Skip to content

Commit ba92781

Browse files
authored
fix: add identifier-field-ids JSON serialization/deserialization (#451)
Fixes the TODO in json_internal.cc for identifier-field-ids support.
1 parent 18a85e2 commit ba92781

File tree

3 files changed

+69
-5
lines changed

3 files changed

+69
-5
lines changed

src/iceberg/json_internal.cc

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "iceberg/table_properties.h"
4343
#include "iceberg/transform.h"
4444
#include "iceberg/type.h"
45+
#include "iceberg/util/checked_cast.h"
4546
#include "iceberg/util/formatter.h" // IWYU pragma: keep
4647
#include "iceberg/util/json_util_internal.h"
4748
#include "iceberg/util/macros.h"
@@ -309,7 +310,9 @@ nlohmann::json ToJson(const Type& type) {
309310
nlohmann::json ToJson(const Schema& schema) {
310311
nlohmann::json json = ToJson(static_cast<const Type&>(schema));
311312
json[kSchemaId] = schema.schema_id();
312-
// TODO(gangwu): add identifier-field-ids.
313+
if (!schema.IdentifierFieldIds().empty()) {
314+
json[kIdentifierFieldIds] = schema.IdentifierFieldIds();
315+
}
313316
return json;
314317
}
315318

@@ -473,10 +476,21 @@ Result<std::unique_ptr<Schema>> SchemaFromJson(const nlohmann::json& json) {
473476
if (type->type_id() != TypeId::kStruct) [[unlikely]] {
474477
return JsonParseError("Schema must be a struct type, but got {}", SafeDumpJson(json));
475478
}
479+
auto& struct_type = internal::checked_cast<StructType&>(*type);
480+
481+
std::vector<SchemaField> fields;
482+
fields.reserve(struct_type.fields().size());
483+
for (auto& field : struct_type.fields()) {
484+
fields.emplace_back(std::move(field));
485+
}
486+
487+
ICEBERG_ASSIGN_OR_RAISE(
488+
auto identifier_field_ids,
489+
GetJsonValueOrDefault<std::vector<int32_t>>(json, kIdentifierFieldIds));
476490

477-
auto& struct_type = static_cast<StructType&>(*type);
478-
auto schema_id = schema_id_opt.value_or(Schema::kInitialSchemaId);
479-
return FromStructType(std::move(struct_type), schema_id);
491+
return std::make_unique<Schema>(std::move(fields),
492+
schema_id_opt.value_or(Schema::kInitialSchemaId),
493+
std::move(identifier_field_ids));
480494
}
481495

482496
nlohmann::json ToJson(const PartitionField& partition_field) {

src/iceberg/test/metadata_serde_test.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,8 @@ TEST(MetadataSerdeTest, DeserializeV2Valid) {
143143
std::vector<SchemaField>{SchemaField::MakeRequired(1, "x", int64()),
144144
SchemaField::MakeRequired(2, "y", int64()),
145145
SchemaField::MakeRequired(3, "z", int64())},
146-
/*schema_id=*/1);
146+
/*schema_id=*/1,
147+
/*identifier_field_ids=*/std::vector<int32_t>{1, 2});
147148

148149
auto expected_spec_result = PartitionSpec::Make(
149150
/*spec_id=*/0,

src/iceberg/test/schema_json_test.cc

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "iceberg/json_internal.h"
2828
#include "iceberg/schema.h"
2929
#include "iceberg/schema_field.h"
30+
#include "iceberg/test/matchers.h"
3031
#include "iceberg/type.h"
3132

3233
namespace iceberg {
@@ -133,4 +134,52 @@ TEST(SchemaJsonTest, RoundTrip) {
133134
ASSERT_EQ(dumped_json, json);
134135
}
135136

137+
TEST(SchemaJsonTest, IdentifierFieldIds) {
138+
// Test schema with identifier-field-ids
139+
constexpr std::string_view json_with_identifier_str =
140+
R"({"fields":[{"id":1,"name":"id","required":true,"type":"long"},
141+
{"id":2,"name":"data","required":false,"type":"string"}],
142+
"identifier-field-ids":[1],
143+
"schema-id":1,
144+
"type":"struct"})";
145+
146+
auto json_with_identifiers = nlohmann::json::parse(json_with_identifier_str);
147+
ICEBERG_UNWRAP_OR_FAIL(auto schema_with_identifers,
148+
SchemaFromJson(json_with_identifiers));
149+
ASSERT_EQ(schema_with_identifers->fields().size(), 2);
150+
ASSERT_EQ(schema_with_identifers->schema_id(), 1);
151+
ASSERT_EQ(schema_with_identifers->IdentifierFieldIds().size(), 1);
152+
ASSERT_EQ(schema_with_identifers->IdentifierFieldIds()[0], 1);
153+
ASSERT_EQ(ToJson(*schema_with_identifers), json_with_identifiers);
154+
155+
// Test schema without identifier-field-ids
156+
constexpr std::string_view json_without_identifiers_str =
157+
R"({"fields":[{"id":1,"name":"id","required":true,"type":"int"},
158+
{"id":2,"name":"name","required":false,"type":"string"}],
159+
"schema-id":1,
160+
"type":"struct"})";
161+
162+
auto json_without_identifiers = nlohmann::json::parse(json_without_identifiers_str);
163+
ICEBERG_UNWRAP_OR_FAIL(auto schema_without_identifiers,
164+
SchemaFromJson(json_without_identifiers));
165+
ASSERT_TRUE(schema_without_identifiers->IdentifierFieldIds().empty());
166+
ASSERT_EQ(ToJson(*schema_without_identifiers), json_without_identifiers);
167+
168+
// Test schema with multiple identifier fields
169+
constexpr std::string_view json_multi_identifiers_str =
170+
R"({"fields":[{"id":1,"name":"user_id","required":true,"type":"long"},
171+
{"id":2,"name":"org_id","required":true,"type":"long"},
172+
{"id":3,"name":"data","required":false,"type":"string"}],
173+
"identifier-field-ids":[1,2],
174+
"schema-id":2,
175+
"type":"struct"})";
176+
auto json_multi_identifiers = nlohmann::json::parse(json_multi_identifiers_str);
177+
ICEBERG_UNWRAP_OR_FAIL(auto schema_multi_identifiers,
178+
SchemaFromJson(json_multi_identifiers));
179+
ASSERT_EQ(schema_multi_identifiers->IdentifierFieldIds().size(), 2);
180+
ASSERT_EQ(schema_multi_identifiers->IdentifierFieldIds()[0], 1);
181+
ASSERT_EQ(schema_multi_identifiers->IdentifierFieldIds()[1], 2);
182+
ASSERT_EQ(ToJson(*schema_multi_identifiers), json_multi_identifiers);
183+
}
184+
136185
} // namespace iceberg

0 commit comments

Comments
 (0)