From a56ef4a19e7d333a4c2c8434214b9ca0b8d866b8 Mon Sep 17 00:00:00 2001 From: Nidhi Nandwani Date: Thu, 8 Jan 2026 11:40:22 +0000 Subject: [PATCH 1/6] fix(storage): add extra validation for type in object_metadata_parser --- .../cloud/storage/internal/metadata_parser.cc | 11 +++++ .../cloud/storage/internal/metadata_parser.h | 10 ++++ .../storage/internal/metadata_parser_test.cc | 46 +++++++++++++++++++ .../internal/object_metadata_parser.cc | 39 ++++++++-------- 4 files changed, 88 insertions(+), 18 deletions(-) diff --git a/google/cloud/storage/internal/metadata_parser.cc b/google/cloud/storage/internal/metadata_parser.cc index 1c8a7607b7ba5..0b85560624206 100644 --- a/google/cloud/storage/internal/metadata_parser.cc +++ b/google/cloud/storage/internal/metadata_parser.cc @@ -121,6 +121,17 @@ StatusOr ParseTimestampField( return google::cloud::internal::InvalidArgumentError(std::move(os).str(), GCP_ERROR_INFO()); } +StatusOr ParseStringField(nlohmann::json const& json, + char const* field_name) { + if (json.count(field_name) == 0) return std::string{}; + auto const& f = json[field_name]; + if (f.is_string()) return f.get(); + std::ostringstream os; + os << "Error parsing field <" << field_name + << "> as a string, json=" << json; + return google::cloud::internal::InvalidArgumentError(std::move(os).str(), + GCP_ERROR_INFO()); +} Status NotJsonObject(nlohmann::json const& j, google::cloud::internal::ErrorInfoBuilder eib) { diff --git a/google/cloud/storage/internal/metadata_parser.h b/google/cloud/storage/internal/metadata_parser.h index 9b27181387635..081919da48161 100644 --- a/google/cloud/storage/internal/metadata_parser.h +++ b/google/cloud/storage/internal/metadata_parser.h @@ -80,6 +80,16 @@ StatusOr ParseUnsignedLongField(nlohmann::json const& json, StatusOr ParseTimestampField( nlohmann::json const& json, char const* field_name); +/** + * Parses a string field. + * + * @return the value of @p field_name in @p json, or an empty string if the + * field is not present. + */ +StatusOr ParseStringField(nlohmann::json const& json, + char const* field_name); + + /// Return an error indicating that `j` is not a JSON object (it may be a /// string, or other valid JSON) Status NotJsonObject(nlohmann::json const& j, diff --git a/google/cloud/storage/internal/metadata_parser_test.cc b/google/cloud/storage/internal/metadata_parser_test.cc index a8845463e533a..dbd603b2ca04a 100644 --- a/google/cloud/storage/internal/metadata_parser_test.cc +++ b/google/cloud/storage/internal/metadata_parser_test.cc @@ -235,6 +235,52 @@ TEST(MetadataParserTest, ParseIntegralFieldInvalidFieldType) { CheckParseInvalidFieldType(&ParseUnsignedLongField); } +/// @test Verify that we parse string values in JSON objects. +TEST(MetadataParserTest, ParseStringField) { + std::string text = R"""({ + "field1": "value1", + "field2": "" + })"""; + auto json_object = nlohmann::json::parse(text); + EXPECT_EQ("value1", ParseStringField(json_object, "field1").value()); + EXPECT_EQ("", ParseStringField(json_object, "field2").value()); +} + +/// @test Verify that we parse missing string values in JSON objects. +TEST(MetadataParserTest, ParseMissingStringField) { + std::string text = R"""({ + "some-field": "some-value" + })"""; + auto json_object = nlohmann::json::parse(text); + auto actual = ParseStringField(json_object, "missing-field").value(); + EXPECT_EQ("", actual); +} + +/// @test Verify that we return an error for invalid string field types. +TEST(MetadataParserTest, ParseInvalidStringFieldType) { + std::string text = R"""({ + "field": 12345 + })"""; + auto json_object = nlohmann::json::parse(text); + + auto actual = ParseStringField(json_object, "field"); + EXPECT_THAT(actual, StatusIs(StatusCode::kInvalidArgument)); + EXPECT_THAT(actual.status().message(), + ::testing::HasSubstr("Error parsing field as a string")); +} + +/// @test Verify that we return an error for complex types where a string is expected. +TEST(MetadataParserTest, ParseInvalidStringFieldTypeComplex) { + std::string text = R"""({ + "field": [1, 2, 3] + })"""; + auto json_object = nlohmann::json::parse(text); + + EXPECT_THAT(ParseStringField(json_object, "field"), + StatusIs(StatusCode::kInvalidArgument)); +} + + TEST(MetadataParserTest, NotJsonObject) { EXPECT_THAT(NotJsonObject(nlohmann::json{}, GCP_ERROR_INFO()), StatusIs(StatusCode::kInvalidArgument)); diff --git a/google/cloud/storage/internal/object_metadata_parser.cc b/google/cloud/storage/internal/object_metadata_parser.cc index 0e97c194e8fb8..71c30b2e00d36 100644 --- a/google/cloud/storage/internal/object_metadata_parser.cc +++ b/google/cloud/storage/internal/object_metadata_parser.cc @@ -109,6 +109,10 @@ Status ParseMetadata(ObjectMetadata& meta, nlohmann::json const& json) { if (f == json.end()) return Status{}; std::map metadata; for (auto const& kv : f->items()) { + if (!kv.value().is_string()) { + return google::cloud::internal::InvalidArgumentError( + "Metadata value is not a string", GCP_ERROR_INFO()); + } metadata.emplace(kv.key(), kv.value().get()); } meta.mutable_metadata() = std::move(metadata); @@ -204,6 +208,14 @@ Status ParseHardDeleteTime(ObjectMetadata& meta, nlohmann::json const& json) { return Status{}; } +Status SetStringField(ObjectMetadata& meta, nlohmann::json const& json, + char const* key, + ObjectMetadata& (ObjectMetadata::*setter)(std::string)) { + auto v = internal::ParseStringField(json, key); + if (!v) return std::move(v).status(); + (meta.*setter)(*std::move(v)); + return Status{}; +} } // namespace StatusOr ObjectMetadataParser::FromJson( @@ -214,8 +226,7 @@ StatusOr ObjectMetadataParser::FromJson( Parser parsers[] = { ParseAcl, [](ObjectMetadata& meta, nlohmann::json const& json) { - meta.set_bucket(json.value("bucket", "")); - return Status{}; + return SetStringField(meta, json, "bucket", &ObjectMetadata::set_bucket); }, [](ObjectMetadata& meta, nlohmann::json const& json) { meta.set_cache_control(json.value("cacheControl", "")); @@ -251,41 +262,33 @@ StatusOr ObjectMetadataParser::FromJson( ParseEventBasedHold, ParseGeneration, [](ObjectMetadata& meta, nlohmann::json const& json) { - meta.set_id(json.value("id", "")); - return Status{}; + return SetStringField(meta, json, "id", &ObjectMetadata::set_id); }, [](ObjectMetadata& meta, nlohmann::json const& json) { - meta.set_kind(json.value("kind", "")); - return Status{}; + return SetStringField(meta, json, "kind", &ObjectMetadata::set_kind); }, [](ObjectMetadata& meta, nlohmann::json const& json) { - meta.set_kms_key_name(json.value("kmsKeyName", "")); - return Status{}; + return SetStringField(meta, json, "kmsKeyName", &ObjectMetadata::set_kms_key_name); }, ParseMetageneration, [](ObjectMetadata& meta, nlohmann::json const& json) { - meta.set_md5_hash(json.value("md5Hash", "")); - return Status{}; + return SetStringField(meta, json, "md5Hash", &ObjectMetadata::set_md5_hash); }, [](ObjectMetadata& meta, nlohmann::json const& json) { - meta.set_media_link(json.value("mediaLink", "")); - return Status{}; + return SetStringField(meta, json, "mediaLink", &ObjectMetadata::set_media_link); }, ParseMetadata, [](ObjectMetadata& meta, nlohmann::json const& json) { - meta.set_name(json.value("name", "")); - return Status{}; + return SetStringField(meta, json, "name", &ObjectMetadata::set_name); }, ParseOwner, ParseRetentionExpirationTime, ParseRetention, [](ObjectMetadata& meta, nlohmann::json const& json) { - meta.set_self_link(json.value("selfLink", "")); - return Status{}; + return SetStringField(meta, json, "selfLink", &ObjectMetadata::set_self_link); }, [](ObjectMetadata& meta, nlohmann::json const& json) { - meta.set_storage_class(json.value("storageClass", "")); - return Status{}; + return SetStringField(meta, json, "storageClass", &ObjectMetadata::set_storage_class); }, ParseSize, ParseTemporaryHold, From 8da5af74dfa4ebbaf040adf7019586f412f4ddd9 Mon Sep 17 00:00:00 2001 From: Nidhi Nandwani Date: Thu, 8 Jan 2026 12:53:49 +0000 Subject: [PATCH 2/6] add type validation to remaining fields --- .../cloud/storage/internal/metadata_parser.cc | 3 +- .../cloud/storage/internal/metadata_parser.h | 1 - .../storage/internal/metadata_parser_test.cc | 4 +- .../internal/object_metadata_parser.cc | 47 ++++++++++--------- 4 files changed, 29 insertions(+), 26 deletions(-) diff --git a/google/cloud/storage/internal/metadata_parser.cc b/google/cloud/storage/internal/metadata_parser.cc index 0b85560624206..f9ed9e4589a27 100644 --- a/google/cloud/storage/internal/metadata_parser.cc +++ b/google/cloud/storage/internal/metadata_parser.cc @@ -127,8 +127,7 @@ StatusOr ParseStringField(nlohmann::json const& json, auto const& f = json[field_name]; if (f.is_string()) return f.get(); std::ostringstream os; - os << "Error parsing field <" << field_name - << "> as a string, json=" << json; + os << "Error parsing field <" << field_name << "> as a string, json=" << json; return google::cloud::internal::InvalidArgumentError(std::move(os).str(), GCP_ERROR_INFO()); } diff --git a/google/cloud/storage/internal/metadata_parser.h b/google/cloud/storage/internal/metadata_parser.h index 081919da48161..7c00a847c39e2 100644 --- a/google/cloud/storage/internal/metadata_parser.h +++ b/google/cloud/storage/internal/metadata_parser.h @@ -89,7 +89,6 @@ StatusOr ParseTimestampField( StatusOr ParseStringField(nlohmann::json const& json, char const* field_name); - /// Return an error indicating that `j` is not a JSON object (it may be a /// string, or other valid JSON) Status NotJsonObject(nlohmann::json const& j, diff --git a/google/cloud/storage/internal/metadata_parser_test.cc b/google/cloud/storage/internal/metadata_parser_test.cc index dbd603b2ca04a..0c85d6e89ec54 100644 --- a/google/cloud/storage/internal/metadata_parser_test.cc +++ b/google/cloud/storage/internal/metadata_parser_test.cc @@ -269,7 +269,8 @@ TEST(MetadataParserTest, ParseInvalidStringFieldType) { ::testing::HasSubstr("Error parsing field as a string")); } -/// @test Verify that we return an error for complex types where a string is expected. +/// @test Verify that we return an error for complex types where a string is +/// expected. TEST(MetadataParserTest, ParseInvalidStringFieldTypeComplex) { std::string text = R"""({ "field": [1, 2, 3] @@ -280,7 +281,6 @@ TEST(MetadataParserTest, ParseInvalidStringFieldTypeComplex) { StatusIs(StatusCode::kInvalidArgument)); } - TEST(MetadataParserTest, NotJsonObject) { EXPECT_THAT(NotJsonObject(nlohmann::json{}, GCP_ERROR_INFO()), StatusIs(StatusCode::kInvalidArgument)); diff --git a/google/cloud/storage/internal/object_metadata_parser.cc b/google/cloud/storage/internal/object_metadata_parser.cc index 71c30b2e00d36..b088668aa70d4 100644 --- a/google/cloud/storage/internal/object_metadata_parser.cc +++ b/google/cloud/storage/internal/object_metadata_parser.cc @@ -111,7 +111,7 @@ Status ParseMetadata(ObjectMetadata& meta, nlohmann::json const& json) { for (auto const& kv : f->items()) { if (!kv.value().is_string()) { return google::cloud::internal::InvalidArgumentError( - "Metadata value is not a string", GCP_ERROR_INFO()); + "Metadata value for key <") + kv.key() + "> is not a string", GCP_ERROR_INFO()); } metadata.emplace(kv.key(), kv.value().get()); } @@ -226,38 +226,38 @@ StatusOr ObjectMetadataParser::FromJson( Parser parsers[] = { ParseAcl, [](ObjectMetadata& meta, nlohmann::json const& json) { - return SetStringField(meta, json, "bucket", &ObjectMetadata::set_bucket); + return SetStringField(meta, json, "bucket", + &ObjectMetadata::set_bucket); }, [](ObjectMetadata& meta, nlohmann::json const& json) { - meta.set_cache_control(json.value("cacheControl", "")); - return Status{}; + return SetStringField(meta, json, "cacheControl", + &ObjectMetadata::set_cache_control); }, ParseComponentCount, [](ObjectMetadata& meta, nlohmann::json const& json) { - meta.set_content_disposition(json.value("contentDisposition", "")); - return Status{}; + return SetStringField(meta, json, "contentDisposition", + &ObjectMetadata::set_content_disposition); }, [](ObjectMetadata& meta, nlohmann::json const& json) { - meta.set_content_encoding(json.value("contentEncoding", "")); - return Status{}; + return SetStringField(meta, json, "contentEncoding", + &ObjectMetadata::content_encoding); }, [](ObjectMetadata& meta, nlohmann::json const& json) { - meta.set_content_language(json.value("contentLanguage", "")); - return Status{}; + return SetStringField(meta, json, "contentLanguage", + &ObjectMetadata::set_content_language); }, [](ObjectMetadata& meta, nlohmann::json const& json) { - meta.set_content_type(json.value("contentType", "")); - return Status{}; + return SetStringField(meta, json, "contentType", + &ObjectMetadata::set_content_type); }, [](ObjectMetadata& meta, nlohmann::json const& json) { - meta.set_crc32c(json.value("crc32c", "")); - return Status{}; + return SetStringField(meta, json, "crc32c", + &ObjectMetadata::set_crc32c); }, ParseCustomTime, ParseCustomerEncryption, [](ObjectMetadata& meta, nlohmann::json const& json) { - meta.set_etag(json.value("etag", "")); - return Status{}; + return SetStringField(meta, json, "eTag", &ObjectMetadata::set_etag); }, ParseEventBasedHold, ParseGeneration, @@ -268,14 +268,17 @@ StatusOr ObjectMetadataParser::FromJson( return SetStringField(meta, json, "kind", &ObjectMetadata::set_kind); }, [](ObjectMetadata& meta, nlohmann::json const& json) { - return SetStringField(meta, json, "kmsKeyName", &ObjectMetadata::set_kms_key_name); + return SetStringField(meta, json, "kmsKeyName", + &ObjectMetadata::set_kms_key_name); }, ParseMetageneration, [](ObjectMetadata& meta, nlohmann::json const& json) { - return SetStringField(meta, json, "md5Hash", &ObjectMetadata::set_md5_hash); + return SetStringField(meta, json, "md5Hash", + &ObjectMetadata::set_md5_hash); }, [](ObjectMetadata& meta, nlohmann::json const& json) { - return SetStringField(meta, json, "mediaLink", &ObjectMetadata::set_media_link); + return SetStringField(meta, json, "mediaLink", + &ObjectMetadata::set_media_link); }, ParseMetadata, [](ObjectMetadata& meta, nlohmann::json const& json) { @@ -285,10 +288,12 @@ StatusOr ObjectMetadataParser::FromJson( ParseRetentionExpirationTime, ParseRetention, [](ObjectMetadata& meta, nlohmann::json const& json) { - return SetStringField(meta, json, "selfLink", &ObjectMetadata::set_self_link); + return SetStringField(meta, json, "selfLink", + &ObjectMetadata::set_self_link); }, [](ObjectMetadata& meta, nlohmann::json const& json) { - return SetStringField(meta, json, "storageClass", &ObjectMetadata::set_storage_class); + return SetStringField(meta, json, "storageClass", + &ObjectMetadata::set_storage_class); }, ParseSize, ParseTemporaryHold, From 87f60b0e25c1dc24923e59c486782f7baf8c7bed Mon Sep 17 00:00:00 2001 From: Nidhi Nandwani Date: Thu, 8 Jan 2026 13:04:55 +0000 Subject: [PATCH 3/6] fix build errors --- google/cloud/storage/internal/object_metadata_parser.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/google/cloud/storage/internal/object_metadata_parser.cc b/google/cloud/storage/internal/object_metadata_parser.cc index b088668aa70d4..9c9e9711fa7c7 100644 --- a/google/cloud/storage/internal/object_metadata_parser.cc +++ b/google/cloud/storage/internal/object_metadata_parser.cc @@ -111,7 +111,8 @@ Status ParseMetadata(ObjectMetadata& meta, nlohmann::json const& json) { for (auto const& kv : f->items()) { if (!kv.value().is_string()) { return google::cloud::internal::InvalidArgumentError( - "Metadata value for key <") + kv.key() + "> is not a string", GCP_ERROR_INFO()); + "Metadata value for key <" + kv.key() + "> is not a string", + GCP_ERROR_INFO()); } metadata.emplace(kv.key(), kv.value().get()); } @@ -240,7 +241,7 @@ StatusOr ObjectMetadataParser::FromJson( }, [](ObjectMetadata& meta, nlohmann::json const& json) { return SetStringField(meta, json, "contentEncoding", - &ObjectMetadata::content_encoding); + &ObjectMetadata::set_content_encoding); }, [](ObjectMetadata& meta, nlohmann::json const& json) { return SetStringField(meta, json, "contentLanguage", From d083f315d0419263772440ee08b0bfa2bc992330 Mon Sep 17 00:00:00 2001 From: Nidhi Nandwani Date: Thu, 8 Jan 2026 17:16:25 +0000 Subject: [PATCH 4/6] fix format and build errors --- google/cloud/storage/internal/object_metadata_parser.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/storage/internal/object_metadata_parser.cc b/google/cloud/storage/internal/object_metadata_parser.cc index 9c9e9711fa7c7..8642fda27e2bc 100644 --- a/google/cloud/storage/internal/object_metadata_parser.cc +++ b/google/cloud/storage/internal/object_metadata_parser.cc @@ -258,7 +258,7 @@ StatusOr ObjectMetadataParser::FromJson( ParseCustomTime, ParseCustomerEncryption, [](ObjectMetadata& meta, nlohmann::json const& json) { - return SetStringField(meta, json, "eTag", &ObjectMetadata::set_etag); + return SetStringField(meta, json, "etag", &ObjectMetadata::set_etag); }, ParseEventBasedHold, ParseGeneration, From 9e4920ae411680d61733624f8cfe33da65864128 Mon Sep 17 00:00:00 2001 From: Nidhi Nandwani Date: Fri, 9 Jan 2026 06:53:09 +0000 Subject: [PATCH 5/6] handle null values --- google/cloud/storage/internal/metadata_parser.cc | 1 + .../storage/internal/object_metadata_parser.cc | 15 +++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/google/cloud/storage/internal/metadata_parser.cc b/google/cloud/storage/internal/metadata_parser.cc index f9ed9e4589a27..4cc98709a1ce6 100644 --- a/google/cloud/storage/internal/metadata_parser.cc +++ b/google/cloud/storage/internal/metadata_parser.cc @@ -126,6 +126,7 @@ StatusOr ParseStringField(nlohmann::json const& json, if (json.count(field_name) == 0) return std::string{}; auto const& f = json[field_name]; if (f.is_string()) return f.get(); + if (f.is_null()) return std::string{}; std::ostringstream os; os << "Error parsing field <" << field_name << "> as a string, json=" << json; return google::cloud::internal::InvalidArgumentError(std::move(os).str(), diff --git a/google/cloud/storage/internal/object_metadata_parser.cc b/google/cloud/storage/internal/object_metadata_parser.cc index 8642fda27e2bc..3f6f24b1813f5 100644 --- a/google/cloud/storage/internal/object_metadata_parser.cc +++ b/google/cloud/storage/internal/object_metadata_parser.cc @@ -110,11 +110,18 @@ Status ParseMetadata(ObjectMetadata& meta, nlohmann::json const& json) { std::map metadata; for (auto const& kv : f->items()) { if (!kv.value().is_string()) { - return google::cloud::internal::InvalidArgumentError( - "Metadata value for key <" + kv.key() + "> is not a string", - GCP_ERROR_INFO()); + if (kv.value().is_null()) { + metadata.emplace(kv.key(), std::string{}); + } else { + std::ostringstream os; + os << "Error parsing field as a string, json=" << kv.value(); + return google::cloud::internal::InvalidArgumentError( + std::move(os).str(), GCP_ERROR_INFO()); + } + } else { + metadata.emplace(kv.key(), kv.value().get()); } - metadata.emplace(kv.key(), kv.value().get()); } meta.mutable_metadata() = std::move(metadata); return Status{}; From 8bf08880342f00b849734bca2fbc3b91d75e2f45 Mon Sep 17 00:00:00 2001 From: Nidhi Nandwani Date: Tue, 13 Jan 2026 05:14:53 +0000 Subject: [PATCH 6/6] review fixes --- google/cloud/storage/internal/metadata_parser.cc | 8 ++++---- google/cloud/storage/internal/object_metadata_parser.cc | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/google/cloud/storage/internal/metadata_parser.cc b/google/cloud/storage/internal/metadata_parser.cc index 4cc98709a1ce6..d5852c47e08c6 100644 --- a/google/cloud/storage/internal/metadata_parser.cc +++ b/google/cloud/storage/internal/metadata_parser.cc @@ -123,10 +123,10 @@ StatusOr ParseTimestampField( } StatusOr ParseStringField(nlohmann::json const& json, char const* field_name) { - if (json.count(field_name) == 0) return std::string{}; - auto const& f = json[field_name]; - if (f.is_string()) return f.get(); - if (f.is_null()) return std::string{}; + auto it = json.find(field_name); + if (it == json.end() || it->is_null()) return std::string{}; + if (it->is_string()) return it->get(); + std::ostringstream os; os << "Error parsing field <" << field_name << "> as a string, json=" << json; return google::cloud::internal::InvalidArgumentError(std::move(os).str(), diff --git a/google/cloud/storage/internal/object_metadata_parser.cc b/google/cloud/storage/internal/object_metadata_parser.cc index 3f6f24b1813f5..4f5e7efa20795 100644 --- a/google/cloud/storage/internal/object_metadata_parser.cc +++ b/google/cloud/storage/internal/object_metadata_parser.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include