Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,71 @@ public void testParseSchemaSuccessfullyWhenExtendedValidationEnabled() {
AvroSchemaParseUtils.parseSchemaFromJSON(SCHEMA_PASS_EXTENDED_VALIDATION, extendedSchemaValidityCheckEnabled);
}

// Legacy-style schema with an int default on a float field — fails STRICT (numeric tier check),
// accepted by LOOSE_NUMERICS, and rewritten by coerceNumericDefaultsToFieldType.
private static final String SCHEMA_NUMERIC_DEFAULT_MISMATCH =
"{\n" + " \"type\": \"record\",\n" + " \"name\": \"Scores\",\n" + " \"fields\": [\n"
+ " { \"name\": \"score\", \"type\": \"float\", \"default\": 0 }\n" + " ]\n" + "}";

// Union with default whose declared type matches the second branch, not the first — a STRICT
// failure that's outside the numeric-default tier. Confirms LOOSE_NUMERICS only opens the one door.
private static final String SCHEMA_UNION_DEFAULT_NOT_FIRST_BRANCH =
"{\n" + " \"type\": \"record\",\n" + " \"name\": \"BadUnionDefault\",\n" + " \"fields\": [\n"
+ " { \"name\": \"f\", \"type\": [\"int\", \"null\"], \"default\": null }\n" + " ]\n" + "}";

@Test
public void testLooseNumericAcceptsIntDefaultOnFloatField() {
// STRICT rejects this — the historical migration foot-gun.
Assert.assertThrows(
Exception.class,
() -> AvroSchemaParseUtils.parseSchemaFromJSONStrictValidation(SCHEMA_NUMERIC_DEFAULT_MISMATCH));

// LOOSE_NUMERICS accepts it.
Schema parsed = AvroSchemaParseUtils.parseSchemaFromJSONLooseNumericValidation(SCHEMA_NUMERIC_DEFAULT_MISMATCH);
Assert.assertNotNull(parsed);
}

@Test
public void testLooseNumericRejectsNonNumericStrictViolation() {
// The union-default-not-first-branch case is a STRICT failure that LOOSE_NUMERICS preserves —
// it's outside the numeric-default tier this preset relaxes.
Assert.assertThrows(
Exception.class,
() -> AvroSchemaParseUtils.parseSchemaFromJSONLooseNumericValidation(SCHEMA_UNION_DEFAULT_NOT_FIRST_BRANCH));
}

@Test
public void testCoerceNumericDefaultsRewritesIntDefaultOnFloatField() {
String coerced = AvroSchemaParseUtils.coerceNumericDefaultsToFieldType(SCHEMA_NUMERIC_DEFAULT_MISMATCH);
Assert.assertNotEquals(coerced, SCHEMA_NUMERIC_DEFAULT_MISMATCH, "Legacy schema must be rewritten");
// Output must be strict-parse-clean — that's the whole point.
Schema strictParsed = AvroSchemaParseUtils.parseSchemaFromJSONStrictValidation(coerced);
Assert.assertNotNull(strictParsed);
}

@Test
public void testCoerceNumericDefaultsIsNoOpForCleanSchema() {
String coerced = AvroSchemaParseUtils.coerceNumericDefaultsToFieldType(SCHEMA_PASS_EXTENDED_VALIDATION);
Assert.assertSame(coerced, SCHEMA_PASS_EXTENDED_VALIDATION, "Clean schema must be returned by identity");
}

@Test
public void testCoerceNumericDefaultsDoesNotFixNonNumericIssues() {
// The walker only fixes numeric default mismatches. Other STRICT violations must remain.
String coerced = AvroSchemaParseUtils.coerceNumericDefaultsToFieldType(SCHEMA_UNION_DEFAULT_NOT_FIRST_BRANCH);
Assert.assertThrows(Exception.class, () -> AvroSchemaParseUtils.parseSchemaFromJSONStrictValidation(coerced));
}

@Test
public void testCoerceNumericDefaultsHandlesAllNumericTiers() {
String schema = "{\"type\":\"record\",\"name\":\"AllTiers\",\"fields\":["
+ "{\"name\":\"f\",\"type\":\"float\",\"default\":0}," + "{\"name\":\"d\",\"type\":\"double\",\"default\":1},"
+ "{\"name\":\"l\",\"type\":\"long\",\"default\":2.0}," + "{\"name\":\"i\",\"type\":\"int\",\"default\":3.0}"
+ "]}";
String coerced = AvroSchemaParseUtils.coerceNumericDefaultsToFieldType(schema);
AvroSchemaParseUtils.parseSchemaFromJSONStrictValidation(coerced);
}

@Test
public void testParseSchemaStrWithDifferentConfigurations() {
String schemaStr = "{" + " \"doc\": \"Value in the store\"," + " \"fields\": [" + " {\n"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
package com.linkedin.venice.schema;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.DoubleNode;
import com.fasterxml.jackson.databind.node.FloatNode;
import com.fasterxml.jackson.databind.node.IntNode;
import com.fasterxml.jackson.databind.node.LongNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper;
import com.linkedin.avroutil1.compatibility.SchemaParseConfiguration;
import com.linkedin.venice.exceptions.VeniceException;
import com.linkedin.venice.utils.ObjectMapperFactory;
import java.io.IOException;
import java.util.Iterator;
import org.apache.avro.Schema;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
Expand Down Expand Up @@ -38,7 +48,109 @@ public static Schema parseSchemaFromJSONStrictValidation(String jsonSchema) {
return AvroCompatibilityHelper.parse(jsonSchema, SchemaParseConfiguration.STRICT, null).getMainSchema();
}

/**
* Parses with {@link SchemaParseConfiguration#LOOSE_NUMERICS}, which differs from STRICT only by
* disabling {@code validateNumericDefaultValueTypes}. Numeric defaults whose JSON tier doesn't
* match the field's declared numeric type (e.g. {@code "default": 0} for a {@code float} field)
* are accepted at parse time. All other strict checks (name validation,
* default-actually-matches-type, dangling-content) still apply.
*
* Note: Avro retains the original {@code JsonNode} as the default — it does NOT coerce the in-memory
* default to the declared type. Use {@link #coerceNumericDefaultsToFieldType(String)} if you need a
* string form that subsequently passes STRICT.
*/
public static Schema parseSchemaFromJSONLooseNumericValidation(String jsonSchema) {
return AvroCompatibilityHelper.parse(jsonSchema, SchemaParseConfiguration.LOOSE_NUMERICS, null).getMainSchema();
}

public static Schema parseSchemaFromJSONLooseValidation(String jsonSchema) {
return AvroCompatibilityHelper.parse(jsonSchema, SchemaParseConfiguration.LOOSE, null).getMainSchema();
}

/**
* Walks the schema JSON and coerces numeric default values to the declared primitive numeric field
* type. For example {@code {"name":"score","type":"float","default":0}} becomes
* {@code {"name":"score","type":"float","default":0.0}} — making the schema strict-parse-clean
* without changing its semantic meaning (Avro fingerprint / parsing canonical form is unaffected
* since defaults are not part of the canonical form).
*
* Only acts on field specs whose {@code type} is a textual primitive numeric name and whose
* {@code default} is a numeric JSON value of a different tier. Does not touch unions, complex
* types, non-numeric defaults, or fields whose default already matches the declared type.
*
* Returns the input unchanged if no coercion was needed.
*
* Intended for use during store migration to rewrite legacy schemas registered before
* {@code validateNumericDefaultValueTypes} was enforced.
*/
public static String coerceNumericDefaultsToFieldType(String jsonSchema) {
try {
ObjectMapper mapper = ObjectMapperFactory.getInstance();
JsonNode root = mapper.readTree(jsonSchema);
if (coerceInPlace(root)) {
return mapper.writeValueAsString(root);
}
return jsonSchema;
} catch (IOException e) {
throw new VeniceException("Failed to parse JSON for numeric default coercion: " + jsonSchema, e);
}
}

private static boolean coerceInPlace(JsonNode node) {
boolean changed = false;
if (node.isArray()) {
for (JsonNode child: node) {
if (coerceInPlace(child)) {
changed = true;
}
}
return changed;
}
if (!node.isObject()) {
return false;
}
ObjectNode obj = (ObjectNode) node;
JsonNode typeNode = obj.get("type");
JsonNode defaultNode = obj.get("default");
if (typeNode != null && typeNode.isTextual() && defaultNode != null && defaultNode.isNumber()) {
JsonNode coerced = coerceNumber(defaultNode, typeNode.asText());
if (coerced != null) {
obj.set("default", coerced);
changed = true;
}
}
for (Iterator<String> it = obj.fieldNames(); it.hasNext();) {
if (coerceInPlace(obj.get(it.next()))) {
changed = true;
}
}
return changed;
}

private static JsonNode coerceNumber(JsonNode value, String declaredType) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Jackson always parses a JSON float literal like 0.0 into a DoubleNode, never a FloatNode. so the value.isDouble() short-circuit on the "float" branch means a legacy schema written as {"type":"float","default":0.0} would slip through without being rewritten.

The existing tests cover IntNode-on-float (e.g. "default":0) but not DoubleNode-on-float, so I can't tell from the suite alone whether avroutil1's strict parser tolerates that combo or not. Could you please add a quick test for {"type":"float","default":0.0} running through coerceNumericDefaultsToFieldType followed by parseSchemaFromJSONStrictValidation?

switch (declaredType) {
case "float":
if (value.isFloat() || value.isDouble()) {
return null;
}
return new FloatNode(value.floatValue());
case "double":
if (value.isDouble()) {
return null;
}
return new DoubleNode(value.doubleValue());
case "int":
if (value.isInt()) {
return null;
}
return new IntNode(value.intValue());
case "long":
if (value.isLong()) {
return null;
}
return new LongNode(value.longValue());
default:
return null;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
package com.linkedin.venice.schema;

import com.linkedin.venice.exceptions.VeniceException;
import org.apache.avro.Schema;
import org.testng.Assert;
import org.testng.annotations.Test;


/**
* Branch-coverage tests for {@link AvroSchemaParseUtils}.
*
* Lives in {@code venice-client-common} (alongside the class under test) so coverage tooling on
* this module sees the exercised branches. There is a similar test file in {@code venice-push-job}
* that covers a few overlapping cases but doesn't count toward this module's diff coverage.
*/
public class TestAvroSchemaParseUtils {
// STRICT-clean record schema.
private static final String CLEAN_RECORD_SCHEMA =
"{\"type\":\"record\",\"name\":\"Clean\",\"fields\":[" + "{\"name\":\"v\",\"type\":\"string\"}]}";

// Legacy: int default on a float field — fails STRICT (numeric tier), accepted by LOOSE_NUMERICS.
private static final String LEGACY_INT_ON_FLOAT = "{\"type\":\"record\",\"name\":\"Scores\",\"fields\":["
+ "{\"name\":\"score\",\"type\":\"float\",\"default\":0}]}";

// Union default whose JSON tier matches the second branch, not the first — STRICT failure outside
// the numeric-default tier. LOOSE_NUMERICS must still reject this.
private static final String UNION_DEFAULT_NOT_FIRST_BRANCH = "{\"type\":\"record\",\"name\":\"BadUnion\",\"fields\":["
+ "{\"name\":\"f\",\"type\":[\"int\",\"null\"],\"default\":null}]}";

// Schema mixing all four numeric tiers with mismatched defaults — exercises every case in the
// coerceNumber switch.
private static final String ALL_NUMERIC_TIERS = "{\"type\":\"record\",\"name\":\"AllTiers\",\"fields\":["
+ "{\"name\":\"f\",\"type\":\"float\",\"default\":0}," + "{\"name\":\"d\",\"type\":\"double\",\"default\":1},"
+ "{\"name\":\"l\",\"type\":\"long\",\"default\":2.0}," + "{\"name\":\"i\",\"type\":\"int\",\"default\":3.0}]}";

// ----- parseSchemaFromJSONStrictValidation ----------------------------------------------------

@Test
public void strictAcceptsCleanSchema() {
Schema s = AvroSchemaParseUtils.parseSchemaFromJSONStrictValidation(CLEAN_RECORD_SCHEMA);
Assert.assertEquals(s.getType(), Schema.Type.RECORD);
}

@Test
public void strictRejectsLegacyIntOnFloat() {
Assert.assertThrows(
Exception.class,
() -> AvroSchemaParseUtils.parseSchemaFromJSONStrictValidation(LEGACY_INT_ON_FLOAT));
}

// ----- parseSchemaFromJSONLooseNumericValidation ----------------------------------------------

@Test
public void looseNumericAcceptsLegacyIntOnFloat() {
Schema s = AvroSchemaParseUtils.parseSchemaFromJSONLooseNumericValidation(LEGACY_INT_ON_FLOAT);
Assert.assertNotNull(s);
}

@Test
public void looseNumericStillRejectsNonNumericStrictViolations() {
// The union-default-not-first-branch case is outside the numeric-default tier this preset
// relaxes — must still throw.
Assert.assertThrows(
Exception.class,
() -> AvroSchemaParseUtils.parseSchemaFromJSONLooseNumericValidation(UNION_DEFAULT_NOT_FIRST_BRANCH));
}

// ----- parseSchemaFromJSONLooseValidation -----------------------------------------------------

@Test
public void looseAcceptsLegacyIntOnFloat() {
Schema s = AvroSchemaParseUtils.parseSchemaFromJSONLooseValidation(LEGACY_INT_ON_FLOAT);
Assert.assertNotNull(s);
}

// ----- parseSchemaFromJSON(str, extendedSchemaValidityCheckEnabled) ---------------------------

@Test
public void parseSchemaFromJSONStrictPassesThroughForCleanSchema() {
// Clean schema → strict parser succeeds, no fallback triggered.
Schema s = AvroSchemaParseUtils.parseSchemaFromJSON(CLEAN_RECORD_SCHEMA, true);
Assert.assertEquals(s.getType(), Schema.Type.RECORD);
}

@Test(expectedExceptions = VeniceException.class)
public void parseSchemaFromJSONThrowsWhenExtendedValidationOnAndStrictFails() {
AvroSchemaParseUtils.parseSchemaFromJSON(LEGACY_INT_ON_FLOAT, true);
}

@Test
public void parseSchemaFromJSONFallsBackToLooseWhenExtendedValidationOff() {
// Strict fails, but extendedSchemaValidityCheckEnabled=false so we fall back to LOOSE and
// return a parsed schema rather than throwing.
Schema s = AvroSchemaParseUtils.parseSchemaFromJSON(LEGACY_INT_ON_FLOAT, false);
Assert.assertNotNull(s);
}

// ----- coerceNumericDefaultsToFieldType -------------------------------------------------------

@Test
public void doubleDefaultOnFloatFieldRoundTripsCleanly() {
// Jackson parses any JSON decimal literal (e.g. {@code 0.0}) as a DoubleNode regardless of the
// declared field type. The {@code "float"} branch of {@code coerceNumber} short-circuits on
// {@code isDouble()} — i.e. it does NOT rewrite DoubleNode-on-float to FloatNode. This test
// pins down that empirically: avro-util1's STRICT parser accepts DoubleNode-on-float, so the
// walker's short-circuit produces a strict-clean output. If avro-util1 ever tightens the float
// numeric-tier check to be symmetric (rejecting double-typed defaults on float fields), this
// test will fail and the {@code "float"} branch will need to coerce DoubleNode -> FloatNode.
String doubleOnFloat = "{\"type\":\"record\",\"name\":\"Scores\",\"fields\":["
+ "{\"name\":\"score\",\"type\":\"float\",\"default\":0.0}]}";
String coerced = AvroSchemaParseUtils.coerceNumericDefaultsToFieldType(doubleOnFloat);
AvroSchemaParseUtils.parseSchemaFromJSONStrictValidation(coerced);
}

@Test
public void coerceRewritesIntDefaultOnFloatField() {
String coerced = AvroSchemaParseUtils.coerceNumericDefaultsToFieldType(LEGACY_INT_ON_FLOAT);
Assert.assertNotEquals(coerced, LEGACY_INT_ON_FLOAT, "Walker must rewrite the legacy default");
// Round-trip: the rewritten output must pass STRICT.
AvroSchemaParseUtils.parseSchemaFromJSONStrictValidation(coerced);
}

@Test
public void coerceHandlesAllFourNumericTiers() {
// Walks each branch of coerceNumber's switch (float / double / int / long with mismatched JSON tier).
String coerced = AvroSchemaParseUtils.coerceNumericDefaultsToFieldType(ALL_NUMERIC_TIERS);
AvroSchemaParseUtils.parseSchemaFromJSONStrictValidation(coerced);
}

@Test
public void coerceReturnsInputUnchangedWhenCleanSchema() {
String coerced = AvroSchemaParseUtils.coerceNumericDefaultsToFieldType(CLEAN_RECORD_SCHEMA);
Assert.assertEquals(coerced, CLEAN_RECORD_SCHEMA, "Clean schema must short-circuit the walker");
}

@Test
public void coerceLeavesNonNumericStrictViolationsAlone() {
// The walker is a numeric-default fixer only. A non-numeric STRICT violation must still trip
// strict parse on the walker's output.
String coerced = AvroSchemaParseUtils.coerceNumericDefaultsToFieldType(UNION_DEFAULT_NOT_FIRST_BRANCH);
Assert.assertThrows(Exception.class, () -> AvroSchemaParseUtils.parseSchemaFromJSONStrictValidation(coerced));
}

@Test
public void coerceRecursesIntoNestedRecord() {
// Nested record with a legacy numeric default inside — exercises the recursion path through
// a non-array, non-primitive {@code type} value.
String nested = "{\"type\":\"record\",\"name\":\"Outer\",\"fields\":["
+ "{\"name\":\"inner\",\"type\":{\"type\":\"record\",\"name\":\"Inner\",\"fields\":["
+ "{\"name\":\"score\",\"type\":\"float\",\"default\":0}]}}]}";
String coerced = AvroSchemaParseUtils.coerceNumericDefaultsToFieldType(nested);
Assert.assertNotEquals(coerced, nested);
AvroSchemaParseUtils.parseSchemaFromJSONStrictValidation(coerced);
}

@Test
public void coerceLeavesNonNumericDefaultsUnchanged() {
// String-typed field with a string default — not a coercion target.
String coerced = AvroSchemaParseUtils.coerceNumericDefaultsToFieldType(CLEAN_RECORD_SCHEMA);
Assert.assertEquals(coerced, CLEAN_RECORD_SCHEMA);
}

@Test
public void coerceLeavesNonTextualTypeUnchanged() {
// When the {@code type} is itself an object (e.g. a nested record definition) rather than a
// textual primitive name, the field-spec check skips and only recursion fires.
String schema = "{\"type\":\"record\",\"name\":\"Outer\",\"fields\":["
+ "{\"name\":\"items\",\"type\":{\"type\":\"array\",\"items\":\"string\"}}]}";
String coerced = AvroSchemaParseUtils.coerceNumericDefaultsToFieldType(schema);
Assert.assertEquals(coerced, schema);
}

@Test(expectedExceptions = VeniceException.class)
public void coerceWrapsInvalidJsonAsVeniceException() {
AvroSchemaParseUtils.coerceNumericDefaultsToFieldType("{not valid json");
}
}
Loading