diff --git a/CHANGELOG.md b/CHANGELOG.md index 5afffdfc10..a58097eaed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ * [ENHANCEMENT] Ring: Cache `ShuffleShardWithLookback` subrings. The cached entry is invalidated on topology change or once `now` reaches the earliest `RegisteredTimestamp + lookbackPeriod` of any included instance. #7628 * [ENHANCEMENT] Query Frontend: Rename `time_taken` field to `time_taken_ms` and make it return millisecond count. #7649 * [ENHANCEMENT] Update prometheus alertmanager version to v0.33.0. #7647 +* [ENHANCEMENT] Distributor: Include the offending series labels in the "sample missing metric name" and "no metric name label" push errors so operators can identify which series is missing a metric name. #7657 * [BUGFIX] Querier: Fix queryWithRetry and labelsWithRetry returning (nil, nil) on cancelled context by propagating ctx.Err(). #7370 * [BUGFIX] Metrics Helper: Fix non-deterministic bucket order in merged histograms by sorting buckets after map iteration, matching Prometheus client library behavior. #7380 * [BUGFIX] Distributor: Return HTTP 401 Unauthorized when tenant ID resolution fails in the Prometheus Remote Write 2.0 path. #7389 diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index edf9b750b8..6fd382ffb6 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -1208,7 +1208,7 @@ func (d *Distributor) prepareSeriesKeys(ctx context.Context, req *cortexpb.Write // label and dropped labels (if any) key, err := d.tokenForLabels(userID, ts.Labels) if err != nil { - return nil, nil, nil, nil, 0, 0, 0, 0, nil, err + return nil, nil, nil, nil, 0, 0, 0, 0, nil, errors.Wrapf(err, "failed to compute sharding token for series %s", cortexpb.FromLabelAdaptersToMetric(ts.Labels).String()) } validatedSeries, validationErr := d.validateSeries(*ts, userID, skipLabelNameValidation, limits) diff --git a/pkg/distributor/distributor_test.go b/pkg/distributor/distributor_test.go index 2cb0b1522f..d0ac4f3059 100644 --- a/pkg/distributor/distributor_test.go +++ b/pkg/distributor/distributor_test.go @@ -2186,7 +2186,36 @@ func TestDistributor_Push_LabelRemoval_RemovingNameLabelWillError(t *testing.T) req := mockWriteRequest([]labels.Labels{tc.inputSeries}, 1, 1, false) _, err = ds[0].Push(ctx, req) require.Error(t, err) - assert.Equal(t, "rpc error: code = Code(400) desc = sample missing metric name", err.Error()) + assert.Equal(t, `rpc error: code = Code(400) desc = sample missing metric name metric: "{cluster=\"one\"}"`, err.Error()) +} + +// TestDistributor_Push_NoMetricNameShardingErrorReportsSeries covers the path that +// the original bug report hit (issue #5802): with metric name enforcement disabled +// and shard_by_all_labels=false, a series without a metric name fails while computing +// its sharding token. The resulting error must name the offending series so operators +// can identify it instead of only seeing "no metric name label". +func TestDistributor_Push_NoMetricNameShardingErrorReportsSeries(t *testing.T) { + t.Parallel() + ctx := user.InjectOrgID(context.Background(), "user") + + var limits validation.Limits + flagext.DefaultValues(&limits) + limits.EnforceMetricName = false + + ds, _, _, _ := prepare(t, prepConfig{ + numIngesters: 2, + happyIngesters: 2, + numDistributors: 1, + shardByAllLabels: false, + limits: &limits, + }) + + inputSeries := labels.FromStrings("foo", "bar", "team", "a") + req := mockWriteRequest([]labels.Labels{inputSeries}, 1, 1, false) + _, err := ds[0].Push(ctx, req) + require.Error(t, err) + assert.Contains(t, err.Error(), "no metric name label") + assert.Contains(t, err.Error(), `{foo="bar", team="a"}`) } func TestDistributor_Push_ShouldGuaranteeShardingTokenConsistencyOverTheTime(t *testing.T) { diff --git a/pkg/util/validation/errors.go b/pkg/util/validation/errors.go index 2efc077655..af47cc6a4e 100644 --- a/pkg/util/validation/errors.go +++ b/pkg/util/validation/errors.go @@ -134,14 +134,18 @@ func (e *tooManyLabelsError) Error() string { len(e.series), e.limit, cortexpb.FromLabelAdaptersToMetric(e.series).String()) } -type noMetricNameError struct{} +type noMetricNameError struct { + series []cortexpb.LabelAdapter +} -func newNoMetricNameError() ValidationError { - return &noMetricNameError{} +func newNoMetricNameError(series []cortexpb.LabelAdapter) ValidationError { + return &noMetricNameError{ + series: series, + } } func (e *noMetricNameError) Error() string { - return "sample missing metric name" + return fmt.Sprintf("sample missing metric name metric: %.200q", formatLabelSet(e.series)) } type invalidMetricNameError struct { diff --git a/pkg/util/validation/validate.go b/pkg/util/validation/validate.go index a2f7bc395d..73b89d2499 100644 --- a/pkg/util/validation/validate.go +++ b/pkg/util/validation/validate.go @@ -285,7 +285,7 @@ func ValidateMetricName(limits *Limits, ls []cortexpb.LabelAdapter, nameValidati } unsafeMetricName, err := extract.UnsafeMetricNameFromLabelAdapters(ls) if err != nil { - return newNoMetricNameError(), missingMetricName + return newNoMetricNameError(ls), missingMetricName } if !nameValidationScheme.IsValidMetricName(unsafeMetricName) { return newInvalidMetricNameError(unsafeMetricName), invalidMetricName diff --git a/pkg/util/validation/validate_test.go b/pkg/util/validation/validate_test.go index 6832ee61f0..9aa1657bee 100644 --- a/pkg/util/validation/validate_test.go +++ b/pkg/util/validation/validate_test.go @@ -81,7 +81,8 @@ func TestValidateMetricName(t *testing.T) { expectErr error reason string }{ - {map[model.LabelName]model.LabelValue{}, newNoMetricNameError(), missingMetricName}, + {map[model.LabelName]model.LabelValue{}, newNoMetricNameError(cortexpb.FromMetricsToLabelAdapters(model.Metric{})), missingMetricName}, + {map[model.LabelName]model.LabelValue{"foo": "bar"}, newNoMetricNameError(cortexpb.FromMetricsToLabelAdapters(model.Metric{"foo": "bar"})), missingMetricName}, {map[model.LabelName]model.LabelValue{model.MetricNameLabel: ""}, newInvalidMetricNameError(""), invalidMetricName}, {map[model.LabelName]model.LabelValue{model.MetricNameLabel: " "}, newInvalidMetricNameError(" "), invalidMetricName}, {map[model.LabelName]model.LabelValue{model.MetricNameLabel: "test.\xc5.metric"}, newInvalidMetricNameError("test.\xc5.metric"), invalidMetricName}, @@ -102,6 +103,17 @@ func TestValidateMetricName(t *testing.T) { assert.Empty(t, reason) } +func TestNoMetricNameError_ReportsOffendingSeries(t *testing.T) { + cfg := new(Limits) + cfg.EnforceMetricName = true + + err, reason := ValidateMetricName(cfg, cortexpb.FromMetricsToLabelAdapters(model.Metric{"foo": "bar"}), model.LegacyValidation) + require.Error(t, err) + assert.Equal(t, missingMetricName, reason) + // The error must surface the offending series so operators can identify it (see issue #5802). + assert.Equal(t, `sample missing metric name metric: "{foo=\"bar\"}"`, err.Error()) +} + func TestValidateLabels(t *testing.T) { cfg := new(Limits) userID := "testUser"