Skip to content

Commit fee23ec

Browse files
committed
Fix Hodges-Lehmann distribution ratio calculation
1 parent 0c74c21 commit fee23ec

9 files changed

Lines changed: 1339 additions & 15 deletions

File tree

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,16 @@ Dropping a requirement of a major version of a dependency is a new contract.
2626
## [Unreleased]
2727
[Unreleased]: https://github.com/atlassian/report/compare/release-4.5.0...master
2828

29+
### Added
30+
- Add `DistributionComparator` which uses a proper Hodges-Lehmann implementation using pseudo-median instead of median
31+
32+
### Fixed
33+
- Fix calculating distribution ratio in `RelativeNonparametricPerformanceJudge` by `DistributionComparator`
34+
35+
### Deprecated
36+
- Deprecate `ShiftedDistributionRegressionTest` in favor of `DistributionComparator`
37+
38+
2939
## [4.5.0] - 2024-07-01
3040
[4.5.0]: https://github.com/atlassian/report/compare/release-4.4.0...release-4.5.0
3141

src/main/kotlin/com/atlassian/performance/tools/report/api/ShiftedDistributionRegressionTest.kt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ import org.apache.commons.math3.stat.descriptive.rank.Median
1313
* @experiment experiment durations
1414
* @param mwAlpha Mann-Whitney significance level
1515
* @param ksAlpha Kolmogorov-Smirnov significance level
16+
* @deprecated Use [DistributionComparator] instead
1617
*/
18+
@Deprecated("Use DistributionComparator instead")
1719
class ShiftedDistributionRegressionTest(
1820
private val baseline: DoubleArray,
1921
private val experiment: DoubleArray,
@@ -57,7 +59,12 @@ class ShiftedDistributionRegressionTest(
5759
}
5860

5961
internal fun overcomesTolerance(tolerance: Double): Boolean {
60-
return isExperimentRegressed(tolerance) || isExperimentImproved(tolerance)
62+
val isExperimentRegressed = isExperimentRegressed(tolerance)
63+
val isExperimentImproved = isExperimentImproved(tolerance)
64+
if (isExperimentImproved && isExperimentRegressed) {
65+
throw IllegalArgumentException("Experiment can't be both regressed and improved at the same time")
66+
}
67+
return isExperimentRegressed || isExperimentImproved
6168
}
6269

6370
/**
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
package com.atlassian.performance.tools.report.api.distribution
2+
3+
import com.numericalmethod.suanshu.stats.test.rank.wilcoxon.WilcoxonRankSum
4+
import org.apache.commons.math3.stat.descriptive.rank.Median
5+
import org.apache.commons.math3.stat.ranking.NaNStrategy
6+
7+
class DistributionComparator private constructor(
8+
private val baseline: DoubleArray,
9+
private val experiment: DoubleArray,
10+
/**
11+
* A percentage by which experiment can be slower/faster than baseline and not considered as a regression/improvement
12+
*/
13+
private val tolerance: Double,
14+
private val significance: Double
15+
) {
16+
17+
18+
19+
/**
20+
* Performs a one-tailed Mann–Whitney U test to check whether experiment is not slower than the baseline
21+
*
22+
* @return true if the experiment is slower than the baseline by more than tolerance, false otherwise
23+
*/
24+
private fun isExperimentRegressed(baselineMedian: Double): Boolean {
25+
val mu = -tolerance * baselineMedian
26+
return WilcoxonRankSum(baseline, experiment, mu).pValue1SidedLess < significance
27+
}
28+
29+
private fun isExperimentImproved(baselineMedian: Double): Boolean {
30+
val mu = -tolerance * baselineMedian
31+
val wilcoxon = WilcoxonRankSum(experiment, baseline, mu)
32+
return wilcoxon.pValue1SidedLess < significance
33+
}
34+
35+
/**
36+
* Pseudo-median: the median of the Walsh (pairwise) averages
37+
*/
38+
private fun pseudoMedian(array: DoubleArray): Double {
39+
val n = array.size
40+
val size = n * (n + 1) / 2 - n
41+
val values = DoubleArray(size)
42+
var k = 0
43+
for (i in 0 until n) {
44+
for (j in i + 1 until n) {
45+
values[k++] = (array[i] + array[j]) / 2
46+
}
47+
}
48+
return Median().evaluate(values)
49+
}
50+
51+
private fun median(func: (xi: Double, yj: Double) -> Double): Double {
52+
val values = DoubleArray(baseline.size * experiment.size)
53+
var k = 0
54+
for (i in baseline.indices) {
55+
for (j in experiment.indices) {
56+
values[k++] = func(baseline[i], experiment[j])
57+
}
58+
}
59+
return Median().withNaNStrategy(NaNStrategy.MINIMAL).evaluate(values)
60+
}
61+
62+
private fun shift(): Double {
63+
return median { xi, yj -> yj - xi }
64+
}
65+
66+
private fun ratio(): Double {
67+
return median { xi, yj -> yj / xi }
68+
}
69+
70+
/**
71+
* Calculates the distance between two data sets based on the [Hodges-Lehmann estimator][].
72+
* [Hodges-Lehmann estimator]: https://en.wikipedia.org/wiki/Hodges%E2%80%93Lehmann_estimator
73+
* https://aakinshin.net/hodges-lehmann-estimator/
74+
* https://github.com/AndreyAkinshin/perfolizer/blob/master/src/Perfolizer/Perfolizer/Mathematics/GenericEstimators/HodgesLehmannEstimator.cs
75+
*
76+
* Takes into account tolerance which answers the question "is change is big enough to matter?"
77+
*/
78+
fun compare(): DistributionComparison {
79+
val experimentShift = shift()
80+
val baselineMedian = pseudoMedian(baseline)
81+
val experimentRatio = ratio()
82+
val isExperimentImproved = isExperimentImproved(baselineMedian)
83+
val isExperimentRegressed = isExperimentRegressed(baselineMedian)
84+
val experimentRelativeChange = experimentRatio - 1
85+
return DistributionComparison(
86+
experimentRelativeChange = experimentRelativeChange,
87+
experimentAbsoluteChange = experimentShift,
88+
isExperimentRegressed = isExperimentRegressed,
89+
isExperimentImproved = isExperimentImproved
90+
)
91+
}
92+
93+
class Builder(
94+
private var baseline: DoubleArray,
95+
private var experiment: DoubleArray
96+
) {
97+
private var significance: Double = 0.05
98+
private var tolerance: Double = 0.01
99+
100+
fun significance(significance: Double) = apply { this.significance = significance }
101+
fun tolerance(tolerance: Double) = apply { this.tolerance = tolerance }
102+
fun baseline(baseline: DoubleArray) = apply { this.baseline = baseline }
103+
fun experiment(experiment: DoubleArray) = apply { this.experiment = experiment }
104+
105+
fun build() = DistributionComparator(
106+
baseline = baseline,
107+
experiment = experiment,
108+
tolerance = tolerance,
109+
significance = significance
110+
)
111+
112+
}
113+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
package com.atlassian.performance.tools.report.api.distribution
2+
3+
class DistributionComparison(
4+
val experimentRelativeChange: Double,
5+
val experimentAbsoluteChange: Double,
6+
val isExperimentRegressed: Boolean,
7+
val isExperimentImproved: Boolean
8+
) {
9+
10+
init {
11+
if (isExperimentImproved && isExperimentRegressed) {
12+
throw IllegalArgumentException("Experiment can't be both regressed and improved at the same time")
13+
}
14+
}
15+
16+
fun hasImpact() = isExperimentRegressed || isExperimentImproved
17+
18+
}

src/main/kotlin/com/atlassian/performance/tools/report/api/judge/RelativeNonparametricPerformanceJudge.kt

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package com.atlassian.performance.tools.report.api.judge
22

33
import com.atlassian.performance.tools.jiraactions.api.ActionType
44
import com.atlassian.performance.tools.report.ActionMetricsReader
5-
import com.atlassian.performance.tools.report.api.ShiftedDistributionRegressionTest
5+
import com.atlassian.performance.tools.report.api.distribution.DistributionComparator
66
import com.atlassian.performance.tools.report.api.junit.FailedAssertionJUnitReport
77
import com.atlassian.performance.tools.report.api.junit.JUnitReport
88
import com.atlassian.performance.tools.report.api.junit.SuccessfulJUnitReport
@@ -70,14 +70,16 @@ class RelativeNonparametricPerformanceJudge private constructor(
7070
report = FailedAssertionJUnitReport(reportName, "No action $label results for $experimentCohort"),
7171
action = action
7272
)
73-
val test = ShiftedDistributionRegressionTest(baseline, experiment, mwAlpha = significance, ksAlpha = 0.0)
74-
// shifts are negated, because ShiftedDistributionRegressionTest is relative to experiment, instead of baseline
73+
val comparison = DistributionComparator.Builder(baseline, experiment)
74+
.tolerance(toleranceRatio.toDouble())
75+
.build()
76+
.compare()
7577
val impact = LatencyImpact.Builder(
7678
action,
77-
-test.percentageShift,
78-
reader.convertToDuration(-test.locationShift)
79+
comparison.experimentRelativeChange,
80+
reader.convertToDuration(comparison.experimentAbsoluteChange)
7981
)
80-
.relevant(test.overcomesTolerance(toleranceRatio.toDouble()))
82+
.relevant(comparison.hasImpact())
8183
.build()
8284
impactHandlers.forEach { it.accept(impact) }
8385
return if (impact.regression) {

src/test/kotlin/com/atlassian/performance/tools/report/ShiftedDistributionRegressionTestTest.kt

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package com.atlassian.performance.tools.report
22

3-
import com.atlassian.performance.tools.jiraactions.api.*
43
import com.atlassian.performance.tools.report.api.ShiftedDistributionRegressionTest
54
import com.atlassian.performance.tools.report.api.result.FakeResults
65
import com.atlassian.performance.tools.report.chart.Chart
@@ -12,7 +11,7 @@ import org.apache.commons.math3.distribution.NormalDistribution
1211
import org.apache.commons.math3.distribution.NormalDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY
1312
import org.apache.commons.math3.random.MersenneTwister
1413
import org.assertj.core.api.Assertions.assertThat
15-
import org.assertj.core.api.SoftAssertions.*
14+
import org.assertj.core.api.SoftAssertions.assertSoftly
1615
import org.assertj.core.data.Offset
1716
import org.junit.Ignore
1817
import org.junit.Test
@@ -112,7 +111,7 @@ class ShiftedDistributionRegressionTestTest {
112111
}
113112

114113
/**
115-
* In a 51% vs 49% case, small diffs should not dominate the big diffs.
114+
* In a 51% slightly faster vs 49% much slower case, it should be a regression
116115
*/
117116
@Ignore("https://ecosystem.atlassian.net/browse/JPERF-1297")
118117
@Test
@@ -152,16 +151,39 @@ class ShiftedDistributionRegressionTestTest {
152151
}
153152
}
154153

154+
@Test
155+
// @Ignore("percentageShift calculation is fixed in in DistributionComparator where Hodges-Lehmann estimator with pseudo-median is used")
156+
fun shouldDetectImprovementWhenEveryPercentileBetter() {
157+
// given
158+
val baseline =
159+
this.javaClass.getResource("/real-results/view issue 9.17.0 vs 10.0.0/baseline.csv").readText().lines()
160+
.map { it.toDouble() }.toDoubleArray()
161+
val experiment =
162+
this.javaClass.getResource("/real-results/view issue 9.17.0 vs 10.0.0/experiment.csv").readText().lines()
163+
.map { it.toDouble() }.toDoubleArray()
164+
// when
165+
val test = ShiftedDistributionRegressionTest(baseline, experiment)
166+
// then
167+
plotQuantiles(baseline, experiment)
168+
assertSoftly {
169+
it.assertThat(test.isExperimentRegressed(0.01)).`as`("isExperimentRegressed").isFalse()
170+
it.assertThat(test.percentageShift).`as`("").`as`("percentageShift").isEqualTo(0.03941908713692943)
171+
it.assertThat(test.locationShift).`as`("").`as`("locationShift").isEqualTo(20.0)
172+
it.assertThat(test.overcomesTolerance(0.01)).`as`("overcomesTolerance").isTrue()
173+
}
174+
}
155175

156176

157177
private fun plotQuantiles(
158178
baseline: DoubleArray,
159179
experiment: DoubleArray
160180
) {
161-
val chart = Chart(listOf(
162-
chartLine(baseline, "baseline"),
163-
chartLine(experiment, "experiment")
164-
))
181+
val chart = Chart(
182+
listOf(
183+
chartLine(baseline, "baseline"),
184+
chartLine(experiment, "experiment")
185+
)
186+
)
165187
val htmlFile = Files.createTempFile("kebab", ".html")
166188
.also { println("Distribution comparison at $it") }
167189
DistributionComparison(GitRepo.findFromCurrentDirectory()).render(chart, htmlFile)
@@ -174,7 +196,6 @@ class ShiftedDistributionRegressionTestTest {
174196
yAxisId = "latency-axis"
175197
)
176198

177-
@Ignore("Known bug: https://ecosystem.atlassian.net/browse/JPERF-1188")
178199
@Test
179200
fun shouldSeeNoShiftAcrossTheSameResult() {
180201
val result = FakeResults.fastResult

0 commit comments

Comments
 (0)