Skip to content

Commit d577c60

Browse files
author
Maxim Levitsky
committed
KVM: selftests: Handle Intel Atom errata that leads to PMU event overcount
JIRA: https://issues.redhat.com/browse/RHEL-120168 commit c435978 Author: dongsheng <dongsheng.x.zhang@intel.com> Date: Fri Sep 19 14:46:48 2025 -0700 KVM: selftests: Handle Intel Atom errata that leads to PMU event overcount Add a PMU errata framework and use it to relax precise event counts on Atom platforms that overcount "Instruction Retired" and "Branch Instruction Retired" events, as the overcount issues on VM-Exit/VM-Entry are impossible to prevent from userspace, e.g. the test can't prevent host IRQs. Set up errata during early initialization and automatically sync the mask to VMs so that tests can check for errata without having to manually manage host=>guest variables. For Intel Atom CPUs, the PMU events "Instruction Retired" or "Branch Instruction Retired" may be overcounted for certain instructions, like FAR CALL/JMP, RETF, IRET, VMENTRY/VMEXIT/VMPTRLD and complex SGX/SMX/CSTATE instructions/flows. The detailed information can be found in the errata (section SRF7): https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/sierra-forest/xeon-6700-series-processor-with-e-cores-specification-update/errata-details/ For the Atom platforms up to and including Sierra Forest, both the "Instruction Retired" and "Branch Instruction Retired" events are overcounted on these instructions, but for Clearwater Forest only the "Instruction Retired" event is overcounted on these instructions.
Signed-off-by: dongsheng <dongsheng.x.zhang@intel.com> Co-developed-by: Dapeng Mi <dapeng1.mi@linux.intel.com> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com> Tested-by: Yi Lai <yi1.lai@intel.com> Co-developed-by: Sean Christopherson <seanjc@google.com> Reviewed-by: Dapeng Mi <dapeng1.mi@linux.intel.com> Tested-by: Dapeng Mi <dapeng1.mi@linux.intel.com> Link: https://lore.kernel.org/r/20250919214648.1585683-6-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc@google.com> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
1 parent 99a850d commit d577c60

File tree

5 files changed

+77
-3
lines changed

5 files changed

+77
-3
lines changed

tools/testing/selftests/kvm/include/x86/pmu.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,11 @@
55
#ifndef SELFTEST_KVM_PMU_H
66
#define SELFTEST_KVM_PMU_H
77

8+
#include <stdbool.h>
89
#include <stdint.h>
910

11+
#include <linux/bits.h>
12+
1013
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
1114

1215
/*
@@ -104,4 +107,17 @@ enum amd_pmu_zen_events {
104107
extern const uint64_t intel_pmu_arch_events[];
105108
extern const uint64_t amd_pmu_zen_events[];
106109

110+
/*
 * Known PMU errata.  Each enumerator is a bit position in pmu_errata_mask;
 * this_pmu_has_errata() tests the corresponding bit via BIT_ULL().
 */
enum pmu_errata {
111+
/* The "Instruction Retired" event may be overcounted. */
INSTRUCTIONS_RETIRED_OVERCOUNT,
112+
/* The "Branch Instruction Retired" event may be overcounted. */
BRANCHES_RETIRED_OVERCOUNT,
113+
};
114+
extern uint64_t pmu_errata_mask;
115+
116+
void kvm_init_pmu_errata(void);
117+
118+
/*
 * Return true if the bit for @errata is set in pmu_errata_mask, false
 * otherwise.  The mask is populated by kvm_init_pmu_errata().
 */
static inline bool this_pmu_has_errata(enum pmu_errata errata)
119+
{
120+
return pmu_errata_mask & BIT_ULL(errata);
121+
}
122+
107123
#endif /* SELFTEST_KVM_PMU_H */

tools/testing/selftests/kvm/lib/x86/pmu.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <linux/kernel.h>
99

1010
#include "kvm_util.h"
11+
#include "processor.h"
1112
#include "pmu.h"
1213

1314
const uint64_t intel_pmu_arch_events[] = {
@@ -34,3 +35,46 @@ const uint64_t amd_pmu_zen_events[] = {
3435
AMD_ZEN_BRANCHES_MISPREDICTED,
3536
};
3637
kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);
38+
39+
/*
40+
* For Intel Atom CPUs, the PMU events "Instruction Retired" or
41+
* "Branch Instruction Retired" may be overcounted for certain
42+
* instructions, like FAR CALL/JMP, RETF, IRET, VMENTRY/VMEXIT/VMPTRLD
43+
* and complex SGX/SMX/CSTATE instructions/flows.
44+
*
45+
* The detailed information can be found in the errata (section SRF7):
46+
* https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/sierra-forest/xeon-6700-series-processor-with-e-cores-specification-update/errata-details/
47+
*
48+
* For the Atom platforms up to and including Sierra Forest, both the
49+
* "Instruction Retired" and "Branch Instruction Retired" events are
50+
* overcounted on these instructions, but for Clearwater Forest only the
51+
* "Instruction Retired" event is overcounted on these instructions.
*
* Returns a mask of BIT_ULL(enum pmu_errata) flags for the current CPU, or 0
* if the CPU is not an Intel family 0x6 part with one of the models below.
52+
*/
53+
static uint64_t get_pmu_errata(void)
54+
{
55+
if (!this_cpu_is_intel())
56+
return 0;
57+
58+
/* Every model checked below is a family 0x6 part. */
if (this_cpu_family() != 0x6)
59+
return 0;
60+
61+
switch (this_cpu_model()) {
62+
case 0xDD: /* Clearwater Forest */
63+
return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT);
64+
case 0xAF: /* Sierra Forest */
65+
case 0x4D: /* Avoton, Rangeley */
66+
case 0x5F: /* Denverton */
67+
case 0x86: /* Jacobsville */
68+
return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT) |
69+
BIT_ULL(BRANCHES_RETIRED_OVERCOUNT);
70+
default:
71+
return 0;
72+
}
73+
}
74+
75+
uint64_t pmu_errata_mask;
76+
77+
/*
 * Detect the PMU errata of the CPU this code runs on and cache the result in
 * the global pmu_errata_mask.
 */
void kvm_init_pmu_errata(void)
78+
{
79+
pmu_errata_mask = get_pmu_errata();
80+
}

tools/testing/selftests/kvm/lib/x86/processor.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "linux/bitmap.h"
77
#include "test_util.h"
88
#include "kvm_util.h"
9+
#include "pmu.h"
910
#include "processor.h"
1011
#include "sev.h"
1112

@@ -638,6 +639,7 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm)
638639
sync_global_to_guest(vm, host_cpu_is_intel);
639640
sync_global_to_guest(vm, host_cpu_is_amd);
640641
sync_global_to_guest(vm, is_forced_emulation_enabled);
642+
sync_global_to_guest(vm, pmu_errata_mask);
641643

642644
if (is_sev_vm(vm)) {
643645
struct kvm_sev_init init = { 0 };
@@ -1269,6 +1271,8 @@ void kvm_selftest_arch_init(void)
12691271
host_cpu_is_intel = this_cpu_is_intel();
12701272
host_cpu_is_amd = this_cpu_is_amd();
12711273
is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
1274+
1275+
kvm_init_pmu_errata();
12721276
}
12731277

12741278
bool sys_clocksource_is_based_on_tsc(void)

tools/testing/selftests/kvm/x86/pmu_counters_test.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,10 +163,18 @@ static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr
163163

164164
switch (idx) {
165165
case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
166-
GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
166+
/* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
167+
if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT))
168+
GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
169+
else
170+
GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
167171
break;
168172
case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
169-
GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
173+
/* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
174+
if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT))
175+
GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED);
176+
else
177+
GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
170178
break;
171179
case INTEL_ARCH_LLC_REFERENCES_INDEX:
172180
case INTEL_ARCH_LLC_MISSES_INDEX:

tools/testing/selftests/kvm/x86/pmu_event_filter_test.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,8 +214,10 @@ static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
214214
do { \
215215
uint64_t br = pmc_results.branches_retired; \
216216
uint64_t ir = pmc_results.instructions_retired; \
217+
bool br_matched = this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT) ? \
218+
br >= NUM_BRANCHES : br == NUM_BRANCHES; \
217219
\
218-
if (br && br != NUM_BRANCHES) \
220+
if (br && !br_matched) \
219221
pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \
220222
__func__, br, NUM_BRANCHES); \
221223
TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \

0 commit comments

Comments
 (0)