From 64025efb2de50165df85bb34fcc163e868cf6397 Mon Sep 17 00:00:00 2001 From: Jiri Mencak Date: Thu, 7 May 2026 14:07:57 +0200 Subject: [PATCH] Add: AGENTS.md/README.md, Migrate more OTE tests --- .../main.go | 14 +- test/extended/AGENTS.md | 332 ++++++++++++++++++ test/extended/CLAUDE.md | 1 + test/extended/README.md | 266 ++++++++++++++ test/extended/specs/nto.go | 14 +- test/extended/utils/nto_util.go | 19 + 6 files changed, 629 insertions(+), 17 deletions(-) create mode 100644 test/extended/AGENTS.md create mode 100644 test/extended/CLAUDE.md create mode 100644 test/extended/README.md diff --git a/cmd/cluster-node-tuning-operator-test-ext/main.go b/cmd/cluster-node-tuning-operator-test-ext/main.go index 6fa955aaa..52cf687dc 100644 --- a/cmd/cluster-node-tuning-operator-test-ext/main.go +++ b/cmd/cluster-node-tuning-operator-test-ext/main.go @@ -55,8 +55,7 @@ func main() { Name: "openshift/cluster-node-tuning-operator/disruptive", Parents: []string{"openshift/disruptive-longrunning"}, Qualifiers: []string{ - `(labels.exists(l, l=="ReleaseGate")) && - name.contains("[Disruptive]")`, + `name.contains("[Disruptive]")`, }, }) @@ -65,17 +64,14 @@ func main() { Name: "openshift/cluster-node-tuning-operator/optional/slow", Parents: []string{"openshift/optional/slow"}, Qualifiers: []string{ - `(labels.exists(l, l=="ReleaseGate")) && - name.contains("[Slow]") && !name.contains("[Disruptive]")`, + `name.contains("[Slow]") && !name.contains("[Disruptive]")`, }, }) // Suite: all (includes everything) ext.AddSuite(e.Suite{ - Name: "openshift/cluster-node-tuning-operator/all", - Qualifiers: []string{ - `(labels.exists(l, l=="ReleaseGate"))`, - }, + Name: "openshift/cluster-node-tuning-operator/all", + Qualifiers: []string{}, }) specs, err := g.BuildExtensionTestSpecsFromOpenShiftGinkgoSuite() @@ -97,7 +93,7 @@ func main() { // Ignore obsolete tests ext.IgnoreObsoleteTests( - // "[sig-node-tuning] ", + // "[sig-tuning-node] ", ) // Initialize environment before running any 
tests diff --git a/test/extended/AGENTS.md b/test/extended/AGENTS.md new file mode 100644 index 000000000..95ccb91b8 --- /dev/null +++ b/test/extended/AGENTS.md @@ -0,0 +1,332 @@ +# AGENTS.md + +This file provides AI agents with comprehensive context about the NTO QE Test Extension project to enable effective test development, debugging, and maintenance. + +## Scope and Working Directory + +### Applicability +This AGENTS.md applies to the **NTO QE Test Cases** located at: +``` +cluster-node-tuning-operator/test/extended/ +``` + +**IMPORTANT**: This file is specifically for the **QE migration test code** in the `test/extended/` directory, not for: +- Product code in the main `cluster-node-tuning-operator` repository + +### Required Working Directory +For this AGENTS.md to be effective, ensure your working directory is set to: +```bash +/cluster-node-tuning-operator/test/extended/ +``` + +### Working Directory Verification for AI Agents + +**Context Awareness**: This AGENTS.md may be loaded even when not actively working with QE test files (e.g., user briefly left `test/extended/` for another repo path). Apply these guidelines intelligently based on the actual task. + +#### When to Apply This AGENTS.md + +**ONLY apply this AGENTS.md when the user is working with QE migration test files**, identified by: +- File paths containing `test/extended/` +- Tasks explicitly about "NTO QE tests", "QE migration", "test extension", "sig-tuning-node" + +**DO NOT apply this AGENTS.md when**: +- Working with files outside these directories (e.g., e2e tests, product code) +- User is in a different part of the repository +- Even if this AGENTS.md was previously loaded + +#### Directory Check (Only for QE Test File Operations) + +When the user asks to work with QE test files (files under `test/extended/`): + +1. **Check current working directory**: + ```bash + pwd + ``` + +2. 
**Verify directory alignment**: + - Preferred: Current directory should be `test/extended/` or subdirectory + - This ensures AGENTS.md context is automatically available + +3. **If working directory is not aligned**: + + **Inform (don't block) the user**: + ``` + 💡 Note: Working Directory Suggestion + + You're working with QE test files under test/extended/, + but your current directory is elsewhere. For better context and auto-completion: + + Consider running: cd test/extended/ + + I can still help you, but setting the working directory correctly + ensures I have full access to the test documentation. + + Do you want to continue in the current directory, or should I wait + for you to switch? + ``` + +**Important**: This is a suggestion, not a blocker. If the user wants to proceed, assist them normally. + +### Path Structure Reference +``` +cluster-node-tuning-operator/ ← OpenShift downstream product repo +└── test/ ← Test directory root + └── extended/ ← OpenShift Test Extension (OTE) root + ├── bindata/ ← Embedded test data for QE tests + ├── specs/ ← OTE test specifications + ├── testdata/ ← Raw test manifests to be compiled into bindata + └── utils/ ← Test helpers/utilities +``` + +## Project Overview + +This is a **Quality Engineering (QE) test extension** for Node Tuning Operator (NTO) on OpenShift. It provides end-to-end functional tests that validate NTO features and functionality in real OpenShift clusters. + +### Purpose +- Validate NTO functionality across different OpenShift topologies +- Ensure NTO works correctly in various cluster configurations (SNO, standard OCP, etc.) +- Provide regression testing for NTO bug fixes and enhancements + +**Note**: NTO currently does NOT support MicroShift topology. Support may be added in future releases. 
+ +### Key Characteristics +- **Framework**: Built on Ginkgo v2 BDD testing framework and OpenShift Tests Extension (OTE) +- **Test Organization**: Polarion-ID based test case management +- **Integration**: Extends `openshift-tests-extension` framework + +## Test Case Sources and Organization + +**Reference**: For OpenShift CI requirements, see [Choosing a Test Suite](https://docs.google.com/document/d/1cFZj9QdzW8hbHc3H0Nce-2xrJMtpDJrwAse9H7hLiWk/edit?tab=t.0#heading=h.tjtqedd47nnu) + +## Test Suite Definitions + +**IMPORTANT**: Suite definitions are sourced from **[cmd/cluster-node-tuning-operator-test-ext/main.go](../../cmd/cluster-node-tuning-operator-test-ext/main.go)** and may change over time. Always refer to that file for the most current definitions. + +For detailed explanations and code examples, see **[README.md](./README.md)** section "Suite Definitions". + +## Test Case Migration Guide + +For complete migration guidelines including code changes and label requirements, refer to **[README.md](./README.md)** section "Test Case Migration Guide". 
+ +## Test Architecture and Patterns + +### Test Structure Pattern + +For complete test structure examples, refer to existing test files: +- **Standard tests**: `specs/nto.go` +- **Key patterns**: Look for `g.Describe`, `g.BeforeEach`, `g.AfterEach`, `g.It` blocks + +**Basic structure**: +```go +var _ = g.Describe("[Jira:Node Tuning Operator][sig-tuning-node] feature description", func() { + defer g.GinkgoRecover() + var oc = utils.NewCLIWithoutNamespace("nto-test") + + g.BeforeEach(func() { + // ensure NTO operator is installed + utils.SkipNoNTO(oc, ntoNamespace) + // get IaaS platform + platformOutput, err := oc.AsAdmin().WithoutNamespace().Run("get").Args("infrastructure", "cluster", "-o=jsonpath={.status.platform}").Output() + if err == nil { + iaasPlatform = strings.ToLower(platformOutput) + } + utils.Logf("Cloud provider is: %v", iaasPlatform) + }) + + g.AfterEach(func() { + // Cleanup resources (use defer) + }) + + g.It("[test_id:37415][OTP] description", g.Label("ReleaseGate"), func() { + // Test implementation + }) +}) +``` + +## Local Development Workflow + +For complete local development workflow, build instructions, testing procedures, PR submission requirements, and disconnected environment support, refer to **[README.md](./README.md)** section "Local Development Workflow". + +**Quick reference**: +- Build: `make` +- Find test: `_output/cluster-node-tuning-operator-test-ext list -o names | grep "keyword_from_your_test_name"` +- Run test: `_output/cluster-node-tuning-operator-test-ext run-test "<full test name>"` +- openshift-tests integration: See [README.md](./README.md) for environment variables and suite selection + +**Important for Disconnected Tests**: With IDMS/ITMS in place, tests work the same in both connected and disconnected environments. 
See README.md for `ValidateAccessEnvironment` usage + +## Test Automation Code Requirements + +For complete code quality guidelines, best practices, logging best practices, and security considerations, refer to **[README.md](./README.md)** section "Test Automation Code Requirements". + +**Critical rules for AI agents**: +- ✅ Use `defer` for cleanup (BEFORE resource creation): `defer resource.Delete(oc)` then `resource.Create(oc)` +- ✅ Use case ID for resource naming (NOT random strings): `name := "test-extension-" + caseID` +- ❌ Don't use `o.Expect` inside `wait.Poll` loops (use `if err != nil { return false, err }`) +- ❌ Don't execute logic in `g.Describe` blocks (only initialization, move logic to `g.BeforeEach`) +- ❌ Don't use quotes in test titles (breaks XML parsing) +- ❌ Don't put large log outputs in error messages (use proper log messages instead of `o.Expect` with large output) + +## Key Utilities + +For complete utility APIs and usage examples, refer to the source code and existing tests: + +### `utils` Package +**Location**: `utils/` directory (e.g., `utils/cli_wrapper.go`, `utils/nto_util.go`) + +**Key functions**: +- CLI management: `NewCLIWithoutNamespace()` +- Cluster detection: `IsSNOCluster()`, `IsRosaCluster()` +- Skip functions: `SkipNoNTO()` + +## Anti-Patterns to Avoid + +For complete anti-patterns with detailed code examples and explanations, refer to **[README.md](./README.md)** section "Test Automation Code Requirements". 
+ +**Common mistakes for AI agents to avoid**: +- ❌ No cleanup: Always use `defer resource.Delete(oc)` BEFORE `resource.Create(oc)` +- ❌ Hardcoded names: Use case ID for naming: `name := "test-extension-" + caseID` +- ❌ Missing timeouts: Always specify timeout for Wait functions +- ❌ Hard sleeps: Use Wait functions instead of `time.Sleep()` +- ❌ `o.Expect` in `wait.Poll`: Use `if err != nil { return false, err }` pattern instead + +## Quick Reference + +### Test Naming Convention +``` +[Jira:Node Tuning Operator][sig-tuning-node] should [test_id:12345][OTP]Description [Parallel|Serial|Disruptive|Slow] +``` + +## Resources + +- [NTO OpenShift Product Code](https://github.com/openshift/cluster-node-tuning-operator) +- [Ginkgo v2 Documentation](https://onsi.github.io/ginkgo/) +- [OpenShift Tests Extension](https://github.com/openshift-eng/openshift-tests-extension) +- [Test Extensions in Origin](https://github.com/openshift/origin/blob/main/docs/test_extensions.md) +- [OpenShift CI Requirements](https://docs.google.com/document/d/1cFZj9QdzW8hbHc3H0Nce-2xrJMtpDJrwAse9H7hLiWk/edit?tab=t.0#heading=h.tjtqedd47nnu) + +## Debugging + +**Investigation Priority** when tests fail: +1. Check test code in `test/extended/` +2. Check resource status and conditions via `oc describe` +3. Refer to product code to understand expected behavior + +**For deeper investigation** (when you need to refer to product code): +1. **Locate NTO OpenShift product code** +2. **Trace code flow**: Use the product code to understand expected behavior +3. **Compare implementation**: Check if test expectations match product implementation +4. 
**Check recent changes**: Look for recent commits that might have changed the behavior + +**Key Namespaces** (OpenShift): +- `openshift-cluster-node-tuning-operator`: NTO operator and tuned pods + +**Common Debugging Commands**: +```bash +# Check resource status +oc get tuned -n openshift-cluster-node-tuning-operator +oc get profile -n openshift-cluster-node-tuning-operator + +# Check logs +oc logs -l name=cluster-node-tuning-operator -n openshift-cluster-node-tuning-operator +oc logs -l name=tuned -n openshift-cluster-node-tuning-operator +``` + +## Notes for AI Agents + +### Suggesting Test Locations + +When discussing whether a feature needs testing: + +**✅ DO**: Provide simple, focused guidance on QE test placement +- Example: "If you need to write QE tests for this functionality, they should go in `test/extended/specs/`." +- Keep suggestions within the scope of this AGENTS.md (QE tests only) + +**❌ DON'T**: +- Discuss DEV test locations (e.g., unit tests in product code directories) +- Explain the difference between QE and DEV tests unless explicitly asked +- Provide detailed test categorization unless the user is actively writing tests + +**Remember**: This AGENTS.md is for QE test code in `test/extended/` only. Product code testing (DEV tests) is outside this scope. + +### Critical Points + +1. **Test Scope**: + - This AGENTS.md applies ONLY to QE migration test code under `test/extended/` + +2. **Suite Definitions Source**: + - Always check `cmd/cluster-node-tuning-operator-test-ext/main.go` for current suite definitions + - Suite qualifiers may change over time + +3. **ReleaseGate Label Mechanism**: + - Only `ReleaseGate` cases can be used in OpenShift General Jobs + +4. **ReleaseGate is Critical**: + - Determines if Extended case can be used in OpenShift General Jobs and PR Presubmit Jobs + - All cases are executed via `openshift-tests` command + +5. 
**Most Failures are Test Code Issues**: + - Always investigate test code first before looking at product code + - Refer to Debugging section for investigation priority + +### Test Development Guidelines + +1. **Component Tag**: Always use `[sig-tuning-node]` +2. **Utilities**: Use `utils` package +3. **API Focus**: Test NTO APIs (tuneds.tuned.openshift.io, performanceprofiles.performance.openshift.io) +4. **Cleanup**: Always use defer for cleanup to ensure resources are removed +5. **Suite Logic**: Understand the qualifier logic for different test suites + - Refer to Test Suite Definitions section for suite hierarchy + - Understand which suite your test belongs to based on labels + +### Cluster Topologies + +**Note**: NTO currently supports only a subset of OpenShift topologies. + +**Currently Supported**: +- **Standard OCP**: Regular OpenShift clusters +- **SNO (Single Node OpenShift)**: Single-node clusters +- **HyperShift Hosted**: Hosted control plane clusters +- **HyperShift Management**: Management clusters for hosted control planes + +**NOT Currently Supported**: +- **MicroShift**: Lightweight OpenShift for edge (not yet supported by NTO) + +**Network Connectivity**: +- **Connected**: Full internet access +- **Disconnected**: No internet access (air-gapped) +- **Proxy**: Internet access through proxy + +### Common Pitfalls + +**Test Code Issues**: +1. ❌ **Don't** use `o.Expect` inside `wait.Poll` loops (causes panic) +2. ❌ **Don't** use quotes in test titles (breaks XML parsing) +3. ❌ **Don't** execute logic in `g.Describe` blocks (only initialization) +4. ❌ **Don't** add `ReleaseGate` to `[Disruptive]` and `[Slow]` cases +5. ❌ **Don't** forget cleanup in `g.AfterEach` with defer + +### Best Practices + +**General Test Practices**: +1. ✅ **Do** check suite definitions in `cmd/cluster-node-tuning-operator-test-ext/main.go` before adding tests +2. ✅ **Do** use case ID for naming resources (NOT random strings) +3. 
✅ **Do** add proper test_id (Polarion ID) to all test cases +4. ✅ **Do** use skip functions for topology-specific tests +5. ✅ **Do** register defer cleanup BEFORE creating resources + - Pattern: `defer resource.Delete(oc)` then `resource.Create(oc)` + - Why: Ensures cleanup even if Create partially succeeds then fails +6. ✅ **Do** test locally with `cluster-node-tuning-operator-test-ext` before submitting PR +7. ✅ **Do** test with `openshift-tests` to verify suite selection +8. ✅ **Do** run stability tests (`/payload-job`) for ReleaseGate test cases + +### Build and Run + +For complete workflow and detailed commands, refer to **[README.md](./README.md)** section "Local Development Workflow" and the **[Quick Reference](#quick-reference)** section above. + +**Essential pattern for AI agents**: +1. Build: `make` +2. Find test: `_output/cluster-node-tuning-operator-test-ext list -o names | grep ` +3. Run locally: `_output/cluster-node-tuning-operator-test-ext run-test ""` +4. Test with openshift-tests: See README.md for environment variables and suite selection +5. Run stability tests: `/payload-job` for ReleaseGate test cases (see README.md for details) diff --git a/test/extended/CLAUDE.md b/test/extended/CLAUDE.md new file mode 100644 index 000000000..43c994c2d --- /dev/null +++ b/test/extended/CLAUDE.md @@ -0,0 +1 @@ +@AGENTS.md diff --git a/test/extended/README.md b/test/extended/README.md new file mode 100644 index 000000000..d4f794945 --- /dev/null +++ b/test/extended/README.md @@ -0,0 +1,266 @@ +# NTO QE Test Extension + +> **For AI Agents**: This directory contains comprehensive documentation for AI coding assistants. +> Please read [AGENTS.md](./AGENTS.md) for detailed context about the NTO QE test framework, +> migration guidelines, suite definitions, and best practices. +> +> **Using Claude Code**: If you are using Claude Code as your AI coding assistant: +> 1. Start Claude Code from `test/extended/` directory +> 2. 
On first launch from a subdirectory, Claude Code will prompt you to load the parent AGENTS.md - select **Yes** (subsequent launches will auto-load) +> 3. If starting from `test/extended/` itself, AGENTS.md is automatically loaded +> 4. Use `/memory` to verify AGENTS.md is loaded and view its content +> +> This ensures Claude Code has access to test framework architecture, migration guidelines, +> suite definitions, and code quality standards. + +## Implementation Strategy + +### Test Case Organization + +1. If the author believes a case meets OpenShift CI requirements, add the `ReleaseGate` label: + ```go + g.It("xxxxxx", g.Label("ReleaseGate"), func() { + ``` + - This makes the case equivalent to origin cases for openshift-tests + - For the cases with `ReleaseGate` that need `Informing`, add: + ```go + import oteg "github.com/openshift-eng/openshift-tests-extension/pkg/ginkgo" + g.It("xxxxxx", g.Label("ReleaseGate"), oteg.Informing(), func() { + ``` + +## Suite Definitions + +### Suites for openshift-tests and PR presubmit jobs: + +#### Parallel Suite +```go + ext.AddSuite(e.Suite{ + Name: "openshift/cluster-node-tuning-operator/conformance/parallel", + Parents: []string{"openshift/conformance/parallel"}, + Qualifiers: []string{ + `(labels.exists(l, l=="ReleaseGate")) && + !(name.contains("[Serial]") || name.contains("[Slow]") || name.contains("[Disruptive]"))`, + }, + }) +``` + +#### Serial Suite +```go + ext.AddSuite(e.Suite{ + Name: "openshift/cluster-node-tuning-operator/conformance/serial", + Parents: []string{"openshift/conformance/serial"}, + Qualifiers: []string{ + `(labels.exists(l, l=="ReleaseGate")) && + name.contains("[Serial]") && !name.contains("[Disruptive]")`, + // refer to https://github.com/openshift/origin/blob/main/pkg/testsuites/standard_suites.go + }, + }) +``` + +#### Disruptive Suite +```go + ext.AddSuite(e.Suite{ + Name: "openshift/cluster-node-tuning-operator/disruptive", + Parents: []string{"openshift/disruptive-longrunning"}, + 
Qualifiers: []string{ + `name.contains("[Disruptive]")`, + }, + }) +``` + +#### Slow Suite +```go + ext.AddSuite(e.Suite{ + Name: "openshift/cluster-node-tuning-operator/optional/slow", + Parents: []string{"openshift/optional/slow"}, + Qualifiers: []string{ + `name.contains("[Slow]") && !name.contains("[Disruptive]")`, + }, + }) +``` + +#### All Suite +```go + ext.AddSuite(e.Suite{ + Name: "openshift/cluster-node-tuning-operator/all", + Qualifiers: []string{ + }, + }) +``` + +## Test Case Migration Guide + +**Required For all QE cases**: +- Do not use `&|!,()/` in case title +- Do NOT remove the test_id number from the `original-name` label. The test_id in `g.Label("original-name:...")` must include the case ID number. + Case ID is a 5 digit number between `-` symbols. + - ✅ **Correct**: `g.Label("[OTP][Disruptive][test_id:37415] Allow setting isolated_cores without touching the default_irq_affinity")` + - ❌ **Wrong**: `g.Label("[OTP][Disruptive] Allow setting isolated_cores without touching the default_irq_affinity")` (missing case ID) + +### A. Code Changes for Migrated Cases + +All migrated test case code needs the following changes to run in the new test framework: + +1. Change `compat_otp.By()` to `g.By()` +2. Change `compat_otp.XYZ()` to `utils.XYZ()`, where `XYZ` are such as `IsSNOCluster` +3. Adjust functions missing in the `utils` package from the `compat_otp` package +4. Change `ntoResource` to `NtoResource` and export its fields +5. Change `e2e.Logf()` to `utils.Logf()` +6. When using `oc.AsAdmin().WithoutNamespace()`, always use `-n` with the appropriate namespace either from the resource being created itself or NTO namespace. + This will prevent issues in the CI when temporary OTE namespaces no longer exist. +7. Change the comments missing a space after `//`: + - ✅ **Correct**: // test requires NTO to be installed + - ❌ **Wrong**: //test requires NTO to be installed + + +### B. Label Requirements for Migrated and New Cases + +#### Required Labels +1. 
**Component annotation**: Add `[sig-tuning-node]` in case title +2. **Jira Component**: Add `[Jira:Node Tuning Operator]` in case title +3. **OpenShift CI compatibility**: If you believe the case meets OpenShift CI requirements, add `ReleaseGate` label to Ginkgo +4. **Required For Migrated case from test-private**: Add `[OTP]` in case title + +#### Optional Labels in Migration/New test cases' title +1. **Author**: Deprecated, remove it. +2. **ConnectedOnly**: Add `[Skipped:Disconnected]` in title +3. **DisconnectedOnly**: Add `[Skipped:Connected][Skipped:Proxy]` in title +4. **Case ID**: change it to `[test_id:xxxxxx]` format, and remove the old one from the case title. Such as `-37415-` strings. + - **IMPORTANT**: The test_id number should only appear ONCE in the test title - at the beginning as `[test_id:xxxxx]`. Do NOT repeat the number anywhere else in the title. + - **IMPORTANT**: Do NOT add `-` between two consecutive square brackets. Adjacent tags should be written directly together. + - ✅ **Correct**: `[test_id:12345][OTP][Skipped:Disconnected]Allow setting isolated_cores without touching the default_irq_affinity [Disruptive]` + - ❌ **Wrong**: `[test_id:12345][OTP]-[Skipped:Disconnected]Allow setting isolated_cores without touching the default_irq_affinity [Disruptive]` (dash between brackets) + - ❌ **Wrong**: `[test_id:12345][OTP][Skipped:Disconnected]12345-Allow setting isolated_cores without touching the default_irq_affinity [Disruptive]` (repeated ID) +5. **Importance**: Deprecated, remove it. Such as `Critical`, `High`, `Medium` and `Low` strings. +6. **NonPrerelease**: Deprecated, remove it. + - **Longduration**: Change it to `[Slow]` in case title. + - **ChkUpg**: Deprecated, remove it. Not supported (openshift-tests upgrade differs from OpenShift QE) +7. **VMonly**: Deprecated, and don't migrate the `VMonly` test cases to here. +8. **Slow, Serial, Disruptive**: Preserved, but add them in the end of the title as above. +9. 
**CPaasrunOnly, CPaasrunBoth, StagerunOnly, StagerunBoth, ProdrunOnly, ProdrunBoth**: Deprecated, remove them. +10. **NonHyperShiftHOST**: Use Ginkgo label `g.Label("NonHyperShiftHOST")` or use `IsHypershiftHostedCluster` judgment, then skip +11. **HyperShiftMGMT**: Deprecated. For cases needing hypershift mgmt execution, use `g.Label("NonHyperShiftHOST")` and `ValidHypershiftAndGetGuestKubeConf` validation +12. **MicroShiftOnly**: Deprecated. For cases not supporting microshift, use `SkipMicroshift` judgment, then skip +13. **ROSA**: Deprecated. Three ROSA job types: + - `rosa-sts-ovn`: equivalent to OCP + - `rosa-sts-hypershift-ovn`: equivalent to hypershift hosted + - `rosa-classic-sts`: doesn't use openshift-tests +14. **ARO**: Deprecated. All ARO jobs based on HCP are equivalent to hypershift hosted (don't actually use openshift-tests) +15. **OSD_CCS**: Deprecated. Only one job type: `osd-ccs-gcp` equivalent to OCP +16. **Feature Gates**: Handle test cases based on their feature gate requirements: + + **Case 1: Test only runs when feature gate is enabled** + - The test should not execute if the feature gate is disabled + - Add `[OCPFeatureGate:xxxx]` in `g.It` title (where xxxx is feature gate name) + - Or use `IsFeaturegateEnabled` check, then skip if disabled + - Remove label/check when feature no longer requires gate + + **Case 2: Test runs with/without feature gate but with different behaviors** + - The test executes regardless of feature gate status, but behaves differently + - Use `IsFeaturegateEnabled` check to handle different behaviors + - Do NOT add `[OCPFeatureGate:xxxx]` label + - Remove `IsFeaturegateEnabled` check when feature no longer requires gate + + **Case 3: Test runs with/without feature gate with same behavior** + - The test executes the same way regardless of feature gate status + - Do NOT use `IsFeaturegateEnabled` check + - Do NOT add `[OCPFeatureGate:xxxx]` label +17. 
**Exclusive**: change to `Serial` + +## Test Automation Code Requirements + +Consider these requirements when writing and reviewing code: + +### Security Considerations +- Does the test case generate sensitive information in logs? +- Does the code contain sensitive information in output or commands? + +### Test Isolation +- Will this test case affect other test executions? +- Will this test case be affected by other test executions? + +### Labeling and Cleanup +- Are correct labels applied? +- What changes does this case make to the cluster? +- Can changes be restored for both normal and abnormal exits? +- During recovery, are both actions and results correct? +- Should recovery restore to predetermined or dynamically determined values? + +### Logging Best Practices +- Avoid excessive logs or large error messages +- Don't put large log outputs in error messages (use proper log messages instead). Don't use `o.Expect` to assert large messages (appears in error message on failure) +- Avoid logging `oc logs` output directly + +### Code Quality +- Don't modify shared libraries (e.g., Ginkgo) or global settings affecting other tests +- Don't execute logic in `g.Describe` other than initializing `oc`; move all other logic to `g.BeforeEach` +- Don't use single/double quotes in case titles (causes XML parse failures) +- Avoid `o.Expect` in `wait.Poll`: + ```go + // Wrong: + wait.PollUntilContextTimeout(context.TODO(), time.Second, time.Minute, false, func(ctx context.Context) (bool, error) { + response, err := c.AuthorizationV1().SelfSubjectAccessReviews().Create(context.Background(), review, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) // in wait.Poll + return response.Status.Allowed == allowed, nil + }) + + // Correct: + wait.PollUntilContextTimeout(context.TODO(), time.Second, time.Minute, false, func(ctx context.Context) (bool, error) { + response, err := c.AuthorizationV1().SelfSubjectAccessReviews().Create(context.Background(), review, metav1.CreateOptions{}) + 
if err != nil { + return false, err + } + return response.Status.Allowed == allowed, nil + }) + ``` + +## Local Development Workflow + +### Before Submitting PR + +1. **Build and compile**: + ```bash + make + ``` + +2. **Check test name**: + ```bash + # List all test names and search for your test using a keyword + _output/cluster-node-tuning-operator-test-ext list -o names | grep "keyword_from_your_test_name" + ``` + +3. **Run test locally**: + ```bash + _output/cluster-node-tuning-operator-test-ext run-test + ``` + +4. **Test with openshift-tests**: + - Switch to origin repo + - Follow [test extensions documentation](https://github.com/openshift/origin/blob/main/docs/test_extensions.md) + - Set environment variables: + ```bash + export OPENSHIFT_TESTS_DISABLE_CACHE=1 + export EXTENSION_BINARY_OVERRIDE_INCLUDE_TAGS=tests,cluster-node-tuning-operator + export EXTENSION_BINARY_OVERRIDE_CLUSTER_NODE_TUNING_OPERATOR=/_output/cluster-node-tuning-operator-test-ext + export EXTENSIONS_PAYLOAD_OVERRIDE=quay.io/openshift-release-dev/ocp-release:4.21.12-x86_64 + ``` + - Run appropriate suite based on your test characteristics: + ```bash + # Choose the suite that matches your test type: + + # For all tests: + ./openshift-tests run openshift/cluster-node-tuning-operator/all --monitor watch-namespaces + ``` + +5. **Create PR** + +### PR Submission Requirements + +#### Pre-submission Checks +1. Check failed presubmit jobs - verify both your new cases and whether other case failures are caused by your changes + +#### Stability Testing +2. Identify release blocking jobs and run them either using `/payload-job` or `/payload-aggregate`. 
For example: + ```bash + /payload-job periodic-ci-openshift-release-main-ci-5.0-upgrade-from-stable-4.22-e2e-gcp-ovn-rt-upgrade + ``` diff --git a/test/extended/specs/nto.go b/test/extended/specs/nto.go index 4999248e1..ea266dd6e 100644 --- a/test/extended/specs/nto.go +++ b/test/extended/specs/nto.go @@ -17,7 +17,6 @@ var _ = g.Describe("[Jira:Node Tuning Operator][sig-tuning-node] should", g.Labe oc = utils.NewCLIWithoutNamespace("nto-test") ntoNamespace = "openshift-cluster-node-tuning-operator" - isNTO bool iaasPlatform string tunedNodeName string err error @@ -25,7 +24,7 @@ var _ = g.Describe("[Jira:Node Tuning Operator][sig-tuning-node] should", g.Labe g.BeforeEach(func() { // ensure NTO operator is installed - isNTO = utils.IsNTOPodInstalled(oc, ntoNamespace) + utils.SkipNoNTO(oc, ntoNamespace) // get IaaS platform platformOutput, err := oc.AsAdmin().WithoutNamespace().Run("get").Args("infrastructure", "cluster", "-o=jsonpath={.status.platform}").Output() if err == nil { @@ -34,6 +33,10 @@ var _ = g.Describe("[Jira:Node Tuning Operator][sig-tuning-node] should", g.Labe utils.Logf("Cloud provider is: %v", iaasPlatform) }) + g.AfterEach(func() { + // Cleanup resources (use defer) + }) + // A dummy test that should always pass. It should land in "openshift/cluster-node-tuning-operator/conformance/parallel" suite. 
g.It("support passing tests", g.Label("ReleaseGate"), func() { o.Expect(true).To(o.BeTrue()) @@ -54,12 +57,7 @@ var _ = g.Describe("[Jira:Node Tuning Operator][sig-tuning-node] should", g.Labe o.Expect(true).To(o.BeTrue()) }) - g.It("[test_id:37415][OTP]Allow setting isolated_cores without touching the default_irq_affinity [Disruptive]", g.Label("ReleaseGate"), oteg.Informing(), func() { - // test requires NTO to be installed - if !isNTO { - g.Skip("NTO is not installed - skipping test ...") - } - + g.It("[test_id:37415][OTP]Allow setting isolated_cores without touching the default_irq_affinity [Disruptive]", oteg.Informing(), func() { ntoIRQSMPFile := utils.TestdataFixturePath(g.GinkgoT(), "nto", "default-irq-smp-affinity.yaml") isSNO := utils.IsSNOCluster(oc) diff --git a/test/extended/utils/nto_util.go b/test/extended/utils/nto_util.go index 68e29ed20..31711d0f7 100644 --- a/test/extended/utils/nto_util.go +++ b/test/extended/utils/nto_util.go @@ -9,6 +9,7 @@ import ( extendedbindata "github.com/openshift/cluster-node-tuning-operator/test/extended/bindata" + g "github.com/onsi/ginkgo/v2" o "github.com/onsi/gomega" ) @@ -79,6 +80,13 @@ func IsNTOPodInstalled(oc *CLI, namespace string) bool { return true } +func SkipNoNTO(oc *CLI, namespace string) { + // test requires NTO to be installed + if !IsNTOPodInstalled(oc, namespace) { + g.Skip("NTO is not installed - skipping test ...") + } +} + func GetDefaultSMPAffinityBitMaskbyCPUCores(oc *CLI, workerNodeName string) string { // Get CPU number in specified worker nodes cpuCoresStdOut, err := oc.AsAdmin().WithoutNamespace().Run("get").Args("node", workerNodeName, "-ojsonpath={.status.capacity.cpu}").Output() @@ -494,6 +502,17 @@ func IsSNOCluster(oc *CLI) bool { return len(strings.Split(strings.TrimSpace(nodeCount), "\n")) == 1 } +// IsRosaCluster determines whether the cluster is a Red Hat OpenShift Service on AWS (ROSA) cluster +// Parameters: +// - oc: CLI client for interacting with the OpenShift cluster +// +// 
Returns: +// - bool: true if cluster is ROSA, false otherwise +func IsRosaCluster(oc *CLI) bool { + product, _ := oc.WithoutNamespace().AsAdmin().Run("get").Args("clusterclaims/product.open-cluster-management.io", "-o=jsonpath={.spec.value}").Output() + return strings.Compare(product, "ROSA") == 0 +} + func DebugNodeRetryWithOptionsAndChrootWithStdErr(oc *CLI, nodeName string, options []string, command ...string) (string, string, error) { stdout, stderr, err := DebugNode(oc, nodeName, options, true, true, command...) return stdout, stderr, err