From e08c9a316c8f2baed53efe09e417bbf55d9233ee Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 26 Jan 2026 16:25:47 +1300 Subject: [PATCH 1/8] [ML] Better cleanup of normalizer quantiles state Ensure that quantiles state documents are always removed after an error has been encountered. This is intended to stop the disk being cluttered with many such documents after repeated failures. --- bin/normalize/Main.cc | 53 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/bin/normalize/Main.cc b/bin/normalize/Main.cc index 9aa8e9489..fbc7d7155 100644 --- a/bin/normalize/Main.cc +++ b/bin/normalize/Main.cc @@ -46,6 +46,31 @@ #include #include +namespace { + +//! A helper to ensure that quantiles state files always get deleted on failure. +//! They may also be explicitly be deleted on request as well but that is handled separately by the happy path. +class CRemoveQuantilesStateOnFailure { +public: + CRemoveQuantilesStateOnFailure() = default; + explicit CRemoveQuantilesStateOnFailure(const std::string& quantilesStateFile) + : m_QuantilesStateFile{quantilesStateFile} {} + ~CRemoveQuantilesStateOnFailure() { + // Always delete quantiles state files on failure, else we run the risk of filling the disk after repeated failures. + // They should still exist in ES should they need to be examined. + if (m_QuantilesStateFile.empty()) { + return; + } + LOG_WARN(<< "Deleting quantiles state file '" << m_QuantilesStateFile << "'"); + // Ignore the return value from remove, the file may have already been deleted. + std::remove(m_QuantilesStateFile.c_str()); + } + +private: + std::string m_QuantilesStateFile; +}; +} + int main(int argc, char** argv) { // Read command line options std::string modelConfigFile; @@ -63,11 +88,20 @@ int main(int argc, char** argv) { bool deleteStateFiles{false}; bool writeCsv{false}; bool validElasticLicenseKeyConfirmed{false}; - if (ml::normalize::CCmdLineParser::parse( - argc, argv, modelConfigFile, logProperties, logPipe, bucketSpan, - lengthEncodedInput, namedPipeConnectTimeout, inputFileName, - isInputFileNamedPipe, outputFileName, isOutputFileNamedPipe, quantilesStateFile, - deleteStateFiles, writeCsv, validElasticLicenseKeyConfirmed) == false) { + std::unique_ptr removeQuantilesStateOnFailure; + + const bool parseSuccess = ml::normalize::CCmdLineParser::parse( + argc, argv, modelConfigFile, logProperties, logPipe, bucketSpan, + lengthEncodedInput, namedPipeConnectTimeout, inputFileName, + isInputFileNamedPipe, outputFileName, isOutputFileNamedPipe, quantilesStateFile, + deleteStateFiles, writeCsv, validElasticLicenseKeyConfirmed); + + if (!quantilesStateFile.empty()) { + removeQuantilesStateOnFailure = + std::make_unique(quantilesStateFile); + } + + if (parseSuccess == false) { return EXIT_FAILURE; } @@ -158,9 +192,6 @@ int main(int argc, char** argv) { LOG_FATAL(<< "Failed to initialize normalizer"); return EXIT_FAILURE; } - if (deleteStateFiles) { - std::remove(quantilesStateFile.c_str()); - } } // Now handle the numbers to be normalised from stdin @@ -176,5 +207,11 @@ int main(int argc, char** argv) { // message indicating early exit then the process has probably core dumped LOG_DEBUG(<< "ML normalizer exiting"); + // No need for a warning here so we reset the cleanup function and delete the file explicitly if requested. + removeQuantilesStateOnFailure.reset(); + if (deleteStateFiles) { + std::remove(quantilesStateFile.c_str()); + } + return EXIT_SUCCESS; } From 06c44d2f469c360437fb13a75295c152af1e0e13 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 26 Jan 2026 16:49:29 +1300 Subject: [PATCH 2/8] Update changelog --- docs/CHANGELOG.asciidoc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 20e500660..c1601b136 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -27,6 +27,13 @@ //=== Bug Fixes //=== Regressions + +== {es} version 9.4.0 + +=== Enhancements + +* Better error handling regarding normalizer quantiles state documents (See {ml-pull}[#2894]) + == {es} version 9.3.0 === Enhancements From dcd94db58d679eb30105b2a902630ee9ae941a89 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 27 Jan 2026 11:40:37 +1300 Subject: [PATCH 3/8] Attend to review comments --- bin/normalize/Main.cc | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/bin/normalize/Main.cc b/bin/normalize/Main.cc index fbc7d7155..349e38804 100644 --- a/bin/normalize/Main.cc +++ b/bin/normalize/Main.cc @@ -52,22 +52,25 @@ namespace { //! They may also be explicitly be deleted on request as well but that is handled separately by the happy path. class CRemoveQuantilesStateOnFailure { public: - CRemoveQuantilesStateOnFailure() = default; - explicit CRemoveQuantilesStateOnFailure(const std::string& quantilesStateFile) - : m_QuantilesStateFile{quantilesStateFile} {} + CRemoveQuantilesStateOnFailure() = delete; + CRemoveQuantilesStateOnFailure(const std::string& quantilesStateFile, bool deleteStateFiles = false) + : m_QuantilesStateFile{quantilesStateFile}, m_DeleteStateFiles{deleteStateFiles} {} ~CRemoveQuantilesStateOnFailure() { - // Always delete quantiles state files on failure, else we run the risk of filling the disk after repeated failures. + // Always delete quantiles state files if requested to do so, even on failure, + // else we run the risk of filling the disk after repeated failures. // They should still exist in ES should they need to be examined. - if (m_QuantilesStateFile.empty()) { + if (m_QuantilesStateFile.empty() || m_DeleteStateFiles == false) { return; } - LOG_WARN(<< "Deleting quantiles state file '" << m_QuantilesStateFile << "'"); - // Ignore the return value from remove, the file may have already been deleted. - std::remove(m_QuantilesStateFile.c_str()); + LOG_DEBUG(<< "Deleting quantiles state file '" << m_QuantilesStateFile << "'"); + if (std::remove(m_QuantilesStateFile.c_str()) != 0) { + LOG_WARN(<< "Failed to delete quantiles state file '" << m_QuantilesStateFile << "': " << strerror(errno)); + } } private: std::string m_QuantilesStateFile; + bool m_DeleteStateFiles{false}; }; } @@ -98,7 +101,7 @@ int main(int argc, char** argv) { if (!quantilesStateFile.empty()) { removeQuantilesStateOnFailure = - std::make_unique(quantilesStateFile); + std::make_unique(quantilesStateFile, deleteStateFiles); } if (parseSuccess == false) { From 95e1486a57f797f855b7cb4798646109e45fc33a Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 27 Jan 2026 12:12:46 +1300 Subject: [PATCH 4/8] Reuse quantile state file deleter for autodetect as well as normalizer --- bin/autodetect/Main.cc | 24 +++++++++++---- bin/normalize/Main.cc | 33 ++------------------- include/core/CStateFileRemover.h | 51 ++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 36 deletions(-) create mode 100644 include/core/CStateFileRemover.h diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 7ca5a3233..b245df0af 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include @@ -119,14 +119,23 @@ int main(int argc, char** argv) { std::size_t maxAnomalyRecords{100}; bool memoryUsage{false}; bool validElasticLicenseKeyConfirmed{false}; - if (ml::autodetect::CCmdLineParser::parse( + std::unique_ptr removeQuantilesStateOnFailure; + + const bool parseSuccess = ml::autodetect::CCmdLineParser::parse( argc, argv, configFile, filtersConfigFile, eventsConfigFile, modelConfigFile, logProperties, logPipe, delimiter, lengthEncodedInput, timeFormat, quantilesStateFile, deleteStateFiles, bucketPersistInterval, namedPipeConnectTimeout, inputFileName, isInputFileNamedPipe, outputFileName, isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe, persistFileName, isPersistFileNamedPipe, isPersistInForeground, - maxAnomalyRecords, memoryUsage, validElasticLicenseKeyConfirmed) == false) { + maxAnomalyRecords, memoryUsage, validElasticLicenseKeyConfirmed); + + if (!quantilesStateFile.empty()) { + removeQuantilesStateOnFailure = + std::make_unique(quantilesStateFile, deleteStateFiles); + } + + if (parseSuccess == false) { return EXIT_FAILURE; } @@ -293,9 +302,6 @@ int main(int argc, char** argv) { LOG_FATAL(<< "Failed to restore quantiles and initialize normalizer"); return EXIT_FAILURE; } - if (deleteStateFiles) { - std::remove(quantilesStateFile.c_str()); - } } // The categorizer knows how to assign categories to records @@ -346,5 +352,11 @@ int main(int argc, char** argv) { // message indicating early exit then the process has probably core dumped LOG_DEBUG(<< "ML anomaly detector job exiting"); + // No need for a warning here so we reset the cleanup function and delete the file explicitly if requested. + removeQuantilesStateOnFailure.reset(); + if (deleteStateFiles) { + std::remove(quantilesStateFile.c_str()); + } + return EXIT_SUCCESS; } diff --git a/bin/normalize/Main.cc b/bin/normalize/Main.cc index 349e38804..3f3c3be5e 100644 --- a/bin/normalize/Main.cc +++ b/bin/normalize/Main.cc @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -46,34 +47,6 @@ #include #include -namespace { - -//! A helper to ensure that quantiles state files always get deleted on failure. -//! They may also be explicitly be deleted on request as well but that is handled separately by the happy path. -class CRemoveQuantilesStateOnFailure { -public: - CRemoveQuantilesStateOnFailure() = delete; - CRemoveQuantilesStateOnFailure(const std::string& quantilesStateFile, bool deleteStateFiles = false) - : m_QuantilesStateFile{quantilesStateFile}, m_DeleteStateFiles{deleteStateFiles} {} - ~CRemoveQuantilesStateOnFailure() { - // Always delete quantiles state files if requested to do so, even on failure, - // else we run the risk of filling the disk after repeated failures. - // They should still exist in ES should they need to be examined. - if (m_QuantilesStateFile.empty() || m_DeleteStateFiles == false) { - return; - } - LOG_DEBUG(<< "Deleting quantiles state file '" << m_QuantilesStateFile << "'"); - if (std::remove(m_QuantilesStateFile.c_str()) != 0) { - LOG_WARN(<< "Failed to delete quantiles state file '" << m_QuantilesStateFile << "': " << strerror(errno)); - } - } - -private: - std::string m_QuantilesStateFile; - bool m_DeleteStateFiles{false}; -}; -} - int main(int argc, char** argv) { // Read command line options std::string modelConfigFile; @@ -91,7 +64,7 @@ int main(int argc, char** argv) { bool deleteStateFiles{false}; bool writeCsv{false}; bool validElasticLicenseKeyConfirmed{false}; - std::unique_ptr removeQuantilesStateOnFailure; + std::unique_ptr removeQuantilesStateOnFailure; const bool parseSuccess = ml::normalize::CCmdLineParser::parse( argc, argv, modelConfigFile, logProperties, logPipe, bucketSpan, @@ -101,7 +74,7 @@ int main(int argc, char** argv) { if (!quantilesStateFile.empty()) { removeQuantilesStateOnFailure = - std::make_unique(quantilesStateFile, deleteStateFiles); + std::make_unique(quantilesStateFile, deleteStateFiles); } if (parseSuccess == false) { diff --git a/include/core/CStateFileRemover.h b/include/core/CStateFileRemover.h new file mode 100644 index 000000000..d61641f6a --- /dev/null +++ b/include/core/CStateFileRemover.h @@ -0,0 +1,51 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ +#ifndef INCLUDED_ml_core_CStateFileRemover_h +#define INCLUDED_ml_core_CStateFileRemover_h + +#include + +#include + +namespace ml { +namespace core { + +//! \brief +//! Ensures that deletion of state files occurs even on process failure. +//! +//! DESCRIPTION:\n +//! A helper to ensure that quantiles state files always get deleted on failure. +//! They may also be explicitly be deleted on request as well but that is handled separately by the happy path. +class CStateFileRemover { +public: + CStateFileRemover() = delete; + CStateFileRemover(const std::string& quantilesStateFile, bool deleteStateFiles = false) + : m_QuantilesStateFile{quantilesStateFile}, m_DeleteStateFiles{deleteStateFiles} {} + ~CStateFileRemover() { + // Always delete quantiles state files if requested to do so, even on failure, + // else we run the risk of filling the disk after repeated failures. + // They should still exist in ES should they need to be examined. + if (m_QuantilesStateFile.empty() || m_DeleteStateFiles == false) { + return; + } + LOG_DEBUG(<< "Deleting quantiles state file '" << m_QuantilesStateFile << "'"); + if (std::remove(m_QuantilesStateFile.c_str()) != 0) { + LOG_WARN(<< "Failed to delete quantiles state file '" << m_QuantilesStateFile << "': " << strerror(errno)); + } + } + +private: + std::string m_QuantilesStateFile; + bool m_DeleteStateFiles{false}; +};} +} + +#endif // INCLUDED_ml_core_CStateFileRemover_h From 1713588f64da269d75e5404a9af12f4dd80e1d18 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 27 Jan 2026 12:19:08 +1300 Subject: [PATCH 5/8] Log a warning message if unable to remove quantiles state file. --- bin/autodetect/Main.cc | 5 ++++- bin/normalize/Main.cc | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index b245df0af..6990e5841 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -355,7 +355,10 @@ int main(int argc, char** argv) { // No need for a warning here so we reset the cleanup function and delete the file explicitly if requested. removeQuantilesStateOnFailure.reset(); if (deleteStateFiles) { - std::remove(quantilesStateFile.c_str()); + if (std::remove(quantilesStateFile.c_str()) != 0) { + LOG_WARN(<< "Failed to delete quantiles state file '" + << quantilesStateFile << "': " << strerror(errno)); + } } return EXIT_SUCCESS; diff --git a/bin/normalize/Main.cc b/bin/normalize/Main.cc index 3f3c3be5e..d8499db22 100644 --- a/bin/normalize/Main.cc +++ b/bin/normalize/Main.cc @@ -186,7 +186,9 @@ int main(int argc, char** argv) { // No need for a warning here so we reset the cleanup function and delete the file explicitly if requested. removeQuantilesStateOnFailure.reset(); if (deleteStateFiles) { - std::remove(quantilesStateFile.c_str()); + if (std::remove(quantilesStateFile.c_str()) != 0) { + LOG_WARN(<< "Failed to delete quantiles state file '" << quantilesStateFile << "': " << strerror(errno)); + } } return EXIT_SUCCESS; From 30d210345898654faed555280f60b2cf5a981d0b Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 27 Jan 2026 12:20:17 +1300 Subject: [PATCH 6/8] Tweak changelog entry to be more generic. --- docs/CHANGELOG.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index c1601b136..251c8d8eb 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -32,7 +32,7 @@ === Enhancements -* Better error handling regarding normalizer quantiles state documents (See {ml-pull}[#2894]) +* Better error handling regarding quantiles state documents (See {ml-pull}[#2894]) == {es} version 9.3.0 From b033c7c799a6bcc389ffe8933a29f3d509a4b94b Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 27 Jan 2026 13:27:57 +1300 Subject: [PATCH 7/8] Tighten up usage of CStateRestorer. --- include/core/CStateFileRemover.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/core/CStateFileRemover.h b/include/core/CStateFileRemover.h index d61641f6a..7bcaeda27 100644 --- a/include/core/CStateFileRemover.h +++ b/include/core/CStateFileRemover.h @@ -24,10 +24,17 @@ namespace core { //! DESCRIPTION:\n //! A helper to ensure that quantiles state files always get deleted on failure. //! They may also be explicitly be deleted on request as well but that is handled separately by the happy path. +//! +//! IMPLEMENTATION DECISIONS:\n +//! Not copyable or moveable. No default construction. class CStateFileRemover { public: CStateFileRemover() = delete; - CStateFileRemover(const std::string& quantilesStateFile, bool deleteStateFiles = false) + CStateFileRemover(const CStateFileRemover&) = delete; + CStateFileRemover& operator=(const CStateFileRemover&) = delete; + CStateFileRemover(CStateFileRemover&&) = delete; + CStateFileRemover& operator=(CStateFileRemover&&) = delete; + explicit CStateFileRemover(const std::string& quantilesStateFile, bool deleteStateFiles = false) : m_QuantilesStateFile{quantilesStateFile}, m_DeleteStateFiles{deleteStateFiles} {} ~CStateFileRemover() { // Always delete quantiles state files if requested to do so, even on failure, From 9ee974cae45219f9129d0c65bd6143a9d75905df Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 27 Jan 2026 15:47:30 +1300 Subject: [PATCH 8/8] Formatting --- bin/autodetect/Main.cc | 18 +++++++++--------- bin/normalize/Main.cc | 7 ++++--- include/core/CStateFileRemover.h | 9 ++++++--- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 6990e5841..a18761a0e 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -122,17 +122,17 @@ int main(int argc, char** argv) { std::unique_ptr removeQuantilesStateOnFailure; const bool parseSuccess = ml::autodetect::CCmdLineParser::parse( - argc, argv, configFile, filtersConfigFile, eventsConfigFile, - modelConfigFile, logProperties, logPipe, delimiter, lengthEncodedInput, - timeFormat, quantilesStateFile, deleteStateFiles, bucketPersistInterval, - namedPipeConnectTimeout, inputFileName, isInputFileNamedPipe, outputFileName, - isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe, - persistFileName, isPersistFileNamedPipe, isPersistInForeground, - maxAnomalyRecords, memoryUsage, validElasticLicenseKeyConfirmed); + argc, argv, configFile, filtersConfigFile, eventsConfigFile, + modelConfigFile, logProperties, logPipe, delimiter, lengthEncodedInput, + timeFormat, quantilesStateFile, deleteStateFiles, bucketPersistInterval, + namedPipeConnectTimeout, inputFileName, isInputFileNamedPipe, outputFileName, + isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe, + persistFileName, isPersistFileNamedPipe, isPersistInForeground, + maxAnomalyRecords, memoryUsage, validElasticLicenseKeyConfirmed); if (!quantilesStateFile.empty()) { - removeQuantilesStateOnFailure = - std::make_unique(quantilesStateFile, deleteStateFiles); + removeQuantilesStateOnFailure = std::make_unique( + quantilesStateFile, deleteStateFiles); } if (parseSuccess == false) { diff --git a/bin/normalize/Main.cc b/bin/normalize/Main.cc index d8499db22..c25b6e71b 100644 --- a/bin/normalize/Main.cc +++ b/bin/normalize/Main.cc @@ -73,8 +73,8 @@ int main(int argc, char** argv) { deleteStateFiles, writeCsv, validElasticLicenseKeyConfirmed); if (!quantilesStateFile.empty()) { - removeQuantilesStateOnFailure = - std::make_unique(quantilesStateFile, deleteStateFiles); + removeQuantilesStateOnFailure = std::make_unique( + quantilesStateFile, deleteStateFiles); } if (parseSuccess == false) { @@ -187,7 +187,8 @@ int main(int argc, char** argv) { removeQuantilesStateOnFailure.reset(); if (deleteStateFiles) { if (std::remove(quantilesStateFile.c_str()) != 0) { - LOG_WARN(<< "Failed to delete quantiles state file '" << quantilesStateFile << "': " << strerror(errno)); + LOG_WARN(<< "Failed to delete quantiles state file '" + << quantilesStateFile << "': " << strerror(errno)); } } diff --git a/include/core/CStateFileRemover.h b/include/core/CStateFileRemover.h index 7bcaeda27..d1d9ff237 100644 --- a/include/core/CStateFileRemover.h +++ b/include/core/CStateFileRemover.h @@ -34,7 +34,8 @@ class CStateFileRemover { CStateFileRemover& operator=(const CStateFileRemover&) = delete; CStateFileRemover(CStateFileRemover&&) = delete; CStateFileRemover& operator=(CStateFileRemover&&) = delete; - explicit CStateFileRemover(const std::string& quantilesStateFile, bool deleteStateFiles = false) + explicit CStateFileRemover(const std::string& quantilesStateFile, + bool deleteStateFiles = false) : m_QuantilesStateFile{quantilesStateFile}, m_DeleteStateFiles{deleteStateFiles} {} ~CStateFileRemover() { // Always delete quantiles state files if requested to do so, even on failure, @@ -45,14 +46,16 @@ class CStateFileRemover { } LOG_DEBUG(<< "Deleting quantiles state file '" << m_QuantilesStateFile << "'"); if (std::remove(m_QuantilesStateFile.c_str()) != 0) { - LOG_WARN(<< "Failed to delete quantiles state file '" << m_QuantilesStateFile << "': " << strerror(errno)); + LOG_WARN(<< "Failed to delete quantiles state file '" + << m_QuantilesStateFile << "': " << strerror(errno)); } } private: std::string m_QuantilesStateFile; bool m_DeleteStateFiles{false}; -};} +}; +} } #endif // INCLUDED_ml_core_CStateFileRemover_h