diff --git a/bin/pytorch_inference/CResultWriter.cc b/bin/pytorch_inference/CResultWriter.cc
index 34389dad4..ebb809e71 100644
--- a/bin/pytorch_inference/CResultWriter.cc
+++ b/bin/pytorch_inference/CResultWriter.cc
@@ -58,7 +58,8 @@ void CResultWriter::writeError(const std::string_view& requestId, const std::str
 void CResultWriter::wrapAndWriteInnerResponse(const std::string& innerResponse,
                                               const std::string& requestId,
                                               bool isCacheHit,
-                                              std::uint64_t timeMs) {
+                                              std::uint64_t timeMs,
+                                              std::size_t residentSize) {
     core::CBoostJsonConcurrentLineWriter jsonWriter{m_WrappedOutputStream};
     jsonWriter.onObjectBegin();
     jsonWriter.onKey(CCommandParser::REQUEST_ID);
@@ -67,6 +68,11 @@ void CResultWriter::wrapAndWriteInnerResponse(const std::string& innerResponse,
     jsonWriter.onBool(isCacheHit);
     jsonWriter.onKey(TIME_MS);
     jsonWriter.onUint64(timeMs);
+    jsonWriter.onKey(PROCESS_STATS);
+    jsonWriter.onObjectBegin();
+    jsonWriter.onKey(MEMORY_RESIDENT_SET_SIZE);
+    jsonWriter.onUint64(residentSize);
+    jsonWriter.onObjectEnd();
     jsonWriter.rawKeyAndValue(innerResponse);
     jsonWriter.onObjectEnd();
 }
diff --git a/bin/pytorch_inference/CResultWriter.h b/bin/pytorch_inference/CResultWriter.h
index 8d809dc9d..78ec99e51 100644
--- a/bin/pytorch_inference/CResultWriter.h
+++ b/bin/pytorch_inference/CResultWriter.h
@@ -83,7 +83,8 @@ class CResultWriter : public TStringBufWriter {
     void wrapAndWriteInnerResponse(const std::string& innerResponse,
                                    const std::string& requestId,
                                    bool isCacheHit,
-                                   std::uint64_t timeMs);
+                                   std::uint64_t timeMs,
+                                   std::size_t residentSize);
 
     //! Write the prediction portion of an inference result.
     template
diff --git a/bin/pytorch_inference/Main.cc b/bin/pytorch_inference/Main.cc
index 00adee1df..84bc4bf46 100644
--- a/bin/pytorch_inference/Main.cc
+++ b/bin/pytorch_inference/Main.cc
@@ -133,9 +133,11 @@ bool handleRequest(ml::torch::CCommandParser::CRequestCacheInterface& cache,
             }
         },
         [&](const auto& innerResponseJson_, bool isCacheHit) {
-            resultWriter.wrapAndWriteInnerResponse(innerResponseJson_,
-                                                   requestId, isCacheHit,
-                                                   stopWatch.stop());
+            std::size_t residentSetSize =
+                ml::core::CProcessStats::residentSetSize();
+            resultWriter.wrapAndWriteInnerResponse(
+                innerResponseJson_, requestId, isCacheHit,
+                stopWatch.stop(), residentSetSize);
         });
     });
     return true;
diff --git a/bin/pytorch_inference/unittest/CResultWriterTest.cc b/bin/pytorch_inference/unittest/CResultWriterTest.cc
index 97b99038a..0b3df6543 100644
--- a/bin/pytorch_inference/unittest/CResultWriterTest.cc
+++ b/bin/pytorch_inference/unittest/CResultWriterTest.cc
@@ -99,11 +99,12 @@ BOOST_AUTO_TEST_CASE(testWrapAndWriteInferenceResult) {
     std::ostringstream output;
     {
         ml::torch::CResultWriter resultWriter{output};
-        resultWriter.wrapAndWriteInnerResponse(innerPortion, "req4", true, 123);
+        resultWriter.wrapAndWriteInnerResponse(innerPortion, "req4", true, 123, 111);
     }
-    std::string expected = "[{\"request_id\":\"req4\",\"cache_hit\":true,"
-                           "\"time_ms\":123,\"result\":{\"inference\":"
-                           "[[[1.0,1.0,1.0],[1.0,1.0,1.0],[1.0,1.0,1.0],[1.0,1.0,1.0],[1.0,1.0,1.0]]]}}\n]";
+    std::string expected =
+        "[{\"request_id\":\"req4\",\"cache_hit\":true,"
+        "\"time_ms\":123,\"process_stats\":{\"memory_rss\":111},\"result\":{\"inference\":"
+        "[[[1.0,1.0,1.0],[1.0,1.0,1.0],[1.0,1.0,1.0],[1.0,1.0,1.0],[1.0,1.0,1.0]]]}}\n]";
 
     std::string actual = output.str();
     LOG_INFO(<< "expected: " << expected);
diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc
index 20e500660..f11bfd4bb 100644
--- a/docs/CHANGELOG.asciidoc
+++ b/docs/CHANGELOG.asciidoc
@@ -32,6 +32,7 @@
 
 === Enhancements
 
 * Downgrade log severity for a batch of recoverable errors. (See {ml-pull}2889[#2889].)
+* Add pytorch_inference process RSS memory stat to the inference response. (See {ml-pull}2896[#2896].)
 
 == {es} version 9.2.0
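
Note: with this change every wrapped inference response carries a "process_stats" object alongside "time_ms". A sketch of the resulting envelope, pretty-printed for readability (the field values are the ones from the unit test above, the inference matrix is abbreviated, and a live pytorch_inference process would report its actual resident set size rather than 111):

    {
      "request_id": "req4",
      "cache_hit": true,
      "time_ms": 123,
      "process_stats": {
        "memory_rss": 111
      },
      "result": {
        "inference": [[[1.0, 1.0, 1.0], ...]]
      }
    }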