From d137d50f6abe43bd27038a135f2d16eb85d30fa0 Mon Sep 17 00:00:00 2001 From: zhixiongning Date: Tue, 10 Mar 2026 00:00:17 +0800 Subject: [PATCH 1/2] fix MetaClient bug --- include/nebula/mclient/MetaClient.h | 8 +- src/mclient/MetaClient.cpp | 115 ++++++++++++++++++++++------ 2 files changed, 100 insertions(+), 23 deletions(-) diff --git a/include/nebula/mclient/MetaClient.h b/include/nebula/mclient/MetaClient.h index 5419500e..2abfa1be 100644 --- a/include/nebula/mclient/MetaClient.h +++ b/include/nebula/mclient/MetaClient.h @@ -11,6 +11,7 @@ #include #include #include +#include #include "common/datatypes/HostAddr.h" #include "common/thrift/ThriftTypes.h" @@ -108,7 +109,10 @@ class MetaClient { void getResponse(Request req, RemoteFunc remoteFunc, RespGenerator respGen, - folly::Promise> pro); + folly::Promise> pro, + int32_t retry = 0, + int32_t retry_limit = 3); + void updateLeader(HostAddr leader = HostAddr()); private: std::vector metaAddrs_; @@ -116,6 +120,8 @@ class MetaClient { SpaceNameIdMap spaceIndexByName_; SpaceEdgeNameTypeMap spaceEdgeIndexByName_; SpaceTagNameTypeMap spaceTagIndexByName_; + std::mutex hostLock_; + HostAddr leader_; std::unordered_map, HostAddr, pair_hash> spacePartLeaderMap_; std::unordered_map> spacePartsMap_; std::shared_ptr ioExecutor_; diff --git a/src/mclient/MetaClient.cpp b/src/mclient/MetaClient.cpp index 7715519c..109c343c 100644 --- a/src/mclient/MetaClient.cpp +++ b/src/mclient/MetaClient.cpp @@ -234,7 +234,12 @@ void MetaClient::getResponse(Request req, RespGenerator respGen, folly::Promise> pro) { auto* evb = DCHECK_NOTNULL(ioExecutor_)->getEventBase(); - HostAddr host = metaAddrs_.back(); + + HostAddr host; + { + std::lock_guard holder(hostLock_); + host = leader_; + } folly::via(evb, [host, evb, @@ -242,28 +247,94 @@ void MetaClient::getResponse(Request req, remoteFunc = std::move(remoteFunc), respGen = std::move(respGen), pro = std::move(pro), + retry, + retryLimit, this]() mutable { - auto client = clientsMan_->client(host, evb, false, mConfig_.clientTimeoutInMs_); - LOG(INFO) << "Send request to meta " << host; - remoteFunc(client, req) - .via(evb) - .then([host, respGen = std::move(respGen), pro = std::move(pro)]( - folly::Try&& t) mutable { - // exception occurred during RPC - if (t.hasException()) { - LOG(ERROR) << "Send request to meta" << host << " failed"; - pro.setValue(std::make_pair(false, Response())); - return; - } - auto&& resp = t.value(); - if (resp.get_code() == nebula::cpp2::ErrorCode::SUCCEEDED) { - // succeeded - pro.setValue(respGen(std::move(resp))); - return; - } - pro.setValue(std::make_pair(false, Response())); - }); // then - }); // via + auto client = clientsMan_->client(host, evb, false, mConfig_.clientTimeoutInMs_); + LOG(INFO) << "Send request to meta " << host; + remoteFunc(client, req) + .via(evb) + .then([host, + req = std::move(req), + remoteFunc = std::move(remoteFunc), + respGen = std::move(respGen), + pro = std::move(pro), + retry, + retryLimit, + evb, + this](folly::Try&& t) mutable { + // exception occurred during RPC + if (t.hasException()) { + updateLeader(); + if (retry < retryLimit) { + evb->runAfterDelay( + [req = std::move(req), + remoteFunc = std::move(remoteFunc), + respGen = std::move(respGen), + pro = std::move(pro), + retry, + retryLimit, + this]() mutable { + getResponse(std::move(req), + std::move(remoteFunc), + std::move(respGen), + std::move(pro), + retry + 1, + retryLimit); + }, + 1000); + return; + } else { + LOG(ERROR) << "Send request to " << host << ", exceed retry limit"; + pro.setValue(std::make_pair(false, Response())); + } + return; + } + + auto&& resp = t.value(); + auto code = resp.get_code(); + if (code == nebula::cpp2::ErrorCode::SUCCEEDED) { + // succeeded + pro.setValue(respGen(std::move(resp))); + return; + } else if (code == nebula::cpp2::ErrorCode::E_LEADER_CHANGED || + code == nebula::cpp2::ErrorCode::E_MACHINE_NOT_FOUND) { + updateLeader(resp.get_leader()); + if (retry < retryLimit) { + evb->runAfterDelay( + [req = std::move(req), + remoteFunc = std::move(remoteFunc), + respGen = std::move(respGen), + pro = std::move(pro), + retry, + retryLimit, + this]() mutable { + getResponse(std::move(req), + std::move(remoteFunc), + std::move(respGen), + std::move(pro), + retry + 1, + retryLimit); + }, + 1000); + return; + } + } else if (code == nebula::cpp2::ErrorCode::E_CLIENT_SERVER_INCOMPATIBLE) { + pro.setValue(respGen(std::move(resp))); + return; + } + pro.setValue(std::make_pair(false, Response())); + }); // then + }); // via +} + +void MetaClient::updateLeader(HostAddr leader) { + std::lock_guard holder(hostLock_); + if (leader != HostAddr("", 0)) { + leader_ = leader; + } else { + leader_ = addrs_[folly::Random::rand64(addrs_.size())]; + } } } // namespace nebula From ed5b9d549fba8bd1b9777242c849599fdb7ac83e Mon Sep 17 00:00:00 2001 From: nobizx <43176574+zhixiongning@users.noreply.github.com> Date: Tue, 10 Mar 2026 11:41:50 +0800 Subject: [PATCH 2/2] Init the value of MetaClient leader --- src/mclient/MetaClient.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mclient/MetaClient.cpp b/src/mclient/MetaClient.cpp index 109c343c..132a5da2 100644 --- a/src/mclient/MetaClient.cpp +++ b/src/mclient/MetaClient.cpp @@ -23,6 +23,7 @@ MetaClient::MetaClient(const std::vector& metaAddrs, const MConfig& metaAddrs_.emplace_back(ip_port[0], folly::to(ip_port[1])); } CHECK(!metaAddrs_.empty()) << "metaAddrs_ is empty"; + leader_ = metaAddrs_.back(); mConfig_ = mConfig; SSLConfig sslcfg; sslcfg.enable_mtls = mConfig_.enableMTLS_;