-
Notifications
You must be signed in to change notification settings - Fork 2.8k
fix: use shdict instead of events module for nodes data exchange #13066
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
shreemaan-abhishek
merged 14 commits into
apache:master
from
shreemaan-abhishek:fix/consul-uses-shdict-for-nodes-data
Mar 13, 2026
+137
−62
Merged
Changes from all commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
9ad59f2
fix: use shdict instead of events module for nodes data exchange
shreemaan-abhishek db63da1
f
shreemaan-abhishek 05b9aa5
feat: Implement per-worker cache and flush mechanism for Consul servi…
shreemaan-abhishek f484131
fix(discovery/consul): prevent transient 503s during service updates
shreemaan-abhishek 541dccf
ensure all workers have `dump_params`
shreemaan-abhishek e13e543
Update apisix/discovery/consul/init.lua
shreemaan-abhishek 13f5af5
use the local consul_services variable to write_dump_services
shreemaan-abhishek 8bde72e
when a certain node is not found, set the `nodes_cache` of that se…
shreemaan-abhishek f6d0f36
Merge branch 'fix/consul-uses-shdict-for-nodes-data' of github.com:sh…
shreemaan-abhishek e15e81d
use lrucache
shreemaan-abhishek 6b6feaf
sleep
shreemaan-abhishek 7f0fd4a
configurable
shreemaan-abhishek 6739fc9
need the same way too
shreemaan-abhishek db0e4f6
avoid shdict contention
shreemaan-abhishek File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,6 +31,7 @@ local ngx_timer_at = ngx.timer.at | |
| local ngx_timer_every = ngx.timer.every | ||
| local log = core.log | ||
| local json_delay_encode = core.json.delay_encode | ||
| local process = require("ngx.process") | ||
| local ngx_worker_id = ngx.worker.id | ||
| local exiting = ngx.worker.exiting | ||
| local thread_spawn = ngx.thread.spawn | ||
|
|
@@ -42,16 +43,28 @@ local null = ngx.null | |
| local type = type | ||
| local next = next | ||
|
|
||
| local all_services = core.table.new(0, 5) | ||
| local consul_dict = ngx.shared.consul | ||
| if not consul_dict then | ||
| error("lua_shared_dict \"consul\" not configured") | ||
| end | ||
|
|
||
| local default_service | ||
| local default_weight | ||
| local sort_type | ||
| local skip_service_map = core.table.new(0, 1) | ||
| local dump_params | ||
|
|
||
| local events | ||
| local events_list | ||
| local consul_services | ||
| -- Per-worker LRU cache: avoids shared dict access on every request. | ||
| -- neg_ttl caches unknown services. invalid_stale ensures expired | ||
| -- entries are refreshed from the shared dict instead of re-cached. | ||
| local nodes_cache = core.lrucache.new({ | ||
| ttl = 1, | ||
| count = 1024, | ||
| invalid_stale = true, | ||
| neg_ttl = 1, | ||
| neg_count = 64, | ||
| }) | ||
|
|
||
| local default_skip_services = {"consul"} | ||
| local default_random_range = 5 | ||
|
|
@@ -66,53 +79,94 @@ local _M = { | |
| } | ||
|
|
||
|
|
||
| local function discovery_consul_callback(data, event, source, pid) | ||
| all_services = data | ||
| log.notice("update local variable all_services, event is: ", event, | ||
| "source: ", source, "server pid:", pid, | ||
| ", all services: ", json_delay_encode(all_services, true)) | ||
| end | ||
| local function fetch_node_from_shdict(service_name) | ||
| local value = consul_dict:get(service_name) | ||
| if not value then | ||
| return nil, "consul service not found: " .. service_name | ||
| end | ||
|
|
||
| local nodes, err = core.json.decode(value) | ||
| if not nodes then | ||
| return nil, "failed to decode nodes for service: " | ||
| .. service_name .. ", error: " .. (err or "") | ||
| end | ||
|
|
||
| function _M.all_nodes() | ||
| return all_services | ||
| return nodes | ||
| end | ||
|
|
||
|
|
||
| function _M.nodes(service_name) | ||
| if not all_services then | ||
| log.error("all_services is nil, failed to fetch nodes for : ", service_name) | ||
| return | ||
| function _M.all_nodes() | ||
| local keys = consul_dict:get_keys(0) | ||
| local services = core.table.new(0, #keys) | ||
| for i, key in ipairs(keys) do | ||
| local value = consul_dict:get(key) | ||
| if value then | ||
| local nodes, err = core.json.decode(value) | ||
| if nodes then | ||
| services[key] = nodes | ||
| else | ||
| log.error("failed to decode nodes for service: ", key, ", error: ", err) | ||
| end | ||
| end | ||
|
|
||
| if i % 100 == 0 then | ||
| ngx.sleep(0) | ||
| end | ||
| end | ||
| return services | ||
| end | ||
|
|
||
| local resp_list = all_services[service_name] | ||
|
|
||
| if not resp_list then | ||
| log.error("fetch nodes failed by ", service_name, ", return default service") | ||
| function _M.nodes(service_name) | ||
| local nodes, err = nodes_cache(service_name, nil, | ||
| fetch_node_from_shdict, service_name) | ||
| if not nodes then | ||
| log.error("fetch nodes failed by ", service_name, ", error: ", err) | ||
| return default_service and {default_service} | ||
| end | ||
|
|
||
| log.info("process id: ", ngx_worker_id(), ", all_services[", service_name, "] = ", | ||
| json_delay_encode(resp_list, true)) | ||
| log.info("process id: ", ngx_worker_id(), ", [", service_name, "] = ", | ||
| json_delay_encode(nodes, true)) | ||
|
|
||
| return resp_list | ||
| return nodes | ||
|
shreemaan-abhishek marked this conversation as resolved.
|
||
| end | ||
|
|
||
|
|
||
| local function update_all_services(consul_server_url, up_services) | ||
| -- clean old unused data | ||
| -- write new/updated values first so readers never see a missing service | ||
| local i = 0 | ||
| for k, v in pairs(up_services) do | ||
| local content, err = core.json.encode(v) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. need the same way too |
||
| if content then | ||
| local ok, set_err, forcible = consul_dict:set(k, content) | ||
| if not ok then | ||
| log.error("failed to set nodes for service: ", k, ", error: ", set_err, | ||
| ", please consider increasing lua_shared_dict consul size") | ||
| elseif forcible then | ||
| log.warn("consul shared dict is full, forcibly evicting items while ", | ||
| "setting nodes for service: ", k, | ||
| ", please consider increasing lua_shared_dict consul size") | ||
| end | ||
| else | ||
|
shreemaan-abhishek marked this conversation as resolved.
|
||
| log.error("failed to encode nodes for service: ", k, ", error: ", err) | ||
| end | ||
| i = i + 1 | ||
| if i % 100 == 0 then | ||
| ngx.sleep(0) | ||
| end | ||
| end | ||
|
|
||
| -- then delete keys that are no longer present | ||
| local old_services = consul_services[consul_server_url] or {} | ||
| for k, _ in pairs(old_services) do | ||
| all_services[k] = nil | ||
| if not up_services[k] then | ||
| consul_dict:delete(k) | ||
| end | ||
| end | ||
| core.table.clear(old_services) | ||
|
|
||
| for k, v in pairs(up_services) do | ||
| all_services[k] = v | ||
| end | ||
| consul_services[consul_server_url] = up_services | ||
|
|
||
| log.info("update all services: ", json_delay_encode(all_services, true)) | ||
| log.info("update all services to shared dict") | ||
| end | ||
|
|
||
|
|
||
|
|
@@ -149,14 +203,30 @@ local function read_dump_services() | |
| return | ||
| end | ||
|
|
||
| all_services = entity.services | ||
| log.info("load dump file into memory success") | ||
| for k, v in pairs(entity.services) do | ||
| local content, json_err = core.json.encode(v) | ||
| if content then | ||
| consul_dict:set(k, content) | ||
|
shreemaan-abhishek marked this conversation as resolved.
|
||
| else | ||
| log.error("failed to encode dump service: ", k, ", error: ", json_err) | ||
| end | ||
| end | ||
| log.info("load dump file into shared dict success") | ||
|
shreemaan-abhishek marked this conversation as resolved.
|
||
| end | ||
|
|
||
|
|
||
| local function write_dump_services() | ||
| -- build services from the privileged agent's in-memory tracking table | ||
| -- to avoid a full shared dict scan + JSON decode via _M.all_nodes() | ||
| local services = core.table.new(0, 8) | ||
| for _, svcs in pairs(consul_services) do | ||
| for k, v in pairs(svcs) do | ||
| services[k] = v | ||
| end | ||
| end | ||
|
|
||
| local entity = { | ||
| services = all_services, | ||
| services = services, | ||
| last_update = ngx.time(), | ||
| expire = dump_params.expire, -- later need handle it | ||
|
shreemaan-abhishek marked this conversation as resolved.
|
||
| } | ||
|
|
@@ -556,14 +626,6 @@ function _M.connect(premature, consul_server, retry_delay) | |
|
|
||
| update_all_services(consul_server.consul_server_url, up_services) | ||
|
|
||
| --update events | ||
| local post_ok, post_err = events:post(events_list._source, | ||
| events_list.updating, all_services) | ||
| if not post_ok then | ||
| log.error("post_event failure with ", events_list._source, | ||
| ", update all services error: ", post_err) | ||
| end | ||
|
|
||
| if dump_params then | ||
| ngx_timer_at(0, write_dump_services) | ||
| end | ||
|
|
@@ -611,35 +673,32 @@ end | |
|
|
||
| function _M.init_worker() | ||
| local consul_conf = local_conf.discovery.consul | ||
| dump_params = consul_conf.dump | ||
|
|
||
| if consul_conf.dump then | ||
| local dump = consul_conf.dump | ||
| dump_params = dump | ||
|
|
||
| if dump.load_on_init then | ||
| read_dump_services() | ||
| end | ||
| end | ||
|
|
||
| events = require("apisix.events") | ||
| events_list = events:event_list( | ||
| "discovery_consul_update_all_services", | ||
| "updating" | ||
| ) | ||
|
|
||
| if 0 ~= ngx_worker_id() then | ||
| events:register(discovery_consul_callback, events_list._source, events_list.updating) | ||
| return | ||
| end | ||
|
|
||
| log.notice("consul_conf: ", json_delay_encode(consul_conf, true)) | ||
| default_weight = consul_conf.weight | ||
| sort_type = consul_conf.sort_type | ||
| -- set default service, used when the server node cannot be found | ||
| if consul_conf.default_service then | ||
| default_service = consul_conf.default_service | ||
| default_service.weight = default_weight | ||
| end | ||
|
|
||
| if process.type() ~= "privileged agent" then | ||
| return | ||
| end | ||
|
|
||
| -- flush stale data that may persist across reloads, | ||
| -- since consul_services is re-initialized empty | ||
| consul_dict:flush_all() | ||
|
shreemaan-abhishek marked this conversation as resolved.
shreemaan-abhishek marked this conversation as resolved.
|
||
|
|
||
|
shreemaan-abhishek marked this conversation as resolved.
shreemaan-abhishek marked this conversation as resolved.
|
||
| if consul_conf.dump then | ||
| if consul_conf.dump.load_on_init then | ||
| read_dump_services() | ||
| end | ||
| end | ||
|
|
||
| log.notice("consul_conf: ", json_delay_encode(consul_conf, true)) | ||
|
|
||
| if consul_conf.skip_services then | ||
| skip_service_map = core.table.new(0, #consul_conf.skip_services) | ||
| for _, v in ipairs(consul_conf.skip_services) do | ||
|
|
@@ -673,7 +732,7 @@ end | |
|
|
||
|
|
||
| function _M.dump_data() | ||
| return {config = local_conf.discovery.consul, services = all_services } | ||
| return {config = local_conf.discovery.consul, services = _M.all_nodes()} | ||
| end | ||
|
|
||
|
|
||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After processing every 10~20 keys, call
`ngx.sleep(0)` to yield.