Add statuses action for batch handle queries

nmburgan · nmburgan · commit 7634d5c79f5b · 2026-03-19T17:08:45.000-07:00
The existing status action only accepts a single handle, requiring callers to make a separate RPC call for each running job. That is hard to scale: when using MCollective's shared NATS connector, concurrent RPC calls from separate threads race on the singleton connection and the @@request_sequence counter, causing lost responses. The new statuses action accepts an array of handles and returns status, stdout, and stderr for each job, plus exitcode for each job that has stopped, in a single response; a handle that cannot be read is reported as an error entry without failing the others. This lets callers such as the upcoming Choria transport in OpenBolt poll and fetch output in one batched RPC call per round instead of N sequential per-target calls.
diff --git a/files/mcollective/agent/shell.ddl b/files/mcollective/agent/shell.ddl
@@ -127,6 +127,20 @@ action "list", :description => "Get a list of all running commands" do
 
 end
 
+# Batch counterpart of "status": one request carries many handles.
+action "statuses", :description => "Get status and output of multiple managed commands" do
+    display :always
+
+    input   :handles,
+            :description => "Array of command handles to query",
+            :prompt      => "Handles",
+            :type        => :array,
+            :optional    => false
+
+    output  :statuses, :display_as => "statuses",
+            :description => "status and output keyed by handle"
+end
+
 action "kill", :description => "Kill a command by handle" do
     display :always
 
diff --git a/files/mcollective/agent/shell.json b/files/mcollective/agent/shell.json
@@ -96,6 +96,27 @@
       "display": "always",
       "description": "Run a command"
     },
+    {
+      "action": "statuses",
+      "input": {
+        "handles": {
+          "prompt": "Handles",
+          "description": "Array of command handles to query",
+          "type": "array",
+          "default": null,
+          "optional": false
+        }
+      },
+      "output": {
+        "statuses": {
+          "description": "status and output keyed by handle",
+          "display_as": "statuses",
+          "default": null
+        }
+      },
+      "display": "always",
+      "description": "Get status and output of multiple managed commands"
+    },
     {
       "action": "start",
       "input": {
diff --git a/files/mcollective/agent/shell.rb b/files/mcollective/agent/shell.rb
@@ -25,6 +25,26 @@ class Shell<RPC::Agent
         end
       end
 
+      # Batch form of 'status': reply[:statuses] maps each requested handle to
+      # its :status, :stdout and :stderr (plus :exitcode once stopped), or to
+      # { :status => :error, :error => message } when that handle raises.
+      action 'statuses' do
+        results = {}
+        request[:handles].each do |handle|
+          begin
+            job = Job.new(handle)
+            # Sample status once so :status and :exitcode cannot disagree.
+            status = job.status
+            entry = { :status => status, :stdout => job.stdout, :stderr => job.stderr }
+            entry[:exitcode] = job.exitcode if status == :stopped
+            results[handle] = entry
+          rescue StandardError => error
+            results[handle] = { :status => :error, :error => error.message }
+          end
+        end
+        reply[:statuses] = results
+      end
+
       action 'kill' do
         handle = request[:handle]
         job = Job.new(handle)
diff --git a/spec/unit/agent/shell_spec.rb b/spec/unit/agent/shell_spec.rb
@@ -18,6 +18,78 @@ module Agent
         end
       end
 
+      # Runs real jobs with Shell::Job.state_path redirected to a temp dir.
+      describe '#statuses' do
+        let(:reply) { {} }
+
+        before :each do
+          agent.stubs(:reply).returns(reply)
+          @tmpdir = Dir.mktmpdir
+          Shell::Job.stubs(:state_path).returns(@tmpdir)
+        end
+
+        after(:each) { FileUtils.remove_entry_secure @tmpdir }
+
+        it 'should return stdout, stderr, and exitcode for stopped jobs' do
+          job = Shell::Job.new
+          job.start_command('echo foo')
+          job.wait_for_process
+
+          agent.call(:statuses, :handles => [job.handle])
+          statuses = reply[:statuses]
+          statuses.should have_key(job.handle)
+          statuses[job.handle][:status].should == :stopped
+          statuses[job.handle][:stdout].should == "foo\n"
+          statuses[job.handle][:stderr].should == ''
+          statuses[job.handle][:exitcode].should == 0
+        end
+
+        it 'should return stdout and stderr for running jobs' do
+          job = Shell::Job.new
+          job.start_command(%{ruby -e 'STDOUT.sync = true; puts "partial"; sleep 60'})
+          begin
+            sleep 0.5
+            agent.call(:statuses, :handles => [job.handle])
+            statuses = reply[:statuses]
+            statuses[job.handle][:status].should == :running
+            statuses[job.handle][:stdout].should == "partial\n"
+            statuses[job.handle][:stderr].should == ''
+            statuses[job.handle].should_not have_key(:exitcode)
+          ensure
+            job.kill # still reap the 60s sleeper when an expectation above fails
+          end
+        end
+
+        it 'should return error for invalid handle without affecting valid handles' do
+          job = Shell::Job.new
+          job.start_command('echo good')
+          job.wait_for_process
+
+          agent.call(:statuses, :handles => [job.handle, 'nonexistent-handle'])
+          statuses = reply[:statuses]
+          statuses[job.handle][:status].should == :stopped
+          statuses[job.handle][:stdout].should == "good\n"
+          statuses['nonexistent-handle'][:status].should == :error
+          statuses['nonexistent-handle'].should have_key(:error)
+        end
+
+        it 'should handle multiple handles in one call' do
+          job_one = Shell::Job.new
+          job_one.start_command('echo one')
+          job_one.wait_for_process
+
+          job_two = Shell::Job.new
+          job_two.start_command('echo two')
+          job_two.wait_for_process
+
+          agent.call(:statuses, :handles => [job_one.handle, job_two.handle])
+          statuses = reply[:statuses]
+          statuses.keys.size.should == 2
+          statuses[job_one.handle][:stdout].should == "one\n"
+          statuses[job_two.handle][:stdout].should == "two\n"
+        end
+      end
+
       describe '#run_command' do
         let(:reply) { {} }