Skip to content

Commit 2b047ff

Browse files
kevmyung, aidandaly24, jariy17, claude
authored
fix(memory): Improve pagination behavior in get_last_k_turns() and list_messages() (#209)
* feat(memory): Improve pagination behavior in get_last_k_turns() and list_messages()
  - get_last_k_turns(): Auto-calculate max_results based on k (max(100, k*3))
  - list_messages(): Add fetch_all parameter to fetch all messages (up to 10000)
  - Backward compatible: default behavior unchanged

* fix(memory): Address PR review comments for pagination behavior
  - Extract shared pagination logic into pagination.py helper
  - Fix test mocks to use _data_plane_client.list_events
  - Add MAX_FETCH_ALL_RESULTS constant (10000) in strands session_manager
  - Rename include_branches to include_parent_branches in client.py for consistency
  - Add comprehensive tests for pagination helper

* fix(memory): Address PR review comments
  - Remove fetch_all parameter from list_messages (misleading name)
  - Use MAX_FETCH_ALL_RESULTS (10000) as default when no limit specified
  - Remove pagination.py module, inline logic into client.py and session.py
  - Revert include_branches rename to avoid breaking change

* fix: Apply ruff formatting to pagination changes

Apply automatic formatting fixes identified by ruff format pre-commit hook.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

---------

Co-authored-by: Aidan Daly <aidandal@amazon.com>
Co-authored-by: T.J Ariyawansa <tjariy@amazon.com>
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent 43c6c3c commit 2b047ff

6 files changed

Lines changed: 337 additions & 211 deletions

File tree

src/bedrock_agentcore/memory/client.py

Lines changed: 51 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,55 +1096,75 @@ def get_last_k_turns(
10961096
k: int = 5,
10971097
branch_name: Optional[str] = None,
10981098
include_branches: bool = False,
1099-
max_results: int = 100,
1099+
max_results: Optional[int] = None,
11001100
) -> List[List[Dict[str, Any]]]:
11011101
"""Get the last K conversation turns.
11021102
11031103
A "turn" typically consists of a user message followed by assistant response(s).
11041104
This method groups messages into logical turns for easier processing.
11051105
1106+
If max_results is specified, fetches up to that many events and finds turns within them
1107+
(backward compatible behavior).
1108+
If max_results is None, automatically paginates until k turns are found.
1109+
11061110
Returns:
11071111
List of turns, where each turn is a list of message dictionaries
11081112
"""
1113+
base_params = {
1114+
"memoryId": memory_id,
1115+
"actorId": actor_id,
1116+
"sessionId": session_id,
1117+
}
1118+
1119+
if branch_name and branch_name != "main":
1120+
base_params["filter"] = {"branch": {"name": branch_name, "includeParentBranches": include_branches}}
1121+
11091122
try:
1110-
# Use the new list_events method
1111-
events = self.list_events(
1112-
memory_id=memory_id,
1113-
actor_id=actor_id,
1114-
session_id=session_id,
1115-
branch_name=branch_name,
1116-
include_parent_branches=False,
1117-
max_results=max_results,
1118-
)
1123+
turns: List[List[Dict[str, Any]]] = []
1124+
current_turn: List[Dict[str, Any]] = []
1125+
next_token = None
1126+
total_fetched = 0
1127+
1128+
while len(turns) < k:
1129+
if max_results is not None:
1130+
remaining = max_results - total_fetched
1131+
if remaining <= 0:
1132+
break
1133+
batch_size = min(100, remaining)
1134+
else:
1135+
batch_size = 100
11191136

1120-
if not events:
1121-
return []
1137+
params = {**base_params, "maxResults": batch_size, "includePayloads": True}
1138+
if next_token:
1139+
params["nextToken"] = next_token
11221140

1123-
# Process events to group into turns
1124-
turns = []
1125-
current_turn = []
1141+
response = self.gmdp_client.list_events(**params)
1142+
events = response.get("events", [])
11261143

1127-
for event in events:
1128-
if len(turns) >= k:
1129-
break # Only need last K turns
1130-
for payload_item in event.get("payload", []):
1131-
if "conversational" in payload_item:
1132-
role = payload_item["conversational"].get("role")
1144+
if not events:
1145+
break
1146+
1147+
total_fetched += len(events)
11331148

1134-
# Start new turn on USER message
1135-
if role == Role.USER.value and current_turn:
1136-
turns.append(current_turn)
1137-
current_turn = []
1149+
for event in events:
1150+
if len(turns) >= k:
1151+
break
1152+
for payload_item in event.get("payload", []):
1153+
if "conversational" in payload_item:
1154+
role = payload_item["conversational"].get("role")
1155+
if role == Role.USER.value and current_turn:
1156+
turns.append(current_turn)
1157+
current_turn = []
1158+
current_turn.append(payload_item["conversational"])
11381159

1139-
current_turn.append(payload_item["conversational"])
1160+
next_token = response.get("nextToken")
1161+
if not next_token:
1162+
break
11401163

1141-
# Don't forget the last turn
1142-
if current_turn:
1164+
if current_turn and len(turns) < k:
11431165
turns.append(current_turn)
11441166

1145-
# Return the last k turns
1146-
return turns[:k] if len(turns) > k else turns
1147-
1167+
return turns[:k]
11481168
except ClientError as e:
11491169
logger.error("Failed to get last K turns: %s", e)
11501170
raise

src/bedrock_agentcore/memory/integrations/strands/session_manager.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
SESSION_PREFIX = "session_"
3232
AGENT_PREFIX = "agent_"
3333
MESSAGE_PREFIX = "message_"
34+
MAX_FETCH_ALL_RESULTS = 10000
3435

3536

3637
class AgentCoreMemorySessionManager(RepositorySessionManager, SessionRepository):
@@ -427,7 +428,12 @@ def update_message(self, session_id: str, agent_id: str, session_message: Sessio
427428
)
428429

429430
def list_messages(
430-
self, session_id: str, agent_id: str, limit: Optional[int] = None, offset: int = 0, **kwargs: Any
431+
self,
432+
session_id: str,
433+
agent_id: str,
434+
limit: Optional[int] = None,
435+
offset: int = 0,
436+
**kwargs: Any,
431437
) -> list[SessionMessage]:
432438
"""List messages for an agent from AgentCore Memory with pagination.
433439
@@ -448,7 +454,8 @@ def list_messages(
448454
raise SessionException(f"Session ID mismatch: expected {self.config.session_id}, got {session_id}")
449455

450456
try:
451-
max_results = (limit + offset) if limit else 100
457+
max_results = (limit + offset) if limit else MAX_FETCH_ALL_RESULTS
458+
452459
events = self.memory_client.list_events(
453460
memory_id=self.config.memory_id,
454461
actor_id=self.config.actor_id,
@@ -512,7 +519,8 @@ def retrieve_for_namespace(namespace: str, retrieval_config: RetrievalConfig):
512519
)
513520
if retrieval_config.relevance_score:
514521
memories = [
515-
m for m in memories
522+
m
523+
for m in memories
516524
if m.get("relevanceScore", retrieval_config.relevance_score) >= retrieval_config.relevance_score
517525
]
518526
context_items = []

src/bedrock_agentcore/memory/session.py

Lines changed: 52 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -786,53 +786,75 @@ def get_last_k_turns(
786786
k: int = 5,
787787
branch_name: Optional[str] = None,
788788
include_parent_branches: bool = False,
789-
max_results: int = 100,
789+
max_results: Optional[int] = None,
790790
) -> List[List[EventMessage]]:
791791
"""Get the last K conversation turns.
792792
793793
A "turn" typically consists of a user message followed by assistant response(s).
794794
This method groups messages into logical turns for easier processing.
795795
796+
If max_results is specified, fetches up to that many events and finds turns within them
797+
(backward compatible behavior).
798+
If max_results is None, automatically paginates until k turns are found.
799+
796800
Returns:
797801
List of turns, where each turn is a list of message dictionaries
798802
"""
803+
base_params = {
804+
"memoryId": self._memory_id,
805+
"actorId": actor_id,
806+
"sessionId": session_id,
807+
}
808+
809+
if branch_name and branch_name != "main":
810+
base_params["filter"] = {"branch": {"name": branch_name, "includeParentBranches": include_parent_branches}}
811+
799812
try:
800-
events = self.list_events(
801-
actor_id=actor_id,
802-
session_id=session_id,
803-
branch_name=branch_name,
804-
include_parent_branches=include_parent_branches,
805-
max_results=max_results,
806-
)
813+
turns: List[List[EventMessage]] = []
814+
current_turn: List[EventMessage] = []
815+
next_token = None
816+
total_fetched = 0
817+
818+
while len(turns) < k:
819+
if max_results is not None:
820+
remaining = max_results - total_fetched
821+
if remaining <= 0:
822+
break
823+
batch_size = min(100, remaining)
824+
else:
825+
batch_size = 100
807826

808-
if not events:
809-
return []
827+
params = {**base_params, "maxResults": batch_size, "includePayloads": True}
828+
if next_token:
829+
params["nextToken"] = next_token
830+
831+
response = self._data_plane_client.list_events(**params)
832+
events = response.get("events", [])
810833

811-
# Process events to group into turns
812-
turns = []
813-
current_turn = []
834+
if not events:
835+
break
814836

815-
for event in events:
816-
if len(turns) >= k:
817-
break # Only need last K turns
818-
for payload_item in event.get("payload", []):
819-
if "conversational" in payload_item:
820-
role = payload_item["conversational"].get("role")
837+
total_fetched += len(events)
821838

822-
# Start new turn on USER message
823-
if role == MessageRole.USER.value and current_turn:
824-
turns.append(current_turn)
825-
current_turn = []
839+
for event in events:
840+
if len(turns) >= k:
841+
break
842+
for payload_item in event.get("payload", []):
843+
if "conversational" in payload_item:
844+
role = payload_item["conversational"].get("role")
845+
if role == MessageRole.USER.value and current_turn:
846+
turns.append(current_turn)
847+
current_turn = []
848+
current_turn.append(EventMessage(payload_item["conversational"]))
826849

827-
current_turn.append(EventMessage(payload_item["conversational"]))
850+
next_token = response.get("nextToken")
851+
if not next_token:
852+
break
828853

829-
# Don't forget the last turn
830-
if current_turn:
854+
if current_turn and len(turns) < k:
831855
turns.append(current_turn)
832856

833-
# Return the last k turns
834-
return turns[:k] if len(turns) > k else turns
835-
857+
return turns[:k]
836858
except ClientError as e:
837859
logger.error("Failed to get last K turns: %s", e)
838860
raise
@@ -1153,7 +1175,7 @@ def get_last_k_turns(
11531175
k: int = 5,
11541176
branch_name: Optional[str] = None,
11551177
include_parent_branches: Optional[bool] = None,
1156-
max_results: int = 100,
1178+
max_results: Optional[int] = None,
11571179
) -> List[List[EventMessage]]:
11581180
"""Delegates to manager.get_last_k_turns."""
11591181
return self._manager.get_last_k_turns(

tests/bedrock_agentcore/memory/integrations/strands/test_agentcore_memory_session_manager.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,3 +1096,23 @@ def test_retrieve_customer_context_filters_by_relevance_score(self, mock_memory_
10961096
assert "High relevance 2" in injected_context
10971097
assert "Low relevance 1" not in injected_context
10981098
assert "Low relevance 2" not in injected_context
1099+
1100+
def test_list_messages_default_max_results(self, session_manager, mock_memory_client):
1101+
"""Test listing messages without limit uses default max_results=10000."""
1102+
mock_memory_client.list_events.return_value = []
1103+
1104+
session_manager.list_messages("test-session-456", "test-agent-123")
1105+
1106+
mock_memory_client.list_events.assert_called_once()
1107+
call_kwargs = mock_memory_client.list_events.call_args[1]
1108+
assert call_kwargs["max_results"] == 10000
1109+
1110+
def test_list_messages_with_limit_calculates_max_results(self, session_manager, mock_memory_client):
1111+
"""Test listing messages with limit calculates max_results correctly."""
1112+
mock_memory_client.list_events.return_value = []
1113+
1114+
session_manager.list_messages("test-session-456", "test-agent-123", limit=500, offset=50)
1115+
1116+
mock_memory_client.list_events.assert_called_once()
1117+
call_kwargs = mock_memory_client.list_events.call_args[1]
1118+
assert call_kwargs["max_results"] == 550 # limit + offset

tests/bedrock_agentcore/memory/test_client.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3064,3 +3064,67 @@ def test_wrap_configuration_custom_episodic_override():
30643064
wrapped["reflection"]["customReflectionConfiguration"]["episodicReflectionOverride"]["appendToPrompt"]
30653065
== "Reflect on episodes"
30663066
)
3067+
3068+
3069+
def test_get_last_k_turns_auto_pagination():
3070+
"""Test get_last_k_turns automatically paginates until k turns are found."""
3071+
with patch("boto3.client"):
3072+
client = MemoryClient()
3073+
3074+
mock_gmdp = MagicMock()
3075+
client.gmdp_client = mock_gmdp
3076+
3077+
# First call returns events but not enough turns, with next_token
3078+
# Second call returns more events, no next_token
3079+
mock_gmdp.list_events.side_effect = [
3080+
{
3081+
"events": [
3082+
{"payload": [{"conversational": {"role": "USER", "content": {"text": "Hi"}}}]},
3083+
{"payload": [{"conversational": {"role": "ASSISTANT", "content": {"text": "Hello"}}}]},
3084+
],
3085+
"nextToken": "token-123",
3086+
},
3087+
{
3088+
"events": [
3089+
{"payload": [{"conversational": {"role": "USER", "content": {"text": "How are you?"}}}]},
3090+
{"payload": [{"conversational": {"role": "ASSISTANT", "content": {"text": "Good"}}}]},
3091+
],
3092+
"nextToken": None,
3093+
},
3094+
]
3095+
3096+
# Request 2 turns without max_results - should paginate automatically
3097+
turns = client.get_last_k_turns(memory_id="mem-123", actor_id="user-123", session_id="session-456", k=2)
3098+
3099+
assert len(turns) == 2
3100+
assert mock_gmdp.list_events.call_count == 2
3101+
3102+
3103+
def test_get_last_k_turns_explicit_max_results():
3104+
"""Test get_last_k_turns respects explicitly provided max_results (backward compatible)."""
3105+
with patch("boto3.client"):
3106+
client = MemoryClient()
3107+
3108+
mock_gmdp = MagicMock()
3109+
client.gmdp_client = mock_gmdp
3110+
3111+
# Return events with next_token, but max_results should limit fetching
3112+
mock_gmdp.list_events.return_value = {
3113+
"events": [
3114+
{"payload": [{"conversational": {"role": "USER", "content": {"text": "Hi"}}}]},
3115+
],
3116+
"nextToken": "token-123",
3117+
}
3118+
3119+
# Request with explicit max_results=50 - should respect limit
3120+
client.get_last_k_turns(
3121+
memory_id="mem-123", actor_id="user-123", session_id="session-456", k=200, max_results=50
3122+
)
3123+
3124+
# First call should request up to max_results (min of 100 and 50 = 50)
3125+
first_call_args = mock_gmdp.list_events.call_args_list[0]
3126+
assert first_call_args[1]["maxResults"] == 50
3127+
3128+
# Total events fetched should not exceed max_results
3129+
total_fetched = sum(1 for _ in mock_gmdp.list_events.call_args_list)
3130+
assert total_fetched <= 50 # Should stop after fetching 50 events worth of calls

0 commit comments

Comments
 (0)