Incorporate feedback from review

mattt · mattt · commit 27f0292cd5ee · 2026-03-23T05:27:23.000-07:00
diff --git a/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift b/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift
@@ -497,16 +497,17 @@ import Foundation
                 return limits.max() ?? effectiveIdleLimit()
             }
 
+            private func idlePolicyConfiguration() -> GPUMemoryConfiguration {
+                knownConfigs.max(by: { $0.idleCacheLimit < $1.idleCacheLimit })
+                    ?? GPUMemoryConfiguration.automatic
+            }
+
             private func effectiveIdleLimit() -> Int {
-                let limits = knownConfigs.map(\.idleCacheLimit)
-                return limits.min() ?? GPUMemoryConfiguration.automatic.idleCacheLimit
+                idlePolicyConfiguration().idleCacheLimit
             }
 
             private func shouldClearOnEviction() -> Bool {
-                if knownConfigs.isEmpty {
-                    return GPUMemoryConfiguration.automatic.clearCacheOnEviction
-                }
-                return knownConfigs.contains { $0.clearCacheOnEviction }
+                idlePolicyConfiguration().clearCacheOnEviction
             }
         }
 
@@ -619,7 +620,25 @@ import Foundation
             .concurrentRequests(
                 .init(
                     debugDescription:
-                        "Concurrent requests on the same LanguageModelSession are not supported while MLX KV cache reuse is enabled."
+                        "Concurrent requests on the same LanguageModelSession are not supported for MLX due to cache and memory management constraints."
+                )
+            )
+        }
+
+        private static func maxToolIterationsExceededError(limit: Int) -> LanguageModelSession.GenerationError {
+            .decodingFailure(
+                .init(
+                    debugDescription:
+                        "Exceeded maximum tool iterations (\(limit)) while processing MLX tool calls."
+                )
+            )
+        }
+
+        private static func repeatedToolCallLoopError() -> LanguageModelSession.GenerationError {
+            .decodingFailure(
+                .init(
+                    debugDescription:
+                        "Detected repeated MLX tool-call signature and aborted to avoid an infinite tool loop."
                 )
             )
         }
@@ -663,7 +682,7 @@ import Foundation
             guard entry.prefixTokens.count == entry.prefillTokenCount else {
                 return false
             }
-            return Array(currentTokens.prefix(entry.prefillTokenCount)) == entry.prefixTokens
+            return currentTokens.starts(with: entry.prefixTokens)
         }
 
         private func resolveCache(
@@ -846,15 +865,17 @@ import Foundation
                 if !collectedToolCalls.isEmpty {
                     toolIteration += 1
                     if toolIteration > maxToolIterations {
-                        break
+                        allEntries.append(.toolCalls(Transcript.ToolCalls(collectedToolCalls)))
+                        throw Self.maxToolIterationsExceededError(limit: maxToolIterations)
                     }
 
                     let signature =
                         collectedToolCalls
                         .map { "\($0.function.name):\($0.function.arguments)" }
                         .joined(separator: "|")
                     if signature == previousToolCallSignature {
-                        break
+                        allEntries.append(.toolCalls(Transcript.ToolCalls(collectedToolCalls)))
+                        throw Self.repeatedToolCallLoopError()
                     }
                     previousToolCallSignature = signature