@@ -497,16 +497,17 @@ import Foundation
497497 return limits. max ( ) ?? effectiveIdleLimit ( )
498498 }
499499
/// Selects the single configuration that the idle-cache policy helpers read from.
///
/// Picks the element of `knownConfigs` with the largest `idleCacheLimit`.
/// When `max(by:)` yields no element (an empty `knownConfigs`, assuming it is a
/// standard `Sequence` — TODO confirm), falls back to `GPUMemoryConfiguration.automatic`.
///
/// - Returns: The chosen `GPUMemoryConfiguration`.
500+ private func idlePolicyConfiguration( ) -> GPUMemoryConfiguration {
501+ knownConfigs. max ( by: { $0. idleCacheLimit < $1. idleCacheLimit } )
502+ ?? GPUMemoryConfiguration . automatic
503+ }
504+
/// Returns the idle cache limit as an `Int`.
///
/// This span interleaves two implementations, distinguished by the `-`/`+` markers:
/// - `-` lines (previous body): the smallest `idleCacheLimit` across `knownConfigs`,
///   or `GPUMemoryConfiguration.automatic.idleCacheLimit` when `min()` returns nil.
/// - `+` line (replacement body): the `idleCacheLimit` of whatever
///   `idlePolicyConfiguration()` returns.
///
/// NOTE(review): `idlePolicyConfiguration()` as shown in this listing selects the
/// configuration with the largest `idleCacheLimit`, so the replacement appears to move
/// the policy from "smallest limit" to "largest limit" — confirm this is intended.
500505 private func effectiveIdleLimit( ) -> Int {
501- let limits = knownConfigs. map ( \. idleCacheLimit)
502- return limits. min ( ) ?? GPUMemoryConfiguration . automatic. idleCacheLimit
506+ idlePolicyConfiguration ( ) . idleCacheLimit
503507 }
504508
/// Reports whether the cache should be cleared on eviction.
///
/// This span interleaves two implementations, distinguished by the `-`/`+` markers:
/// - `-` lines (previous body): with no known configurations, returns
///   `GPUMemoryConfiguration.automatic.clearCacheOnEviction`; otherwise returns `true`
///   when any element of `knownConfigs` has `clearCacheOnEviction` set.
/// - `+` line (replacement body): returns the `clearCacheOnEviction` flag of the single
///   configuration returned by `idlePolicyConfiguration()`.
///
/// NOTE(review): with the replacement body, a configuration that sets
/// `clearCacheOnEviction` but is not the one chosen by `idlePolicyConfiguration()`
/// no longer forces a `true` result — confirm this is intended.
505509 private func shouldClearOnEviction( ) -> Bool {
506- if knownConfigs. isEmpty {
507- return GPUMemoryConfiguration . automatic. clearCacheOnEviction
508- }
509- return knownConfigs. contains { $0. clearCacheOnEviction }
510+ idlePolicyConfiguration ( ) . clearCacheOnEviction
510511 }
511512 }
512513
@@ -619,7 +620,25 @@ import Foundation
619620 . concurrentRequests(
620621 . init(
621622 debugDescription:
622- " Concurrent requests on the same LanguageModelSession are not supported while MLX KV cache reuse is enabled. "
623+ " Concurrent requests on the same LanguageModelSession are not supported for MLX due to cache and memory management constraints. "
624+ )
625+ )
626+ }
627+
/// Builds the error raised when the tool-call loop runs past its iteration cap.
///
/// The result is a `.decodingFailure` whose `debugDescription` states that the maximum
/// number of tool iterations was exceeded while processing MLX tool calls.
///
/// - Parameter limit: The iteration cap; it is interpolated into the message text.
/// - Returns: A `LanguageModelSession.GenerationError` describing the overrun.
628+ private static func maxToolIterationsExceededError( limit: Int ) -> LanguageModelSession . GenerationError {
629+ . decodingFailure(
630+ . init(
631+ debugDescription:
632+ " Exceeded maximum tool iterations ( \( limit) ) while processing MLX tool calls. "
633+ )
634+ )
635+ }
636+
/// Builds the error raised when the same tool-call signature is seen twice in a row.
///
/// The result is a `.decodingFailure` whose `debugDescription` explains that a repeated
/// MLX tool-call signature was detected and generation was aborted to avoid an
/// infinite tool loop.
///
/// - Returns: A `LanguageModelSession.GenerationError` describing the aborted loop.
637+ private static func repeatedToolCallLoopError( ) -> LanguageModelSession . GenerationError {
638+ . decodingFailure(
639+ . init(
640+ debugDescription:
641+ " Detected repeated MLX tool-call signature and aborted to avoid an infinite tool loop. "
623642 )
624643 )
625644 }
@@ -663,7 +682,7 @@ import Foundation
663682 guard entry. prefixTokens. count == entry. prefillTokenCount else {
664683 return false
665684 }
666- return Array ( currentTokens. prefix ( entry . prefillTokenCount ) ) == entry. prefixTokens
685+ return currentTokens. starts ( with : entry. prefixTokens)
667686 }
668687
669688 private func resolveCache(
@@ -846,15 +865,17 @@ import Foundation
846865 if !collectedToolCalls. isEmpty {
847866 toolIteration += 1
848867 if toolIteration > maxToolIterations {
849- break
868+ allEntries. append ( . toolCalls( Transcript . ToolCalls ( collectedToolCalls) ) )
869+ throw Self . maxToolIterationsExceededError ( limit: maxToolIterations)
850870 }
851871
852872 let signature =
853873 collectedToolCalls
854874 . map { " \( $0. function. name) : \( $0. function. arguments) " }
855875 . joined ( separator: " | " )
856876 if signature == previousToolCallSignature {
857- break
877+ allEntries. append ( . toolCalls( Transcript . ToolCalls ( collectedToolCalls) ) )
878+ throw Self . repeatedToolCallLoopError ( )
858879 }
859880 previousToolCallSignature = signature
860881
0 commit comments