diff --git a/docs/release-notes/.FSharp.Core/10.0.300.md b/docs/release-notes/.FSharp.Core/10.0.300.md index f823f462ee..3b1d0259a3 100644 --- a/docs/release-notes/.FSharp.Core/10.0.300.md +++ b/docs/release-notes/.FSharp.Core/10.0.300.md @@ -1,5 +1,6 @@ ### Fixed +* Optimize Set.intersect performance symmetry and preserve identity from the first set argument. ([PR #19291](https://github.com/dotnet/fsharp/pull/19291)) (Fixes #19139) * Fix anonymous record field ordering in LINQ expression conversion to produce consistent expression trees regardless of field declaration order. ([Issue #11131](https://github.com/dotnet/fsharp/issues/11131), [Issue #15648](https://github.com/dotnet/fsharp/issues/15648)) * Fix array indexing in LINQ expressions to generate proper array index expressions instead of GetArray method calls, enabling LINQ providers like Azure Cosmos DB to translate array access. ([Issue #16918](https://github.com/dotnet/fsharp/issues/16918)) * Fix tuple join conditions and groupBy operations to properly compare tuple keys using structural equality. AnonymousObject types now implement Equals and GetHashCode, enabling inline tuple joins like `join b on ((a.Id1, a.Id2) = (b.Id1, b.Id2))` to work correctly. ([Issue #7885](https://github.com/dotnet/fsharp/issues/7885), [Issue #47](https://github.com/dotnet/fsharp/issues/47)) diff --git a/src/FSharp.Core/set.fs b/src/FSharp.Core/set.fs index df37136e38..8e2a370739 100644 --- a/src/FSharp.Core/set.fs +++ b/src/FSharp.Core/set.fs @@ -267,6 +267,24 @@ module internal SetTree = elif c = 0 then true else mem comparer k tn.Right + let rec tryGet (comparer: IComparer<'T>) k (t: SetTree<'T>) = + if isEmpty t then + ValueNone + else + let c = comparer.Compare(k, t.Key) + + if t.Height = 1 then + if c = 0 then + ValueSome t.Key + else + ValueNone + else + let tn = asNode t + + if c < 0 then tryGet comparer k tn.Left + elif c = 0 then ValueSome tn.Key + else tryGet comparer k tn.Right + let rec iter f (t: SetTree<'T>) = if isEmpty t then () @@ -391,6 +409,24 @@ module internal SetTree = balance comparer (union comparer t2n.Left lo) t2n.Key (union comparer t2n.Right hi) + let rec intersectionAuxFromSmall comparer a (t: SetTree<'T>) acc = + if isEmpty t then + acc + else if t.Height = 1 then + match tryGet comparer t.Key a with + | ValueSome v -> add comparer v acc + | ValueNone -> acc + else + let tn = asNode t + let acc = intersectionAuxFromSmall comparer a tn.Right acc + + let acc = + match tryGet comparer tn.Key a with + | ValueSome v -> add comparer v acc + | ValueNone -> acc + + intersectionAuxFromSmall comparer a tn.Left acc + let rec intersectionAux comparer b (t: SetTree<'T>) acc = if isEmpty t then acc @@ -412,7 +448,13 @@ module internal SetTree = intersectionAux comparer b tn.Left acc let intersection comparer a b = - intersectionAux comparer b a empty + let h1 = height a + let h2 = height b + + if h1 <= h2 then + intersectionAux comparer b a empty + else + intersectionAuxFromSmall comparer a b empty let partition1 comparer f k (acc1, acc2) = if f k then diff --git a/tests/FSharp.Core.UnitTests/FSharp.Core/Microsoft.FSharp.Collections/SetType.fs b/tests/FSharp.Core.UnitTests/FSharp.Core/Microsoft.FSharp.Collections/SetType.fs index 9c1120675a..6cb7c9bc0f 100644 --- a/tests/FSharp.Core.UnitTests/FSharp.Core/Microsoft.FSharp.Collections/SetType.fs +++ b/tests/FSharp.Core.UnitTests/FSharp.Core/Microsoft.FSharp.Collections/SetType.fs @@ -11,6 +11,24 @@ open System.Collections.Generic open FSharp.Core.UnitTests.LibraryTestFx open Xunit + +/// This type is used to verify that Set.intersect preserves the object identity (reference) +/// of the first set (S1). By overriding equality to only check the 'Id' field, +/// we can create two objects that are "equal" but live in different memory locations. +[] +type IntersectItem = + { Id: int; Tag: string } + override x.Equals(obj) = + match obj with + | :? IntersectItem as other -> x.Id = other.Id + | _ -> false + override x.GetHashCode() = hash x.Id + interface IComparable with + member x.CompareTo(obj) = + match obj with + | :? IntersectItem as other -> compare x.Id other.Id + | _ -> invalidArg "obj" "not an IntersectItem" + (* [Test Strategy] Make sure each method works on: @@ -332,6 +350,62 @@ type SetType() = Assert.AreEqual(Set.maxElement fir, 6) Assert.AreEqual(Set.maxElement sec, 7) + + [] + member _.Intersect_PreservesIdentity() = + // 1. [Test Strategy] Empty set + let emptyA : Set = Set.empty + let emptyB : Set = Set.empty + Assert.True((Set.intersect emptyA emptyB).IsEmpty) + Assert.True((Set.intersect (Set.singleton {Id=1; Tag="x"}) emptyB).IsEmpty) + + // 2. [Test Strategy] Single-element set & [Identity Property] + let item1_S1 = { Id = 1; Tag = "From_S1" } + let item1_S2 = { Id = 1; Tag = "From_S2" } + + let s1_single = Set.singleton item1_S1 + let s2_single = Set.singleton item1_S2 + + let resSingle = Set.intersect s1_single s2_single + Assert.Equal(1, resSingle.Count) + + // Identity Check: Must be EXACTLY the same object in memory as S1 + let singleResult = Set.minElement resSingle + Assert.True(Object.ReferenceEquals(singleResult, item1_S1), "Single-element identity failed: Reference must come from S1") + + // 3. [Test Strategy] Sets with 4 or more elements (Optimized Path Check) + let item5_S1 = { Id = 5; Tag = "ID5_From_S1" } + let item5_S2 = { Id = 5; Tag = "ID5_From_S2" } + + // S1 is large: hits intersectionAuxFromSmall (The optimized path) + let list1 = [ for i in 1..50 -> if i = 5 then item5_S1 else { Id = i; Tag = sprintf "L%d" i } ] + let list2 = [ for i in 1..5 -> if i = 5 then item5_S2 else { Id = i; Tag = sprintf "R%d" i } ] + + let s1_large = Set.ofList list1 + let s2_small = Set.ofList list2 + + let resMany = Set.intersect s1_large s2_small + + // Identity Check in Loop: + // We iterate through the result and ensure every common element + // has the physical reference from the first set (list1) + resMany |> Set.iter (fun item -> + let originalInS1 = list1 |> List.find (fun x -> x.Id = item.Id) + if not (Object.ReferenceEquals(item, originalInS1)) then + Assert.Fail(sprintf "Identity mismatch for ID %d: Expected reference from S1 (list1), but got another instance." item.Id) + ) + + // 4. [Reversed Size Case] (Standard Path Check) + // S1 is small, S2 is large: hits intersectionAux (The standard path) + let resReversed = Set.intersect s2_small s1_large + + // Find the specific item with ID 5 in the result + let item5InResult = resReversed |> Set.filter (fun x -> x.Id = 5) |> Set.minElement + + // Identity Check: Since s2_small was the FIRST argument, + // the result MUST contain item5_S2 (from s2_small) + Assert.True(Object.ReferenceEquals(item5InResult, item5_S2), "Reversed path identity failed: Reference must come from s2_small") + #if NET8_0_OR_GREATER #nowarn "1204" // FS1204: This type/method is for compiler use and should not be used directly.