Skip to content

Commit 9031d66

Browse files
authored
Use prime for ConcurrentDictionary initial size (#421)
* initial * fix * tests * 137 * link * link .net * comments * comments
1 parent ee1da5f commit 9031d66

File tree

5 files changed

+172
-3
lines changed

5 files changed

+172
-3
lines changed
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
using System.Collections.Generic;
2+
using System;
3+
using BitFaster.Caching;
4+
using FluentAssertions;
5+
using Xunit;
6+
using Xunit.Abstractions;
7+
8+
namespace BitFaster.Caching.UnitTests
9+
{
10+
public class HashTablePrimesTests
11+
{
12+
private readonly ITestOutputHelper testOutputHelper;
13+
14+
// Keeps the xUnit output sink so the diagnostic test can print its table of computed sizes.
public HashTablePrimesTests(ITestOutputHelper testOutputHelper)
    => this.testOutputHelper = testOutputHelper;
18+
19+
// Verifies that the lookup returns the first table prime strictly greater than the input,
// and that it is capped at 137 once the input reaches the end of the table.
[Theory]
[InlineData(3, 7)]
[InlineData(8, 11)]
[InlineData(12, 17)]
[InlineData(132, 137)]
[InlineData(500, 137)]
// Non-positive input falls through to the smallest table entry.
[InlineData(-1, 7)]
[InlineData(0, 7)]
// An input equal to a table prime must yield the NEXT entry (strictly greater).
[InlineData(7, 11)]
// The last table entry hands off to the 137 cap.
[InlineData(131, 137)]
// At and beyond the cap the result stays 137, so the result is no longer greater than the input.
[InlineData(137, 137)]
[InlineData(int.MaxValue, 137)]
public void NextPrimeGreaterThan(int input, int nextPrime)
{
    HashTablePrimes.NextPrimeGreaterThan(input).Should().Be(nextPrime);
}
29+
30+
// This test method replicates the hash table sizes that will be computed by ConcurrentDictionary
31+
// on earlier versions of .NET before prime numbers are used.
32+
// 277 is prime
33+
// 557 is prime
34+
// 1117 is prime
35+
// 2237 is prime
36+
// 4477 has factors 11, 37, 121, 407
37+
// 8957 has factors 13, 53, 169, 689
38+
// 17917 has factors 19, 23, 41, 437, 779, 943
39+
// 35837 is prime
40+
// 71677 has factors 229, 313
41+
// 143357 is prime
42+
// 286717 has factors 163, 1759
43+
// 573437 is prime
44+
// 1146877 is prime
45+
// 2293757 is prime
46+
// 4587517 has factors 11, 103, 1133, 4049, 44539, 417047
47+
// 9175037 is prime
48+
// 18350077 has factors 701, 26177
49+
// 36700157 has factors 13, 23, 299, 122743, 1595659, 2823089
50+
// 73400317 has factors 4999, 14683
51+
// 146800637 is prime
52+
// 293601277 has factors 6113, 48029
53+
// 587202557 has factors 1877, 312841
54+
// 1174405117 has factors 10687, 109891
55+
// Diagnostic only (skipped): prints each successive table size reached from the chosen seed,
// followed by either "is prime" or the list of its non-trivial factors.
[Fact(Skip="Not a functional test")]
public void ComputeHashTableSizes()
{
    // 137 gives a good balance of primes for smaller sizes, and few factors for larger sizes.
    // Other good candidates: 131, 151, 163, 211
    const int seed = 137;

    // Number of successive resizes to print.
    const int resizeCount = 23;

    int current = seed;
    int step = 0;

    while (step < resizeCount)
    {
        current = NextTableSize(current);
        this.testOutputHelper.WriteLine($"{current} {GetFactorsString(current)}");
        step++;
    }
}
68+
69+
// Replicates .NET framework ConcurrentDictionary resize logic:
70+
// https://github.com/microsoft/referencesource/blob/51cf7850defa8a17d815b4700b67116e3fa283c2/mscorlib/system/collections/Concurrent/ConcurrentDictionary.cs#L1828C29-L1828C29
71+
// Returns the first odd value at or above (2 * initial + 1) that is not a multiple of 3, 5 or 7.
private static int NextTableSize(int initial)
{
    // Doubling and adding one guarantees an odd starting point.
    int candidate = initial * 2 + 1;

    while (true)
    {
        bool hasSmallFactor = candidate % 3 == 0 || candidate % 5 == 0 || candidate % 7 == 0;

        if (!hasSmallFactor)
        {
            return candidate;
        }

        // Step by two so that only odd values are ever examined.
        candidate += 2;
    }
}
85+
86+
// Describes a table size: "is prime" when it has no divisors other than 1 and itself,
// otherwise "has factors ..." followed by those divisors in ascending order.
private static string GetFactorsString(int nextSize)
{
    List<int> divisors = Factor(nextSize);

    // Drop the trivial divisors so that only proper, non-unit factors remain.
    divisors.Remove(1);
    divisors.Remove(nextSize);
    divisors.Sort();

    return divisors.Count > 0
        ? $"has factors {string.Join(", ", divisors)}"
        : "is prime";
}
101+
102+
// https://stackoverflow.com/questions/239865/best-way-to-find-all-factors-of-a-given-number
103+
// Collects the divisors of number by trial division up to its integer square root.
// Each hit contributes the divisor and its cofactor, so the result is not sorted.
private static List<int> Factor(int number)
{
    var result = new List<int>();

    // The cast truncates, giving the largest integer not above the square root.
    int limit = (int)Math.Sqrt(number);

    for (int candidate = 1; candidate <= limit; candidate++)
    {
        if (number % candidate != 0)
        {
            continue;
        }

        int cofactor = number / candidate;
        result.Add(candidate);

        // For a perfect square the root pairs with itself; record it only once.
        if (cofactor != candidate)
        {
            result.Add(cofactor);
        }
    }

    return result;
}
120+
}
121+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
using System;
2+
3+
namespace BitFaster.Caching
4+
{
5+
// Using the capacity passed into the cache ctor to initialize the ConcurrentDictionary has 2 problems:
6+
//
7+
// 1. By allocating up front, we eliminate resizing. However, if the capacity is very large and the cache is not used,
8+
// we will waste a lot of memory.
9+
// 2. On earlier versions of .NET, ConcurrentDictionary uses the capacity arg to directly initialize the hash table
10+
// size. On resize, the hashtable is grown to 2x + 1 while avoiding factors of 3, 5, or 7 (but not larger). On
11+
// newer versions of .NET, both initial size and resize are based on the next prime number larger than capacity. Collisions
12+
// are reduced when hash table size is prime. Hence the change to use primes in all cases in newer versions of the
13+
// framework.
14+
//
15+
// To mitigate this, we adopt a simple scheme: find the next prime larger than the capacity arg, up to 137. If the
16+
// capacity is greater than 137, just set the initial size to 137, thereby bounding initial memory consumption for
17+
// large caches.
18+
//
19+
// - Older .NET implementations: For smaller caches, we fix size at the next largest prime. For larger tables, we now
20+
// start out with a larger prime (avoiding all factors up to 137, not just 3, 5 and 7). Above 137, some sizes will be
21+
// prime and others have relatively few factors. The complete list is given as a comment in the unit test code.
22+
// - Newer .NET implementations: as above for smaller caches. For larger caches, the resize will use successively larger
23+
// primes. The duplicate prime computation is added only during construction and is effectively a no-op.
24+
/// <summary>
/// Selects a hash table size from a small fixed table of primes: the first entry strictly greater
/// than the requested value, or <see cref="MaxPrime"/> when no entry qualifies.
/// </summary>
internal class HashTablePrimes
{
    /// <summary>
    /// Upper bound on the returned size; used whenever the requested value is at or above
    /// the last entry of <see cref="Primes"/>.
    /// </summary>
    internal const int MaxPrime = 137;

    /// <summary>
    /// Candidate sizes in ascending order, all below <see cref="MaxPrime"/>.
    /// On NETSTANDARD2_0 this is a field, declared readonly so the shared table cannot be reassigned.
    /// </summary>
#if NETSTANDARD2_0
    internal static readonly int[] Primes = new int[] {
#else
    internal static ReadOnlySpan<int> Primes => new int[] {
#endif
        7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131
    };

    /// <summary>
    /// Finds the first table prime that is strictly greater than <paramref name="min"/>.
    /// </summary>
    /// <param name="min">The value the result should exceed, for example a cache capacity.</param>
    /// <returns>
    /// The first entry of <see cref="Primes"/> greater than <paramref name="min"/>, or
    /// <see cref="MaxPrime"/> if there is none. For inputs of <see cref="MaxPrime"/> or more the
    /// result is therefore not greater than the input.
    /// </returns>
    internal static int NextPrimeGreaterThan(int min)
    {
        // The table is ascending, so the first match is the smallest qualifying prime.
        foreach (int prime in Primes)
        {
            if (prime > min)
            {
                return prime;
            }
        }

        return MaxPrime;
    }
}
46+
}

BitFaster.Caching/Lfu/ConcurrentLfu.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,8 @@ public ConcurrentLfu(int capacity)
9090
/// <param name="comparer">The equality comparer.</param>
9191
public ConcurrentLfu(int concurrencyLevel, int capacity, IScheduler scheduler, IEqualityComparer<K> comparer)
9292
{
93-
this.dictionary = new ConcurrentDictionary<K, LfuNode<K, V>>(concurrencyLevel, capacity, comparer);
93+
int dictionaryCapacity = HashTablePrimes.NextPrimeGreaterThan(capacity);
94+
this.dictionary = new ConcurrentDictionary<K, LfuNode<K, V>>(concurrencyLevel, dictionaryCapacity, comparer);
9495

9596
// cap concurrency at proc count * 2
9697
int readStripes = Math.Min(BitOps.CeilingPowerOfTwo(concurrencyLevel), BitOps.CeilingPowerOfTwo(Environment.ProcessorCount * 2));

BitFaster.Caching/Lru/ClassicLru.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ public ClassicLru(int concurrencyLevel, int capacity, IEqualityComparer<K> compa
5252
Throw.ArgNull(ExceptionArgument.comparer);
5353

5454
this.capacity = capacity;
55-
this.dictionary = new ConcurrentDictionary<K, LinkedListNode<LruItem>>(concurrencyLevel, this.capacity + 1, comparer);
55+
int dictionaryCapacity = HashTablePrimes.NextPrimeGreaterThan(capacity);
56+
this.dictionary = new ConcurrentDictionary<K, LinkedListNode<LruItem>>(concurrencyLevel, dictionaryCapacity, comparer);
5657
this.policy = new CachePolicy(new Optional<IBoundedPolicy>(this), Optional<ITimePolicy>.None());
5758
}
5859

BitFaster.Caching/Lru/ConcurrentLruCore.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ public ConcurrentLruCore(
8888
this.warmQueue = new ConcurrentQueue<I>();
8989
this.coldQueue = new ConcurrentQueue<I>();
9090

91-
int dictionaryCapacity = this.Capacity + 1;
91+
int dictionaryCapacity = HashTablePrimes.NextPrimeGreaterThan(this.Capacity);
9292

9393
this.dictionary = new ConcurrentDictionary<K, I>(concurrencyLevel, dictionaryCapacity, comparer);
9494
this.itemPolicy = itemPolicy;

0 commit comments

Comments
 (0)