Skip to content

Commit cb03af7

Browse files
committed
modify: bucket management in hashmap
- enhance support for large hashmaps
1 parent 3599ecb commit cb03af7

8 files changed

Lines changed: 63 additions & 43 deletions

File tree

.github/workflows/generate-docs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name: Generate and Deploy Documentation
33
on:
44
push:
55
branches:
6-
- main
6+
- develop
77

88
permissions:
99
actions: read

src/WebExpress.WebIndex.Test/DocumentStore/UnitTestDocumentStoreStorageA.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,10 +207,14 @@ public void Clear()
207207

208208
// test execution
209209
documentStore.Clear();
210+
Assert.Empty(documentStore.All);
211+
210212
documentStore.Add(Fixture.TestData[0]);
213+
214+
//var all = documentStore.All.ToList();
211215
documentStore.Add(Fixture.TestData[1]);
212216

213-
var all = documentStore.All;
217+
var all = documentStore.All.ToList();
214218

215219
Assert.Equal(all.Select(x => x.Id).OrderBy(x => x), Fixture.TestData.Take(2).Select(x => x.Id).OrderBy(x => x));
216220

src/WebExpress.WebIndex/Storage/IndexStorageBuffer.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,15 @@ public void Invalidation(IIndexStorageSegment segment)
151151
_imperishableCache.Remove(segment.Addr, out _);
152152
}
153153

154+
/// <summary>
155+
/// Performs cache invalidation for all IndexStorageSegment objects.
156+
/// </summary>
157+
public void InvalidationAll()
158+
{
159+
_readCache.Clear();
160+
_imperishableCache.Clear();
161+
}
162+
154163
/// <summary>
155164
/// Adds a segment to the end of the buffer.
156165
/// </summary>

src/WebExpress.WebIndex/Storage/IndexStorageDocumentStore.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,9 @@ public void Update(TIndexItem item)
156156
public void Clear()
157157
{
158158
IndexFile.NextFreeAddr = 0;
159+
IndexFile.InvalidationAll();
160+
IndexFile.Flush();
161+
159162
Header = new IndexStorageSegmentHeader(StorageContext) { Identifier = _extentions, Version = (byte)_version };
160163
Allocator = new IndexStorageSegmentAllocatorDocumentStore(StorageContext);
161164
Statistic = new IndexStorageSegmentStatistic(StorageContext);
@@ -166,6 +169,7 @@ public void Clear()
166169
HashMap.Initialization(false);
167170
Allocator.Initialization(false);
168171

172+
169173
IndexFile.Flush();
170174
}
171175

src/WebExpress.WebIndex/Storage/IndexStorageFile.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,14 @@ public void Invalidation(IIndexStorageSegment segment)
135135
Buffer.Invalidation(segment);
136136
}
137137

138+
/// <summary>
139+
/// Performs cache invalidation for all IndexStorageSegment objects.
140+
/// </summary>
141+
public void InvalidationAll()
142+
{
143+
Buffer.InvalidationAll();
144+
}
145+
138146
/// <summary>
139147
/// Is called to free up resources.
140148
/// </summary>

src/WebExpress.WebIndex/Storage/IndexStorageReverse.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,8 @@ public void Delete(TIndexItem item, IEnumerable<IndexTermToken> terms)
168168
public void Clear()
169169
{
170170
IndexFile.NextFreeAddr = 0;
171+
IndexFile.InvalidationAll();
172+
IndexFile.Flush();
171173

172174
Header = new IndexStorageSegmentHeader(new IndexStorageContext(this)) { Identifier = _extentions, Version = (byte)_version };
173175
Allocator = new IndexStorageSegmentAllocatorReverseIndex(new IndexStorageContext(this));

src/WebExpress.WebIndex/Storage/IndexStorageSegmentBucket.cs

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -27,22 +27,6 @@ public IndexStorageSegmentBucket(IndexStorageContext context, ulong addr)
2727
{
2828
}
2929

30-
/// <summary>
31-
/// Initialization method for the hash map segment.
32-
/// </summary>
33-
/// <param name="initializationFromFile">If true, initializes from file. Otherwise, initializes and writes to file.</param>
34-
public virtual void Initialization(bool initializationFromFile)
35-
{
36-
if (initializationFromFile)
37-
{
38-
Context.IndexFile.Read(this);
39-
}
40-
else
41-
{
42-
Context.IndexFile.Write(this);
43-
}
44-
}
45-
4630
/// <summary>
4731
/// Reads the record from the storage medium.
4832
/// </summary>

src/WebExpress.WebIndex/Storage/IndexStorageSegmentHashMap.cs

Lines changed: 34 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ public class IndexStorageSegmentHashMap : IndexStorageSegment
1212
{
1313
private readonly uint _capacity;
1414
private readonly Lock _guard = new();
15+
private const int _bufferSize = 4096;
1516

1617
/// <summary>
1718
/// Returns the amount of space required on the storage device.
@@ -29,7 +30,7 @@ public class IndexStorageSegmentHashMap : IndexStorageSegment
2930
/// specific hash value. A bucket provides a concatenated list by recording the
3031
/// collisions (different keys with the same hash value).
3132
/// </summary>
32-
private IndexStorageSegmentBucket[] Buckets { get; set; }
33+
//private IndexStorageSegmentBucket[] Buckets { get; set; }
3334

3435
/// <summary>
3536
/// Returns all items.
@@ -38,16 +39,17 @@ public IEnumerable<IndexStorageSegmentItem> All
3839
{
3940
get
4041
{
41-
foreach (var bucket in Buckets)
42+
for (var i = 0u; i < BucketCount; i++)
4243
{
44+
var bucket = GetBucket(i);
4345
var addr = bucket.ItemAddr;
4446

4547
while (addr != 0)
4648
{
4749
var item = Context.IndexFile.Read<IndexStorageSegmentItem>(addr, Context);
48-
yield return item;
49-
5050
addr = item.SuccessorAddr;
51+
52+
yield return item;
5153
}
5254
}
5355
}
@@ -75,31 +77,25 @@ public virtual void Initialization(bool initializationFromFile)
7577
{
7678
Context.IndexFile.Read(this);
7779

78-
Buckets = new IndexStorageSegmentBucket[BucketCount];
79-
80-
var initalAddress = Context.IndexFile.Alloc(SegmentSize + BucketCount * IndexStorageSegmentBucket.SegmentSize);
81-
82-
for (uint i = 0; i < BucketCount; i++)
83-
{
84-
var addr = initalAddress + (i * IndexStorageSegmentBucket.SegmentSize);
85-
Buckets[i] = new IndexStorageSegmentBucket(Context, addr);
86-
Context.IndexFile.Read(Buckets[i]);
87-
}
80+
Context.IndexFile.Alloc(SegmentSize + BucketCount * IndexStorageSegmentBucket.SegmentSize);
8881
}
8982
else
9083
{
91-
Buckets = new IndexStorageSegmentBucket[BucketCount];
92-
9384
var initalAddress = Context.IndexFile.Alloc(SegmentSize + (BucketCount * IndexStorageSegmentBucket.SegmentSize));
85+
var zeroBuffer = new byte[_bufferSize];
86+
var totalBytes = BucketCount * IndexStorageSegmentBucket.SegmentSize;
87+
var bytesWritten = 0L;
88+
89+
Context.IndexFile.Write(this);
90+
Context.IndexFile.FileStream.Seek((long)initalAddress, SeekOrigin.Begin);
9491

95-
for (uint i = 0; i < BucketCount; i++)
92+
while (bytesWritten < totalBytes)
9693
{
97-
var addr = initalAddress + (i * IndexStorageSegmentBucket.SegmentSize);
98-
Buckets[i] = new IndexStorageSegmentBucket(Context, addr);
99-
Context.IndexFile.Write(Buckets[i]);
100-
}
94+
var bytesToWrite = Math.Min(_bufferSize, totalBytes - bytesWritten);
10195

102-
Context.IndexFile.Write(this);
96+
Context.IndexFile.FileStream.Write(zeroBuffer, 0, (int)bytesToWrite);
97+
bytesWritten += bytesToWrite;
98+
}
10399
}
104100
}
105101

@@ -111,7 +107,7 @@ public IndexStorageSegmentItem Add(IndexStorageSegmentItem segment)
111107
{
112108
var hash = segment.Id.GetHashCode();
113109
var index = (uint)hash % BucketCount;
114-
var bucket = Buckets[index];
110+
var bucket = GetBucket(index);
115111

116112
lock (_guard)
117113
{
@@ -181,7 +177,7 @@ public IEnumerable<IndexStorageSegmentItem> GetBucket(Guid id)
181177
{
182178
var hash = id.GetHashCode();
183179
var index = (uint)hash % BucketCount;
184-
var bucket = Buckets[index];
180+
var bucket = GetBucket(index);
185181

186182
if (bucket.ItemAddr == 0)
187183
{
@@ -210,7 +206,7 @@ public bool Remove(IndexStorageSegmentItem segment)
210206
{
211207
var hash = segment.Id.GetHashCode();
212208
var index = (uint)hash % BucketCount;
213-
var bucket = Buckets[index];
209+
var bucket = GetBucket(index);
214210

215211
lock (_guard)
216212
{
@@ -275,6 +271,19 @@ public override void Read(BinaryReader reader)
275271
BucketCount = reader.ReadUInt32();
276272
}
277273

274+
/// <summary>
275+
/// Returns the bucket at the specified index.
276+
/// </summary>
277+
/// <param name="index">The index of the bucket to retrieve.</param>
278+
/// <returns>The bucket at the specified index.</returns>
279+
private IndexStorageSegmentBucket GetBucket(uint index)
280+
{
281+
var addr = Addr + SegmentSize + (index * IndexStorageSegmentBucket.SegmentSize);
282+
var bucket = new IndexStorageSegmentBucket(Context, addr);
283+
284+
return Context.IndexFile.Read(bucket);
285+
}
286+
278287
/// <summary>
279288
/// Writes the record to the storage medium.
280289
/// </summary>

0 commit comments

Comments
 (0)