-
Notifications
You must be signed in to change notification settings - Fork 112
Expand file tree
/
Copy pathKernelMemory.cs
More file actions
232 lines (200 loc) · 9.31 KB
/
KernelMemory.cs
File metadata and controls
232 lines (200 loc) · 9.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
using AutoMapper;
using DnsClient.Internal;
using Microsoft.GS.DPS.Images;
using Microsoft.GS.DPS.Model.KernelMemory;
using Microsoft.GS.DPS.Storage.Document;
using Microsoft.KernelMemory;
using Microsoft.KernelMemory.Context;
using Microsoft.KernelMemory.Pipeline;
using MongoDB.Bson;
using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
using Document = Microsoft.GS.DPS.Storage.Document.Entities.Document;
using Microsoft.GS.DPS.API.UserInterface;
using Microsoft.GS.DPS.Storage.AISearch;
using static System.Runtime.InteropServices.JavaScript.JSType;
namespace Microsoft.GS.DPS.API
{
public class KernelMemory
{
    // Kernel Memory web client used for ingestion, export and Q&A calls.
    private readonly MemoryWebClient _kmClient;
    // Repository persisting per-document metadata (id, summary, keywords, timings).
    private readonly DocumentRepository _documentRepository;
    // Cache layer refreshed after each successful import.
    private readonly DataCacheManager _dataCache;
    // Pushes extracted keyword tags into the search index.
    private readonly TagUpdater _tagUpdator;

    // Fallback system prompt used to extract keywords via the LLM when the
    // ingestion pipeline produced no tags. Loaded once per process.
    private static readonly string keywordExtractorPrompt;

    static KernelMemory()
    {
        // Resolve the prompt file relative to the running assembly's directory:
        // <assembly dir>/Prompts/KeywordExtract_SystemPrompt.txt
        var assemblyLocation = Assembly.GetExecutingAssembly().Location;
        var assemblyDirectory = System.IO.Path.GetDirectoryName(assemblyLocation);
        var systemPromptFilePath = System.IO.Path.Combine(assemblyDirectory, "Prompts", "KeywordExtract_SystemPrompt.txt");
        keywordExtractorPrompt = System.IO.File.ReadAllText(systemPromptFilePath);
    }

    public KernelMemory(MemoryWebClient kmClient, DocumentRepository documentRepository, DataCacheManager dataCache, TagUpdater tagUpdator)
    {
        _kmClient = kmClient;
        _documentRepository = documentRepository;
        _dataCache = dataCache;
        _tagUpdator = tagUpdator;
    }

    /// <summary>
    /// Uploads a document to Kernel Memory, runs the ingestion pipeline
    /// (extract, keyword extraction, summarize, partition, embeddings, save),
    /// waits for completion, persists the resulting metadata and refreshes the cache.
    /// </summary>
    /// <param name="documentStream">Content of the document to import.</param>
    /// <param name="fileName">Original file name; also used to locate pipeline artifacts.</param>
    /// <param name="contentType">MIME type recorded with the document.</param>
    /// <returns>Import metadata including keywords, summary and processing time.</returns>
    /// <exception cref="TimeoutException">Thrown when processing exceeds 60 minutes.</exception>
    public async Task<DocumentImportedResult> ImportDocument(Stream documentStream,
                                                             string fileName,
                                                             string contentType)
    {
        var documentId = await _kmClient.ImportDocumentAsync(documentStream, fileName, steps: [
                                                                Constants.PipelineStepsExtract,
                                                                "keyword_extract",
                                                                Constants.PipelineStepsSummarize,
                                                                Constants.PipelineStepsPartition,
                                                                Constants.PipelineStepsGenEmbeddings,
                                                                Constants.PipelineStepsSaveRecords
                                                            ]);

        // Poll for pipeline completion with a 60-minute timeout.
        // Use UtcNow so elapsed time is immune to DST transitions / local clock
        // adjustments (DateTime.Now is not), consistent with ImportedTime below.
        var startTime = DateTime.UtcNow;
        var elapsedTime = TimeSpan.Zero;
        var timeout = TimeSpan.FromMinutes(60);

        while (true)
        {
            var isReady = await _kmClient.IsDocumentReadyAsync(documentId);
            if (isReady) break;

            // Check the timeout BEFORE sleeping so we fail promptly instead of
            // waiting one extra 5-second cycle past the deadline.
            elapsedTime = DateTime.UtcNow - startTime;
            if (elapsedTime > timeout)
            {
                throw new TimeoutException("Document processing timeout");
            }
            await Task.Delay(5000);
        }
        elapsedTime = DateTime.UtcNow - startTime;

        var importedResult = new DocumentImportedResult
        {
            DocumentId = documentId,
            ImportedTime = DateTime.UtcNow,
            MimeType = contentType,
            FileName = fileName,
            ProcessingTime = elapsedTime,
            Keywords = await getKeywords(documentId, fileName),
            Summary = await getSummary(documentId, fileName)
        };

        // Persist the document metadata to the repository.
        Document document = new Document
        {
            DocumentId = documentId,
            FileName = fileName,
            ImportedTime = importedResult.ImportedTime,
            MimeType = contentType,
            ProcessingTime = importedResult.ProcessingTime,
            Summary = importedResult.Summary,
            Keywords = importedResult.Keywords
        };
        await _documentRepository.RegisterAsync(document);

        // Refresh the cache so the new document is visible immediately.
        _dataCache.ManualRefresh();

        return importedResult;
    }

    /// <summary>
    /// Deletes a document from both the metadata repository and Kernel Memory.
    /// </summary>
    /// <param name="documentId">Id of the document to delete.</param>
    /// <returns>Always true when both deletions complete without throwing.</returns>
    /// <exception cref="ArgumentException">Thrown when <paramref name="documentId"/> is null or empty.</exception>
    public async Task<bool> DeleteDocument(string documentId)
    {
        if (string.IsNullOrEmpty(documentId))
        {
            throw new ArgumentException("DocumentId is required");
        }

        // Remove the metadata record first (if one exists), then the KM content.
        Document registeredDocument = await _documentRepository.FindByDocumentIdAsync(documentId);
        if (registeredDocument != null) await _documentRepository.DeleteAsync(registeredDocument.id);

        await _kmClient.DeleteDocumentAsync(documentId);
        return true;
    }

    /// <summary>
    /// Downloads the pipeline-generated summary artifact for a document
    /// ("{fileName}.summarize.0.txt") and returns its text content.
    /// </summary>
    private async Task<string> getSummary(string documentId, string fileName)
    {
        var summaryFileName = $"{fileName}.summarize.0.txt";
        var summaryFile = await _kmClient.ExportFileAsync(documentId, summaryFileName);
        var summaryFileStream = await summaryFile.GetStreamAsync();
        // Dispose the reader (and underlying stream) — previously leaked.
        using var reader = new StreamReader(summaryFileStream);
        return await reader.ReadToEndAsync();
    }

    /// <summary>
    /// Downloads the pipeline-generated tags artifact ("{fileName}.tags.json")
    /// and flattens it into a keyword dictionary (key -> comma-joined values).
    /// When the pipeline produced no tags (e.g. very large documents), falls back
    /// to asking the LLM with the keyword-extraction prompt and updates the
    /// search index tags with the result.
    /// </summary>
    /// <returns>Keyword dictionary; empty on missing content or any parse failure.</returns>
    private async Task<Dictionary<string, string>?> getKeywords(string documentId, string fileName)
    {
        var keywordFileName = $"{fileName}.tags.json";
        var keywordFile = await _kmClient.ExportFileAsync(documentId, keywordFileName);
        var keywordFileStream = await keywordFile.GetStreamAsync();

        string? keywordContent;
        // Dispose the reader (and underlying stream) — previously leaked.
        using (var reader = new StreamReader(keywordFileStream))
        {
            keywordContent = await reader.ReadToEndAsync();
        }

        if (string.IsNullOrEmpty(keywordContent))
        {
            return new Dictionary<string, string>();
        }

        try
        {
            // Expected shape: [ { "tagName": ["v1", "v2"], ... }, ... ]
            var result = JsonSerializer.Deserialize<List<Dictionary<string, List<string>>>>(keywordContent)
                         ?? new List<Dictionary<string, List<string>>>();

            if (result.Count == 0)
            {
                // Fallback: the document may be too large for inline tagging —
                // ask Kernel Memory directly, scoped to this document.
                var answer = await _kmClient.AskAsync(question: keywordExtractorPrompt,
                                                     filters: new List<MemoryFilter> { new MemoryFilter().ByDocument(documentId) });
                result = JsonSerializer.Deserialize<List<Dictionary<string, List<string>>>>(answer.Result)
                         ?? new List<Dictionary<string, List<string>>>();

                // Flatten to "key:value" strings for the search index tag update.
                var listKeyValueString = new List<string>();
                foreach (var dict in result)
                {
                    foreach (var kvp in dict)
                    {
                        foreach (var value in kvp.Value)
                        {
                            listKeyValueString.Add($"{kvp.Key.Trim()}:{value.Trim()}");
                        }
                    }
                }
                await _tagUpdator.UpdateTags(documentId, listKeyValueString);
            }

            // Flatten to key -> "v1, v2". Use the indexer so a duplicate key
            // overwrites instead of throwing (Add would throw and the catch
            // below would discard ALL keywords).
            var keywordDict = new Dictionary<string, string>();
            foreach (var item in result)
            {
                foreach (var key in item.Keys)
                {
                    keywordDict[key] = string.Join(", ", item[key]);
                }
            }
            return keywordDict;
        }
        catch (Exception)
        {
            // Best-effort: malformed tag JSON must not fail the whole import.
            return new Dictionary<string, string>();
        }
    }

    /// <summary>
    /// Asks Kernel Memory a question, optionally scoped to specific documents.
    /// </summary>
    /// <param name="question">Natural-language question.</param>
    /// <param name="documents">Document ids to scope the answer to; null/empty = all documents.</param>
    /// <param name="filters">Unused by this implementation; kept for interface compatibility.</param>
    /// <param name="context">Optional request context forwarded to Kernel Memory.</param>
    public async Task<MemoryAnswer> Ask(string question, string[] documents, ICollection<MemoryFilter>? filters = null, RequestContext? context = null)
    {
        ICollection<MemoryFilter>? memFilters = null;
        // Null-safe: a null documents array now means "no filter" instead of NRE.
        if (documents is { Length: > 0 })
        {
            memFilters = new List<MemoryFilter>();
            foreach (var documentId in documents)
            {
                memFilters.Add(new MemoryFilter().ByDocument(documentId));
            }
        }
        // minRelevance 0.012: very low threshold so weak matches still contribute.
        var answer = await _kmClient.AskAsync(question: question, filters: memFilters, context: context, minRelevance: 0.012);
        return answer;
    }

    /// <summary>
    /// Exports a stored file (original or pipeline artifact) from Kernel Memory.
    /// </summary>
    public async Task<StreamableFileContent> ExportFile(string documentId, string fileName)
    {
        var fileContent = await _kmClient.ExportFileAsync(documentId, fileName);
        return fileContent;
    }
}
}