-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLuceneWithS3.cs
More file actions
100 lines (93 loc) · 4.45 KB
/
LuceneWithS3.cs
File metadata and controls
100 lines (93 loc) · 4.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Blob;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Lucene.Net;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Store;
using Lucene.Net.Analysis.Standard;
namespace SourceSearch
{
/// <summary>
/// Console tool that builds a Lucene.Net index over the C# files found under
/// <c>sourcePath</c>, uploads a textual description of each indexed document to a
/// newly created Azure blob container, and finally prints the title (full path) of
/// every indexed file whose contents contain the search term given on the command line.
/// </summary>
class Program
{
    // Prefix of the blob container name; a fresh GUID is appended on every run.
    private const string bucketName = "ravi-rajamani-shared";
    // keyName1 and filePath are not referenced anywhere in this class; kept untouched.
    private const string keyName1 = "searchIndex";
    private const string filePath = @"\Code\Index2";
    // Root directory that is scanned recursively for *.cs files.
    private const string sourcePath = @"\code\API";
    // Azure Storage connection string, read once from the environment at type initialization.
    private static string storageConnectionString = Environment.GetEnvironmentVariable("storageconnectionstring");

    /// <summary>
    /// Entry point. Expects exactly one argument: the term to look up in the index.
    /// Any exception raised while indexing, uploading or searching is written to the console.
    /// </summary>
    /// <param name="args">Command-line arguments; <c>args[0]</c> is the search term.</param>
    static void Main(string[] args)
    {
        if (args.Length != 1)
        {
            Console.WriteLine("Usage: SourceSearch <term>");
            return;
        }

        // Fail with a clear message instead of an exception dump from CloudStorageAccount.Parse.
        if (string.IsNullOrWhiteSpace(storageConnectionString))
        {
            Console.WriteLine("Environment variable 'storageconnectionstring' is not set.");
            return;
        }

        try
        {
            CloudBlobContainer cloudBlobContainer = CreateContainer();

            // The index directory is disposable; release it once indexing and searching are done.
            using (var indexAt = SimpleFSDirectory.Open(new DirectoryInfo(@"C:\Code\Index2")))
            {
                BuildIndex(indexAt, cloudBlobContainer);
                PrintMatches(indexAt, args[0]);
            }
        }
        catch (Exception e)
        {
            Console.WriteLine(e);
        }
    }

    /// <summary>
    /// Creates a new blob container named <c>bucketName</c> + a fresh GUID and sets its
    /// public access level to <see cref="BlobContainerPublicAccessType.Blob"/>.
    /// </summary>
    /// <returns>The newly created container.</returns>
    private static CloudBlobContainer CreateContainer()
    {
        CloudStorageAccount storageAccount = CloudStorageAccount.Parse(storageConnectionString);
        CloudBlobClient cloudBlobClient = storageAccount.CreateCloudBlobClient();
        CloudBlobContainer container =
            cloudBlobClient.GetContainerReference(bucketName + Guid.NewGuid().ToString());
        container.Create();

        // NOTE(review): this exposes the uploaded blobs publicly - confirm that is intended.
        var permissions = new BlobContainerPermissions
        {
            PublicAccess = BlobContainerPublicAccessType.Blob
        };
        container.SetPermissions(permissions);
        return container;
    }

    /// <summary>
    /// Recreates the index in <paramref name="indexAt"/> from every <c>*.cs</c> file below
    /// <c>sourcePath</c> and uploads each document's description to <paramref name="container"/>.
    /// </summary>
    /// <param name="indexAt">Lucene directory that receives the index (existing content is overwritten).</param>
    /// <param name="container">Blob container that receives one blob per indexed file.</param>
    private static void BuildIndex(Lucene.Net.Store.Directory indexAt, CloudBlobContainer container)
    {
        using (var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
        using (var indexer = new IndexWriter(
            indexAt,
            analyzer, true, // true => create a new index, replacing any existing one
            IndexWriter.MaxFieldLength.UNLIMITED))
        {
            var src = new DirectoryInfo(sourcePath);
            foreach (FileInfo file in src.EnumerateFiles("*.cs", SearchOption.AllDirectories))
            {
                string description;
                using (var reader = File.OpenText(file.FullName))
                {
                    var doc = new Document();
                    // Whitespace-split, lower-cased tokens; the search term is lower-cased to match.
                    TokenStream contents = new LowerCaseFilter(new WhitespaceTokenizer(reader));
                    doc.Add(new Field("contents", contents));
                    doc.Add(new Field("title", file.FullName, Field.Store.YES, Field.Index.ANALYZED));
                    // The token stream is consumed here, so the reader must still be open.
                    indexer.AddDocument(doc);
                    description = doc.ToString();
                }

                // NOTE(review): the blob name is the bare file name, so files sharing a name in
                // different subdirectories target the same blob - confirm this is acceptable.
                UploadDescription(container, file.Name, description);
            }

            indexer.Optimize();
            Console.WriteLine("Total number of files indexed : " + indexer.MaxDoc());
        }
    }

    /// <summary>
    /// Uploads <paramref name="description"/> as the content of the block blob
    /// <paramref name="blobName"/> in <paramref name="container"/>.
    /// </summary>
    private static void UploadDescription(CloudBlobContainer container, string blobName, string description)
    {
        CloudBlockBlob cloudBlockBlob = container.GetBlockBlobReference(blobName);
        using (var stream = new MemoryStream())
        using (var writer = new StreamWriter(stream))
        {
            writer.Write(description);
            // StreamWriter buffers its output; without this flush the MemoryStream would still
            // be empty (or truncated) when it is rewound and uploaded.
            writer.Flush();
            stream.Position = 0;
            cloudBlockBlob.UploadFromStream(stream);
        }
    }

    /// <summary>
    /// Prints the stored title of every document whose <c>contents</c> field contains
    /// the lower-cased <paramref name="term"/>.
    /// </summary>
    /// <param name="indexAt">Lucene directory holding the index to search.</param>
    /// <param name="term">Raw search term as typed by the user.</param>
    private static void PrintMatches(Lucene.Net.Store.Directory indexAt, string term)
    {
        using (var reader = IndexReader.Open(indexAt, true)) // true => read-only reader
        {
            var pos = reader.TermPositions(new Term("contents", term.ToLower()));
            while (pos.Next())
            {
                Console.WriteLine("Match in document " + reader.Document(pos.Doc).GetValues("title").FirstOrDefault());
            }
        }
    }
}
}