-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPHCL-Extract.psm1
More file actions
211 lines (179 loc) · 9.23 KB
/
PHCL-Extract.psm1
File metadata and controls
211 lines (179 loc) · 9.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
#
# Script file for module 'PoSh-HipChatLogs'
#
# Generated by: Nial Francis
#
# Generated on: 18/10/2019
#
# This script contains search and nearby message extraction functions for searching by line or text
function Find-HipChatLog {
    <#
    .SYNOPSIS
        Searches HipChat logs for text or a specific line and displays it.
    .DESCRIPTION
        Runs in one of two modes:
        - Search: looks for an array of terms in a single file, or in the files found in a given directory.
        - Line: extracts one specific line (plus surrounding context) from a single file.
        The extracted messages are then passed to Format-HipChatLogAsChat to be displayed or saved to file in a human-readable format.
    .PARAMETER Line
        The line number to extract. Supplying it selects line mode instead of a text search.
    .PARAMETER Context
        Line mode only. The number of lines to read before and after Line (default: 1000).
    .PARAMETER Directory
        Search mode only. A directory that is searched recursively for files named history.json.
        Cannot be combined with FileName.
    .PARAMETER Before
        Search mode only. Matches later than this date are skipped.
        Can be combined with After to create a constrained range.
    .PARAMETER After
        Search mode only. Matches earlier than this date are skipped.
        Can be combined with Before to create a constrained range.
    .PARAMETER OnDate
        Search mode only. Matches that were not sent on this date are skipped.
        Only used when neither Before nor After is given.
    .PARAMETER Patterns
        The search terms. In search mode each term is compared to the message text with the -like operator, so wildcards are supported.
        In line mode the terms are handed to Format-HipChatLogAsChat for highlighting.
    .PARAMETER FileName
        The log file to read.
    .PARAMETER AuditDirectory
        When given, the search hits and misses are saved to log files inside this directory.
    .PARAMETER Highlight
        Search mode only. The IDs of the matching messages are collected and handed to Format-HipChatLogAsChat.
    .PARAMETER LegacyFormat
        Passed through to Format-HipChatLogAsChat; the results are output in the legacy text format.
    .PARAMETER NoConsoleLog
        Passed through to Format-HipChatLogAsChat; the results are not printed to the console.
        Must be combined with AuditDirectory or LegacyFormat, otherwise there is no output destination.
    .PARAMETER Extract
        When the input file is a direct message log, conversations from the same day that do not contain the search term can be returned.
        This switch will exclude those from the results.
    .EXAMPLE
        Find-HipChatLog -FileName "C:\archive\rooms\1111\history.json" -Patterns '*find this text*' -Before '2017-02' -AuditDirectory 'C:\audit'
        Searches FileName for the text "find this text" with wildcard matching.
        The date comparison will also exclude matches on or after February 1, 2017.
        The search hits and misses will be saved to disk inside AuditDirectory.
    .EXAMPLE
        Find-HipChatLog -Directory "C:\archive\rooms\1111\" -Patterns '*find this text*' -After '2017-02' -AuditDirectory 'C:\audit' -NoConsoleLog
        Searches all files named history.json under Directory for the text "find this text" with wildcard matching.
        The date comparison will also exclude matches on or before February 1, 2017.
        The search hits and misses will be saved to disk inside AuditDirectory.
        The results will not be printed to the console - this can increase performance for large searches.
    .EXAMPLE
        Find-HipChatLog -FileName "C:\archive\rooms\1111\history.json" -Line 3627 -Context 900 -LegacyFormat
        Searches FileName for line 3627 and extracts 900 context lines (default: 1000) before and after.
        It then prints the results to the console in the legacy text format.
    .NOTES
        The log files are parsed most reliably when they are unchanged from the original HipChat archive export format.
        When searching by range, the input times are interpreted as local time by default, which makes searching for a specific message localised.
        You can specify the timezone or pass a [datetime] object.

        #TODO Make date extraction a little better
        When extracting results a whole day's messages will be returned - but the extraction 'day' is UTC, which is native to the HipChat log.
        For example; in the +10 timezone, 9am will be the previous day. A message extracted at 9am will therefore include the past 23 hours and the next hour until 10am.
    #>
    [cmdletbinding(
        DefaultParameterSetName='Search'
    )]
    Param(
        [Parameter(ParameterSetName='Line',Mandatory = $true)][int]$Line,
        [Parameter(ParameterSetName='Line')][int]$Context = 1000,
        [Parameter(ParameterSetName='Search')][string]$Directory,
        [Parameter(ParameterSetName='Search')][datetime]$Before,
        [Parameter(ParameterSetName='Search')][datetime]$After,
        [Parameter(ParameterSetName='Search')][datetime]$OnDate,
        [Parameter(ParameterSetName='Line')]
        [Parameter(ParameterSetName='Search',Mandatory = $true)]
        [array]$Patterns,
        [string]$FileName,
        [string]$AuditDirectory,
        [switch]$Highlight,
        [switch]$LegacyFormat,
        [switch]$NoConsoleLog,
        [switch]$Extract
    )

    # With the console silenced, at least one of the other two destinations has to be requested.
    $hasDestination = (-not $NoConsoleLog) -or $AuditDirectory -or $LegacyFormat
    if (-not $hasDestination) {
        throw "Parameters specify no output destination"
    }

    # The helpers below take no arguments; they read this function's parameters from the calling scope.
    if ($PSCmdlet.ParameterSetName -ne 'Line') {
        Write-Verbose "Searching for pattern in json format"
        SearchHipChatLog
    } else {
        Write-Verbose "Searching by line number"
        ExtractHipChatLogLine
    }

    # Release the deserialised log data once the run is finished.
    [GC]::Collect()
}
# Ensure only a log file or directory is specified
# If directory, recurse and select all history.json files
# Determine the type of date range provided
# Initiate the search
function SearchHipChatLog {
    # Validates the file/directory selection, works out which date filter applies and
    # runs SearchJsonForStrings once per resolved log file.
    # Reads $FileName, $Directory, $After, $Before and $OnDate from the calling scope.
    # Sets $ds (1 = between, 2 = after, 3 = before, 4 = on date), which SearchJsonForStrings reads.
    $hasFile = [bool]$FileName
    $hasDirectory = [bool]$Directory

    if ($hasFile -and $hasDirectory) {
        throw 'Only one of FileName or Directory parameters can be used'
    }
    if (-not ($hasFile -or $hasDirectory)) {
        throw 'A log file or directory must be specified'
    }

    if ($hasFile) {
        if (Test-Path -Path $FileName -PathType Leaf) {
            $filelist = Get-Item -Path $FileName
        }
    } elseif (Test-Path -Path $Directory -PathType Container) {
        $filelist = Get-ChildItem -Path $Directory -File -Recurse -Filter history.json
    }

    if (-not $filelist) {
        throw 'Could not resolve the file descriptor provided'
    }

    # A Before/After range takes precedence over OnDate.
    if ($After) {
        if ($Before) {
            $ds = 1
            Write-Verbose "Searching for messages sent between $After and $Before"
        } else {
            $ds = 2
            Write-Verbose "Searching for messages sent after $After"
        }
    } elseif ($Before) {
        $ds = 3
        Write-Verbose "Searching for messages sent before $Before"
    } elseif ($OnDate) {
        $ds = 4
        Write-Verbose "Searching for messages sent on $OnDate"
    }

    foreach ($file in $filelist) {
        SearchJsonForStrings -File $file
    }
}
# Load file, deserialise
# Iterate through the search terms and the object and use string comparison to find the terms
## For each hit, get the date, compare to the specified range
## If it's in range, extract all messages for that date
# Send each day's messages for output
function SearchJsonForStrings {
    # Searches one log file for every term in $Patterns and sends each day that contains an
    # in-range hit to Format-HipChatLogAsChat.
    # Reads $Patterns, $Highlight, $ds, $After, $Before, $OnDate, $Extract, $AuditDirectory,
    # $NoConsoleLog and $LegacyFormat from the calling scope.
    [cmdletbinding()]
    Param(
        # File object of the log to search; its FullName and Directory.Name properties are used.
        $File
    )

    # -LiteralPath keeps characters such as [ and ] in the path from being treated as wildcards.
    $rawdata = Get-Content -Raw -LiteralPath $File.FullName
    $parentdir = $File.Directory.Name

    try {
        Add-Type -AssemblyName System.Web.Extensions
        $jss = New-Object -TypeName System.Web.Script.Serialization.JavaScriptSerializer
        $jss.MaxJsonLength = 1GB
        $jsondata = $jss.Deserialize($rawdata, [System.Object])
        Write-Verbose "Using .Net JavaScriptSerializer"
    } catch [System.TypeLoadException] {
        if ($PSVersionTable.PSEdition -eq 'Core') {
            throw "This module uses the JavaScriptSerializer from System.Web.Extensions.dll for better performance with large files.
Try running under powershell.exe (not PowerShell Core). For more info see:
https://docs.microsoft.com/en-us/dotnet/api/system.web.script.serialization.javascriptserializer?view=netframework-4.8".replace("`t",'')
        } else { throw $error[0] }
    }

    $days = @{}
    $hilids = @()
    [array]$hits = @()

    foreach ($term in $Patterns) {
        Write-Verbose "[Pattern iteration: $term]"
        # @() guarantees that a term without matches contributes nothing (never a null entry).
        $hits += @($jsondata.Values | Where-Object { $_.message -like "$term" })
    }

    if ($hits.Count -eq 0) {
        Write-Host "0 matches: $($File.FullName)"
        # The path is passed as a format argument so that { or } in a file name cannot break -f.
        WriteAudit 'Audit' ("[0 instances {0}: {1}] " -f ($Patterns -join ','), $File.FullName)
        return
    }
    Write-Host $hits.Count "matches: $($File.FullName)"

    foreach ($hit in $hits) {
        if ($Highlight) { $hilids += $hit.id }

        # Only the first space-separated token of the timestamp is parsed as the date/time.
        $sts = [datetime]$hit.timestamp.Split(' ')[0]
        # Days are keyed by their UTC date.
        $stssh = $sts.ToUniversalTime().ToString("yyyy-MM-dd")

        # Short preview (at most 99 characters) for the verbose and audit output.
        $msgtext = [string]$hit.message
        $vmsg = "$stssh : " + $msgtext.Substring(0,[System.Math]::Min($msgtext.Length, 99)).Replace("`n",'')
        Write-Verbose $vmsg

        if ($days.ContainsKey($stssh)) {
            Write-Verbose "$stssh [Skipping: date already extracted]"
            WriteAudit $parentdir "[$stssh - Skipping: date already extracted $file]"
            continue
        }

        # $ds is set by SearchHipChatLog: 1 = between, 2 = after, 3 = before, 4 = on date.
        $outofrange = $false
        switch ($ds) {
            1 { $outofrange = ($sts -lt $After) -or ($sts -gt $Before) }
            2 { $outofrange = $sts -lt $After }
            3 { $outofrange = $sts -gt $Before }
            4 { $outofrange = $sts.ToString('yyyy-MM-dd') -ne $OnDate.ToString('yyyy-MM-dd') }
        }
        if ($outofrange) {
            Write-Verbose "[Skipping: result is outside date range]"
            WriteAudit $parentdir "[$stssh - Skipping: result is outside date range $file]"
            continue
        }

        Write-Verbose "[Extracting date range]"
        WriteAudit $parentdir ("[{0}: Extracting ID {1}: {2}]" -f $stssh, $hit.id, $vmsg)
        # Keep every entry whose timestamp starts with the hit's UTC date.
        $days[$stssh] = $jsondata | Where-Object { $_.Values.timestamp -like "$stssh*" }
    }

    foreach ($log in $days.GetEnumerator()) {
        Format-HipChatLogAsChat -LogObject $log -HighlightIDs $hilids -Extract:$Extract.IsPresent -AuditDirectory $AuditDirectory -ParentDir $parentdir -NoConsoleLog:$NoConsoleLog.IsPresent -LegacyFormat:$LegacyFormat.IsPresent
    }
}
function WriteAudit {
    # Appends a timestamped entry to the file "<Log>.log" inside $AuditDirectory.
    # $AuditDirectory is read from the calling scope; when it is empty nothing is written.
    Param(
        # Name of the log file, without the .log extension.
        [string]$Log,
        # Text of the audit entry. Accepts pipeline input; every piped item is written.
        [parameter(ValueFromPipeline = $true)][string]$Data
    )
    process {
        # The process block runs once per piped item; without it only the last item would be logged.
        if (-not $AuditDirectory) { return }
        $lf = Join-Path $AuditDirectory ($Log + '.log')
        (Get-Date -Format 'yyyyMMdd-HHmmss: ') + $Data | Out-File -Append -FilePath $lf
    }
}
# Read the file and select the correct lines
# Ensure the object is correctly encapsulated
# Send the object for output
function ExtractHipChatLogLine {
    # Reads the lines around $Line from $FileName, cuts the selection down to whole JSON objects
    # and sends the result to Format-HipChatLogAsChat.
    # Reads $Line, $Context, $FileName, $Patterns, $Extract, $NoConsoleLog and $LegacyFormat
    # from the calling scope.
    if (-not $FileName) { throw 'A log file must be specified' }
    if (-not (Test-Path -LiteralPath $FileName -PathType Leaf)) { throw 'Could not resolve the file descriptor provided' }

    # Resolve through PowerShell first: .NET file APIs resolve relative paths against the
    # process working directory, which can differ from the current PowerShell location.
    $logfile = Get-Item -LiteralPath $FileName -Force
    # Name of the containing directory, derived the same way SearchJsonForStrings does it.
    # NOTE(review): previously $parentdir was undefined here, so -ParentDir received $null - confirm
    # that Format-HipChatLogAsChat handles a populated value in line mode.
    $parentdir = $logfile.Directory.Name

    $first = [System.Math]::Max($line - $context,0)
    $last = $line + $context
    # @() keeps the selection an array even when zero or one line is returned.
    $rawdata = @([System.IO.File]::ReadLines($logfile.FullName) | Select-Object -Index ($first..$last))

    # First line that opens an object and last line that closes one inside the selection.
    $js = [array]::IndexOf($rawdata,' {')
    $je = [array]::LastIndexOf($rawdata,' },')
    # Both searches return -1 when nothing matches; that must not be used as an array index.
    if (($js -lt 0) -or ($je -lt $js)) {
        throw 'Invalid data read from file. Ensure the line number and context is correct.'
    }

    # Casting the slice to [string] joins the lines; the trailing comma is dropped before wrapping in [ ].
    [string]$string = $rawdata[$js..$je]
    [string]$json = '[' + $string.TrimEnd(",") + ']'
    Format-HipChatLogAsChat -json $json -Highlight $Patterns -Extract:$Extract.IsPresent -ParentDir $parentdir -NoConsoleLog:$NoConsoleLog.IsPresent -LegacyFormat:$LegacyFormat.IsPresent
}