1+ // pkg/correlation/pattern_matcher.go
2+ package correlation
3+
4+ import (
5+ "regexp"
6+ "strings"
7+
8+ "github.com/CodeMonkeyCybersecurity/shells/pkg/types"
9+ )
10+
11+ // PatternMatcher extracts and matches patterns across findings
12+ type PatternMatcher struct {
13+ domainPatterns []DomainPattern
14+ endpointPatterns []EndpointPattern
15+ parameterPatterns []ParameterPattern
16+ }
17+
// DomainPattern describes a group of domain names that share a pattern.
type DomainPattern struct {
	// Pattern is the pattern key, e.g. "numbered_subdomain".
	Pattern string
	// Regex is an optional compiled expression for the pattern.
	// ExtractDomainPatterns leaves it nil.
	Regex *regexp.Regexp
	// Type is the classification derived from Pattern, e.g. "sequential".
	Type string
	// Examples lists the domain values that were grouped under Pattern.
	Examples []string
	// Confidence is a score; ExtractDomainPatterns produces values in (0, 1].
	Confidence float64
}
26+
// EndpointPattern describes a group of endpoints that share a pattern.
type EndpointPattern struct {
	// Pattern is the pattern key, e.g. "versioned_api".
	Pattern string
	// Regex is an optional compiled expression for the pattern.
	// ExtractEndpointPatterns leaves it nil.
	Regex *regexp.Regexp
	// Type is the classification derived from Pattern, e.g. "api_versioning".
	Type string
	// Examples lists the endpoint values that were grouped under Pattern.
	Examples []string
	// Confidence is a score; ExtractEndpointPatterns produces values in (0, 1].
	Confidence float64
}
35+
// ParameterPattern describes a group of parameter names that share a pattern.
// Unlike DomainPattern and EndpointPattern it carries no compiled expression.
type ParameterPattern struct {
	// Pattern is the pattern key, e.g. "id_parameter".
	Pattern string
	// Type is the classification derived from Pattern, e.g. "identifier".
	Type string
	// Examples lists the parameter names that were grouped under Pattern.
	Examples []string
	// Confidence is a score; ExtractParameterPatterns produces values in (0, 1].
	Confidence float64
}
43+
44+ // NewPatternMatcher creates a new pattern matcher
45+ func NewPatternMatcher () * PatternMatcher {
46+ return & PatternMatcher {
47+ domainPatterns : []DomainPattern {},
48+ endpointPatterns : []EndpointPattern {},
49+ parameterPatterns : []ParameterPattern {},
50+ }
51+ }
52+
53+ // ExtractDomainPatterns extracts domain patterns from findings
54+ func (pm * PatternMatcher ) ExtractDomainPatterns (findings []types.Finding ) []DomainPattern {
55+ var patterns []DomainPattern
56+ domainMap := make (map [string ][]string )
57+
58+ // Collect domains from findings
59+ for _ , finding := range findings {
60+ if domain , ok := finding .Metadata ["domain" ].(string ); ok {
61+ key := pm .identifyDomainPattern (domain )
62+ domainMap [key ] = append (domainMap [key ], domain )
63+ }
64+ }
65+
66+ // Create patterns from grouped domains
67+ for patternKey , domains := range domainMap {
68+ if len (domains ) >= 2 { // Only patterns with multiple examples
69+ pattern := DomainPattern {
70+ Pattern : patternKey ,
71+ Type : pm .classifyDomainPattern (patternKey ),
72+ Examples : domains ,
73+ Confidence : float64 (len (domains )) / 10.0 , // Simple confidence calculation
74+ }
75+ if pattern .Confidence > 1.0 {
76+ pattern .Confidence = 1.0
77+ }
78+ patterns = append (patterns , pattern )
79+ }
80+ }
81+
82+ return patterns
83+ }
84+
85+ // ExtractEndpointPatterns extracts endpoint patterns from findings
86+ func (pm * PatternMatcher ) ExtractEndpointPatterns (findings []types.Finding ) []EndpointPattern {
87+ var patterns []EndpointPattern
88+ endpointMap := make (map [string ][]string )
89+
90+ // Collect endpoints from findings
91+ for _ , finding := range findings {
92+ if endpoint , ok := finding .Metadata ["endpoint" ].(string ); ok {
93+ key := pm .identifyEndpointPattern (endpoint )
94+ endpointMap [key ] = append (endpointMap [key ], endpoint )
95+ }
96+ }
97+
98+ // Create patterns from grouped endpoints
99+ for patternKey , endpoints := range endpointMap {
100+ if len (endpoints ) >= 2 {
101+ pattern := EndpointPattern {
102+ Pattern : patternKey ,
103+ Type : pm .classifyEndpointPattern (patternKey ),
104+ Examples : endpoints ,
105+ Confidence : float64 (len (endpoints )) / 10.0 ,
106+ }
107+ if pattern .Confidence > 1.0 {
108+ pattern .Confidence = 1.0
109+ }
110+ patterns = append (patterns , pattern )
111+ }
112+ }
113+
114+ return patterns
115+ }
116+
117+ // ExtractParameterPatterns extracts parameter patterns from findings
118+ func (pm * PatternMatcher ) ExtractParameterPatterns (findings []types.Finding ) []ParameterPattern {
119+ var patterns []ParameterPattern
120+ paramMap := make (map [string ][]string )
121+
122+ // Collect parameters from findings
123+ for _ , finding := range findings {
124+ if params , ok := finding .Metadata ["parameters" ].([]string ); ok {
125+ for _ , param := range params {
126+ key := pm .identifyParameterPattern (param )
127+ paramMap [key ] = append (paramMap [key ], param )
128+ }
129+ }
130+ }
131+
132+ // Create patterns from grouped parameters
133+ for patternKey , params := range paramMap {
134+ if len (params ) >= 2 {
135+ pattern := ParameterPattern {
136+ Pattern : patternKey ,
137+ Type : pm .classifyParameterPattern (patternKey ),
138+ Examples : params ,
139+ Confidence : float64 (len (params )) / 10.0 ,
140+ }
141+ if pattern .Confidence > 1.0 {
142+ pattern .Confidence = 1.0
143+ }
144+ patterns = append (patterns , pattern )
145+ }
146+ }
147+
148+ return patterns
149+ }
150+
151+ // Helper methods for pattern identification
152+
153+ func (pm * PatternMatcher ) identifyDomainPattern (domain string ) string {
154+ // Simple pattern identification
155+ parts := strings .Split (domain , "." )
156+ if len (parts ) >= 2 {
157+ // Look for numbered patterns
158+ if matched , _ := regexp .MatchString (`\d+` , parts [0 ]); matched {
159+ return "numbered_subdomain"
160+ }
161+ // Look for environment patterns
162+ if matched , _ := regexp .MatchString (`(dev|test|stage|staging|prod|production)` , parts [0 ]); matched {
163+ return "environment_subdomain"
164+ }
165+ // Look for region patterns
166+ if matched , _ := regexp .MatchString (`(us|eu|asia|uk|au|ca)-?(east|west|north|south|central)?-?\d*` , parts [0 ]); matched {
167+ return "region_subdomain"
168+ }
169+ }
170+ return "generic_subdomain"
171+ }
172+
173+ func (pm * PatternMatcher ) identifyEndpointPattern (endpoint string ) string {
174+ // API patterns
175+ if matched , _ := regexp .MatchString (`/api/v\d+/` , endpoint ); matched {
176+ return "versioned_api"
177+ }
178+ if matched , _ := regexp .MatchString (`/admin/` , endpoint ); matched {
179+ return "admin_endpoint"
180+ }
181+ if matched , _ := regexp .MatchString (`/test/` , endpoint ); matched {
182+ return "test_endpoint"
183+ }
184+ return "generic_endpoint"
185+ }
186+
187+ func (pm * PatternMatcher ) identifyParameterPattern (param string ) string {
188+ // Common parameter patterns
189+ if matched , _ := regexp .MatchString (`(id|user_id|userId)` , param ); matched {
190+ return "id_parameter"
191+ }
192+ if matched , _ := regexp .MatchString (`(token|auth|authorization)` , param ); matched {
193+ return "auth_parameter"
194+ }
195+ if matched , _ := regexp .MatchString (`(debug|test|dev)` , param ); matched {
196+ return "debug_parameter"
197+ }
198+ return "generic_parameter"
199+ }
200+
201+ func (pm * PatternMatcher ) classifyDomainPattern (pattern string ) string {
202+ switch pattern {
203+ case "numbered_subdomain" :
204+ return "sequential"
205+ case "environment_subdomain" :
206+ return "environment"
207+ case "region_subdomain" :
208+ return "regional"
209+ default :
210+ return "unknown"
211+ }
212+ }
213+
214+ func (pm * PatternMatcher ) classifyEndpointPattern (pattern string ) string {
215+ switch pattern {
216+ case "versioned_api" :
217+ return "api_versioning"
218+ case "admin_endpoint" :
219+ return "administrative"
220+ case "test_endpoint" :
221+ return "testing"
222+ default :
223+ return "unknown"
224+ }
225+ }
226+
227+ func (pm * PatternMatcher ) classifyParameterPattern (pattern string ) string {
228+ switch pattern {
229+ case "id_parameter" :
230+ return "identifier"
231+ case "auth_parameter" :
232+ return "authentication"
233+ case "debug_parameter" :
234+ return "debugging"
235+ default :
236+ return "unknown"
237+ }
238+ }
0 commit comments