11// ReSharper disable ForeachCanBeConvertedToQueryUsingAnotherGetEnumerator
22namespace Atc . CodingRules . AnalyzerProviders . Providers ;
33
4- public class StyleCopAnalyzersProvider : AnalyzerProviderBase
4+ public partial class StyleCopAnalyzersProvider : AnalyzerProviderBase
55{
66 private const int TableColumnId = 0 ;
77 private const int TableColumnTitle = 1 ;
88 private const int TableColumnDescription = 2 ;
99
10+ [ GeneratedRegex ( @"\[(?<code>[A-Z]+\d+[A-Z]*)\]\((?<link>[^)]+)\)" , RegexOptions . ExplicitCapture , matchTimeoutMilliseconds : 1000 ) ]
11+ private static partial Regex RuleIdRegex ( ) ;
12+
13+ [ GeneratedRegex ( @"###\s+(?<category>.+)" , RegexOptions . ExplicitCapture , matchTimeoutMilliseconds : 1000 ) ]
14+ private static partial Regex CategoryRegex ( ) ;
15+
1016 public StyleCopAnalyzersProvider (
1117 ILogger logger ,
1218 bool logWithAnsiConsoleMarkup = false )
@@ -20,35 +26,34 @@ public StyleCopAnalyzersProvider(
2026
2127 public override Uri ? DocumentationLink { get ; set ; } = new ( "https://github.com/DotNetAnalyzers/StyleCopAnalyzers/blob/master/DOCUMENTATION.md" , UriKind . Absolute ) ;
2228
29+ private static Uri RawContentBaseUri { get ; } = new ( "https://raw.githubusercontent.com/DotNetAnalyzers/StyleCopAnalyzers/master/" , UriKind . Absolute ) ;
30+
/// <summary>
/// Creates the rule-data container for this provider, constructed from <c>Name</c>.
/// </summary>
/// <returns>A new <see cref="AnalyzerProviderBaseRuleData"/> instance.</returns>
protected override AnalyzerProviderBaseRuleData CreateData()
{
    return new AnalyzerProviderBaseRuleData(Name);
}
2533
2634 protected override async Task ReCollect ( AnalyzerProviderBaseRuleData data )
2735 {
2836 ArgumentNullException . ThrowIfNull ( data ) ;
2937
30- var web = new HtmlWeb ( ) ;
31- var htmlDoc = await web
32- . LoadFromWebAsync ( DocumentationLink ! . AbsoluteUri )
33- . ConfigureAwait ( false ) ;
34-
35- var embeddedNode = htmlDoc . DocumentNode . SelectSingleNode ( "//script[@data-target='react-app.embeddedData']" ) ;
36- if ( embeddedNode is not null )
38+ var ruleFiles = new [ ]
3739 {
38- var dynamicJson = new DynamicJson ( embeddedNode . InnerText ) ;
39- var html = dynamicJson . GetValue ( "payload.blob.richText" ) ? . ToString ( ) ;
40-
41- htmlDoc . LoadHtml ( html ) ;
42- }
43-
44- var articleNode = htmlDoc . DocumentNode . SelectNodes ( "//article[@class='markdown-body entry-content container-lg']" ) [ 0 ] ;
45- var articleRuleLinks = articleNode
46- . SelectNodes ( "//*//strong//a" )
47- . ToList ( ) ;
48-
49- foreach ( var item in articleRuleLinks . Where ( x => x . Attributes . Count == 1 && x . InnerText . Contains ( "(S" , StringComparison . Ordinal ) ) )
40+ "documentation/SpecialRules.md" ,
41+ "documentation/SpacingRules.md" ,
42+ "documentation/ReadabilityRules.md" ,
43+ "documentation/OrderingRules.md" ,
44+ "documentation/NamingRules.md" ,
45+ "documentation/MaintainabilityRules.md" ,
46+ "documentation/LayoutRules.md" ,
47+ "documentation/DocumentationRules.md" ,
48+ "documentation/AlternativeRules.md" ,
49+ } ;
50+
51+ using var httpClient = new HttpClient ( ) ;
52+ foreach ( var rulePath in ruleFiles )
5053 {
51- var rules = await GetRules ( item ) ;
54+ var rules = await GetRulesFromMarkdown (
55+ rulePath ,
56+ httpClient ) ;
5257 foreach ( var rule in rules )
5358 {
5459 data . Rules . Add ( rule ) ;
@@ -57,65 +62,48 @@ protected override async Task ReCollect(AnalyzerProviderBaseRuleData data)
5762 }
5863
5964 [ SuppressMessage ( "Design" , "MA0051:Method is too long" , Justification = "OK." ) ]
60- private static async Task < List < Rule > > GetRules ( HtmlNode item )
65+ private static async Task < List < Rule > > GetRulesFromMarkdown (
66+ string rulePath ,
67+ HttpClient httpClient )
6168 {
62- var link = $ "https://github.com{ item . Attributes [ "href" ] . Value } ";
63- var web = new HtmlWeb ( ) ;
64- var htmlDoc = await web
65- . LoadFromWebAsync ( link )
69+ var linkUri = new Uri ( RawContentBaseUri , rulePath ) ;
70+ var markdown = await httpClient
71+ . GetStringAsync ( linkUri )
6672 . ConfigureAwait ( false ) ;
6773
68- var embeddedNode = htmlDoc . DocumentNode . SelectSingleNode ( "//script[@data-target='react-app.embeddedData']" ) ;
69- if ( embeddedNode is not null )
70- {
71- var dynamicJson = new DynamicJson ( embeddedNode . InnerText ) ;
72- var html = dynamicJson . GetValue ( "payload.blob.richText" ) ? . ToString ( ) ;
73-
74- htmlDoc . LoadHtml ( html ) ;
75- }
76-
77- var articleNode = htmlDoc . DocumentNode . SelectNodes ( "//article[@class='markdown-body entry-content container-lg']" ) [ 0 ] ;
78- var articleTableRows = articleNode
79- . SelectNodes ( "//*//table[1]//tr" )
80- . ToList ( ) ;
81-
82- var category = articleNode
83- . Descendants ( "h3" )
84- . First ( ) . InnerText ;
74+ var category = ExtractCategoryFromMarkdown ( markdown ) ;
75+ var tableRows = ExtractTableRowsFromMarkdown ( markdown ) ;
8576
8677 var i = category . IndexOf ( " Rules" , StringComparison . Ordinal ) ;
8778 if ( i > 0 )
8879 {
89- category = category . Substring ( 0 , i ) ;
80+ category = category [ .. i ] ;
9081 }
9182
83+ var baseUrl = "/DotNetAnalyzers/StyleCopAnalyzers/blob/master/documentation/" ;
84+
9285 var rules = new List < Rule > ( ) ;
93- foreach ( var row in articleTableRows )
86+ foreach ( var row in tableRows )
9487 {
95- if ( row . SelectNodes ( "td" ) is null )
96- {
97- continue ;
98- }
99-
100- var cells = row
101- . SelectNodes ( "td" )
102- . ToList ( ) ;
103-
104- if ( cells . Count <= 0 )
88+ var columns = row . Split ( '|' , StringSplitOptions . TrimEntries ) ;
89+ if ( columns . Length < 3 )
10590 {
10691 continue ;
10792 }
10893
109- var aHrefNode = cells [ TableColumnId ] . SelectSingleNode ( "a" ) ;
110- if ( aHrefNode is null )
94+ var idColumn = columns [ TableColumnId ] ;
95+ var match = RuleIdRegex ( ) . Match ( idColumn ) ;
96+ if ( ! match . Success )
11197 {
11298 continue ;
11399 }
114100
115- var code = aHrefNode . InnerText ;
116- var title = HtmlEntity . DeEntitize ( cells [ TableColumnTitle ] . InnerText ) . NormalizePascalCase ( ) ;
117- var helpLink = $ "https://github.com{ aHrefNode . Attributes [ "href" ] . Value } ";
118- var description = cells [ TableColumnDescription ] . InnerText ;
101+ var code = match . Groups [ "code" ] . Value ;
102+ var relativeLink = match . Groups [ "link" ] . Value ;
103+ var titleTrimmed = columns [ TableColumnTitle ] . Trim ( ) ;
104+ var title = titleTrimmed . NormalizePascalCase ( ) ;
105+ var description = columns [ TableColumnDescription ] . Trim ( ) ;
106+ var helpLink = $ "https://github.com{ baseUrl } { relativeLink } ";
119107
120108 rules . Add (
121109 new Rule (
@@ -128,4 +116,37 @@ private static async Task<List<Rule>> GetRules(HtmlNode item)
128116
129117 return rules ;
130118 }
119+
/// <summary>
/// Reads the category name from the first match of <c>CategoryRegex</c> in the given markdown.
/// </summary>
/// <param name="markdown">The raw markdown text to search.</param>
/// <returns>
/// The trimmed value of the <c>category</c> capture group, or <c>"Unknown"</c> when the
/// pattern does not match anywhere in <paramref name="markdown"/>.
/// </returns>
private static string ExtractCategoryFromMarkdown(string markdown)
{
    var headingMatch = CategoryRegex().Match(markdown);
    if (!headingMatch.Success)
    {
        return "Unknown";
    }

    // Trim removes a trailing '\r' that the capture picks up on CRLF input.
    var categoryText = headingMatch.Groups["category"].Value;
    return categoryText.Trim();
}
125+
/// <summary>
/// Collects the data rows of the first run of pipe-delimited lines in <paramref name="markdown"/>.
/// </summary>
/// <remarks>
/// Scanning starts at the first line containing <c>'|'</c> and stops at the first non-blank
/// line without one; blank lines inside the run are skipped. The header row (first cell
/// equal to "Identifier", case-insensitive) and the delimiter row (only <c>-</c>, <c>:</c>,
/// <c>|</c> and whitespace) are excluded. Both checks look at the row's structure rather
/// than searching the whole line, so a data row whose title or description happens to
/// contain "identifier" or "---" is kept.
/// </remarks>
/// <param name="markdown">The raw markdown text to scan; may be empty.</param>
/// <returns>The trimmed data-row lines in document order; empty when no table is found.</returns>
private static List<string> ExtractTableRowsFromMarkdown(string markdown)
{
    var tableRows = new List<string>();
    if (string.IsNullOrEmpty(markdown))
    {
        return tableRows;
    }

    var inTable = false;
    foreach (var line in markdown.Split('\n'))
    {
        // Trim also strips the '\r' left behind when the document uses CRLF line endings.
        var trimmedLine = line.Trim();
        if (trimmedLine.Contains('|', StringComparison.Ordinal))
        {
            inTable = true;
            if (!IsDelimiterRow(trimmedLine) && !IsHeaderRow(trimmedLine))
            {
                tableRows.Add(trimmedLine);
            }
        }
        else if (inTable && !string.IsNullOrWhiteSpace(trimmedLine))
        {
            // First non-blank line after the table ends the scan.
            break;
        }
    }

    return tableRows;

    // True when the row is a markdown table delimiter such as "-----|:---:|-----".
    static bool IsDelimiterRow(string row)
    {
        var hasDash = false;
        foreach (var c in row)
        {
            if (c == '-')
            {
                hasDash = true;
            }
            else if (c != '|' && c != ':' && !char.IsWhiteSpace(c))
            {
                return false;
            }
        }

        return hasDash;
    }

    // True when the first cell of the row is the "Identifier" column heading.
    static bool IsHeaderRow(string row)
    {
        // Tolerate the optional leading pipe of the "| a | b |" table form.
        var content = row.StartsWith('|') ? row[1..] : row;
        var firstCell = content.Split('|')[0].Trim();
        return firstCell.Equals("Identifier", StringComparison.OrdinalIgnoreCase);
    }
}
131152}
0 commit comments