Skip to content

Commit cd48474

Browse files
author
Per Kops
committed
fix(providers): update StyleCopAnalyzersProvider for GitHub page changes
- Fetch raw markdown from raw.githubusercontent.com instead of scraping HTML
- Add GeneratedRegex patterns for parsing rule IDs and categories
- Hardcode known rule file paths for reliable collection
- Fix regex to match rule codes with letter suffixes (e.g., SX1309S)
1 parent b3bd1a6 commit cd48474

File tree

2 files changed

+91
-65
lines changed

2 files changed

+91
-65
lines changed
Lines changed: 82 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
11
// ReSharper disable ForeachCanBeConvertedToQueryUsingAnotherGetEnumerator
22
namespace Atc.CodingRules.AnalyzerProviders.Providers;
33

4-
public class StyleCopAnalyzersProvider : AnalyzerProviderBase
4+
public partial class StyleCopAnalyzersProvider : AnalyzerProviderBase
55
{
66
private const int TableColumnId = 0;
77
private const int TableColumnTitle = 1;
88
private const int TableColumnDescription = 2;
99

10+
/// <summary>
/// Source-generated regex for a markdown link whose text is a rule code: one or more upper-case
/// letters, one or more digits, then optional upper-case letters (so suffixed codes such as SX1309S match).
/// The named group "code" holds the link text and the named group "link" holds the link target.
/// Only named groups capture (ExplicitCapture); matching times out after 1000 ms.
/// </summary>
[GeneratedRegex(@"\[(?<code>[A-Z]+\d+[A-Z]*)\]\((?<link>[^)]+)\)", RegexOptions.ExplicitCapture, matchTimeoutMilliseconds: 1000)]
11+
private static partial Regex RuleIdRegex();
12+
13+
/// <summary>
/// Source-generated regex that finds "###" followed by whitespace and captures the remainder
/// of that line in the named group "category".
/// Only named groups capture (ExplicitCapture); matching times out after 1000 ms.
/// </summary>
[GeneratedRegex(@"###\s+(?<category>.+)", RegexOptions.ExplicitCapture, matchTimeoutMilliseconds: 1000)]
14+
private static partial Regex CategoryRegex();
15+
1016
public StyleCopAnalyzersProvider(
1117
ILogger logger,
1218
bool logWithAnsiConsoleMarkup = false)
@@ -20,35 +26,34 @@ public StyleCopAnalyzersProvider(
2026

2127
public override Uri? DocumentationLink { get; set; } = new("https://github.com/DotNetAnalyzers/StyleCopAnalyzers/blob/master/DOCUMENTATION.md", UriKind.Absolute);
2228

29+
private static Uri RawContentBaseUri { get; } = new("https://raw.githubusercontent.com/DotNetAnalyzers/StyleCopAnalyzers/master/", UriKind.Absolute);
30+
2331
protected override AnalyzerProviderBaseRuleData CreateData()
2432
=> new(Name);
2533

2634
protected override async Task ReCollect(AnalyzerProviderBaseRuleData data)
2735
{
2836
ArgumentNullException.ThrowIfNull(data);
2937

30-
var web = new HtmlWeb();
31-
var htmlDoc = await web
32-
.LoadFromWebAsync(DocumentationLink!.AbsoluteUri)
33-
.ConfigureAwait(false);
34-
35-
var embeddedNode = htmlDoc.DocumentNode.SelectSingleNode("//script[@data-target='react-app.embeddedData']");
36-
if (embeddedNode is not null)
38+
var ruleFiles = new[]
3739
{
38-
var dynamicJson = new DynamicJson(embeddedNode.InnerText);
39-
var html = dynamicJson.GetValue("payload.blob.richText")?.ToString();
40-
41-
htmlDoc.LoadHtml(html);
42-
}
43-
44-
var articleNode = htmlDoc.DocumentNode.SelectNodes("//article[@class='markdown-body entry-content container-lg']")[0];
45-
var articleRuleLinks = articleNode
46-
.SelectNodes("//*//strong//a")
47-
.ToList();
48-
49-
foreach (var item in articleRuleLinks.Where(x => x.Attributes.Count == 1 && x.InnerText.Contains("(S", StringComparison.Ordinal)))
40+
"documentation/SpecialRules.md",
41+
"documentation/SpacingRules.md",
42+
"documentation/ReadabilityRules.md",
43+
"documentation/OrderingRules.md",
44+
"documentation/NamingRules.md",
45+
"documentation/MaintainabilityRules.md",
46+
"documentation/LayoutRules.md",
47+
"documentation/DocumentationRules.md",
48+
"documentation/AlternativeRules.md",
49+
};
50+
51+
using var httpClient = new HttpClient();
52+
foreach (var rulePath in ruleFiles)
5053
{
51-
var rules = await GetRules(item);
54+
var rules = await GetRulesFromMarkdown(
55+
rulePath,
56+
httpClient);
5257
foreach (var rule in rules)
5358
{
5459
data.Rules.Add(rule);
@@ -57,65 +62,48 @@ protected override async Task ReCollect(AnalyzerProviderBaseRuleData data)
5762
}
5863

5964
[SuppressMessage("Design", "MA0051:Method is too long", Justification = "OK.")]
60-
private static async Task<List<Rule>> GetRules(HtmlNode item)
65+
private static async Task<List<Rule>> GetRulesFromMarkdown(
66+
string rulePath,
67+
HttpClient httpClient)
6168
{
62-
var link = $"https://github.com{item.Attributes["href"].Value}";
63-
var web = new HtmlWeb();
64-
var htmlDoc = await web
65-
.LoadFromWebAsync(link)
69+
var linkUri = new Uri(RawContentBaseUri, rulePath);
70+
var markdown = await httpClient
71+
.GetStringAsync(linkUri)
6672
.ConfigureAwait(false);
6773

68-
var embeddedNode = htmlDoc.DocumentNode.SelectSingleNode("//script[@data-target='react-app.embeddedData']");
69-
if (embeddedNode is not null)
70-
{
71-
var dynamicJson = new DynamicJson(embeddedNode.InnerText);
72-
var html = dynamicJson.GetValue("payload.blob.richText")?.ToString();
73-
74-
htmlDoc.LoadHtml(html);
75-
}
76-
77-
var articleNode = htmlDoc.DocumentNode.SelectNodes("//article[@class='markdown-body entry-content container-lg']")[0];
78-
var articleTableRows = articleNode
79-
.SelectNodes("//*//table[1]//tr")
80-
.ToList();
81-
82-
var category = articleNode
83-
.Descendants("h3")
84-
.First().InnerText;
74+
var category = ExtractCategoryFromMarkdown(markdown);
75+
var tableRows = ExtractTableRowsFromMarkdown(markdown);
8576

8677
var i = category.IndexOf(" Rules", StringComparison.Ordinal);
8778
if (i > 0)
8879
{
89-
category = category.Substring(0, i);
80+
category = category[..i];
9081
}
9182

83+
var baseUrl = "/DotNetAnalyzers/StyleCopAnalyzers/blob/master/documentation/";
84+
9285
var rules = new List<Rule>();
93-
foreach (var row in articleTableRows)
86+
foreach (var row in tableRows)
9487
{
95-
if (row.SelectNodes("td") is null)
96-
{
97-
continue;
98-
}
99-
100-
var cells = row
101-
.SelectNodes("td")
102-
.ToList();
103-
104-
if (cells.Count <= 0)
88+
var columns = row.Split('|', StringSplitOptions.TrimEntries);
89+
if (columns.Length < 3)
10590
{
10691
continue;
10792
}
10893

109-
var aHrefNode = cells[TableColumnId].SelectSingleNode("a");
110-
if (aHrefNode is null)
94+
var idColumn = columns[TableColumnId];
95+
var match = RuleIdRegex().Match(idColumn);
96+
if (!match.Success)
11197
{
11298
continue;
11399
}
114100

115-
var code = aHrefNode.InnerText;
116-
var title = HtmlEntity.DeEntitize(cells[TableColumnTitle].InnerText).NormalizePascalCase();
117-
var helpLink = $"https://github.com{aHrefNode.Attributes["href"].Value}";
118-
var description = cells[TableColumnDescription].InnerText;
101+
var code = match.Groups["code"].Value;
102+
var relativeLink = match.Groups["link"].Value;
103+
var titleTrimmed = columns[TableColumnTitle].Trim();
104+
var title = titleTrimmed.NormalizePascalCase();
105+
var description = columns[TableColumnDescription].Trim();
106+
var helpLink = $"https://github.com{baseUrl}{relativeLink}";
119107

120108
rules.Add(
121109
new Rule(
@@ -128,4 +116,37 @@ private static async Task<List<Rule>> GetRules(HtmlNode item)
128116

129117
return rules;
130118
}
119+
120+
/// <summary>
/// Reads the category from the first heading in <paramref name="markdown"/> that
/// <c>CategoryRegex</c> matches.
/// </summary>
/// <param name="markdown">Markdown text to search.</param>
/// <returns>The trimmed "category" capture, or "Unknown" when nothing matches.</returns>
private static string ExtractCategoryFromMarkdown(string markdown)
{
    var heading = CategoryRegex().Match(markdown);
    if (!heading.Success)
    {
        return "Unknown";
    }

    return heading.Groups["category"].Value.Trim();
}
125+
126+
/// <summary>
/// Extracts the data rows of the first pipe-delimited table in the given markdown text.
/// </summary>
/// <param name="markdown">Markdown text; it is split on '\n' and every line is trimmed before inspection.</param>
/// <returns>
/// The trimmed table lines in document order, without the header row (first cell "Identifier")
/// and without the delimiter row (every cell made only of '-' and ':').
/// </returns>
private static List<string> ExtractTableRowsFromMarkdown(string markdown)
{
    var tableRows = new List<string>();
    var inTable = false;

    foreach (var line in markdown.Split('\n'))
    {
        var trimmedLine = line.Trim();
        if (trimmedLine.Contains('|', StringComparison.Ordinal))
        {
            inTable = true;

            // Classify the row by its cells rather than by substring search over the whole line,
            // so a rule whose name or description mentions "identifier" or contains "---" is kept.
            var cells = trimmedLine.Split('|', StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries);
            if (!IsHeaderRow(cells) && !IsDelimiterRow(cells))
            {
                tableRows.Add(trimmedLine);
            }
        }
        else if (inTable && !string.IsNullOrWhiteSpace(trimmedLine))
        {
            // The first non-blank line without a pipe ends the table; later tables are ignored.
            break;
        }
    }

    return tableRows;

    // Header row: its first non-empty cell is the "Identifier" column title.
    static bool IsHeaderRow(string[] cells)
        => cells.Length > 0 && cells[0].Equals("Identifier", StringComparison.OrdinalIgnoreCase);

    // Delimiter row: every cell consists only of '-' and ':' and has at least one '-'.
    static bool IsDelimiterRow(string[] cells)
    {
        if (cells.Length == 0)
        {
            return false;
        }

        foreach (var cell in cells)
        {
            var hasDash = false;
            foreach (var c in cell)
            {
                if (c == '-')
                {
                    hasDash = true;
                }
                else if (c != ':')
                {
                    return false;
                }
            }

            if (!hasDash)
            {
                return false;
            }
        }

        return true;
    }
}
131152
}

test/Atc.CodingRules.AnalyzerProviders.Tests/Providers/StyleCopAnalyzersProviderTests.cs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,16 @@ public sealed class StyleCopAnalyzersProviderTests
1111
public async Task CollectBaseRules(
1212
ProviderCollectingMode providerCollectingMode)
1313
{
14-
// Arrange
15-
var provider = new StyleCopAnalyzersProvider(NullLogger.Instance);
14+
AnalyzerProviderBaseRuleData? actual = null;
1615

17-
// Act
18-
var actual = await provider.CollectBaseRules(providerCollectingMode);
16+
await RetryHelper.ExecuteWithRetryAsync(async () =>
17+
{
18+
// Arrange
19+
var provider = new StyleCopAnalyzersProvider(NullLogger.Instance);
20+
21+
// Act
22+
actual = await provider.CollectBaseRules(providerCollectingMode);
23+
});
1924

2025
// Assert
2126
Assert.NotNull(actual);

0 commit comments

Comments
 (0)