Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,12 @@ public IObservable<FileSystemInfo> GetDirectoryScanner(DirectoryInfo root, Concu
return Task.CompletedTask;
}

PatternMatchingUtility.FilePatternMatcher fileIsMatch = null;
PatternMatchingUtility.CompiledMatcher fileIsMatch = null;
var patternsArray = filePatterns?.ToArray();

if (filePatterns == null || !filePatterns.Any())
if (patternsArray is { Length: > 0 })
{
fileIsMatch = span => true;
}
else
{
fileIsMatch = PatternMatchingUtility.GetFilePatternMatcher(filePatterns);
fileIsMatch = PatternMatchingUtility.Compile(patternsArray);
}

var sw = Stopwatch.StartNew();
Expand Down Expand Up @@ -100,7 +97,7 @@ public IObservable<FileSystemInfo> GetDirectoryScanner(DirectoryInfo root, Concu
{
ShouldIncludePredicate = (ref FileSystemEntry entry) =>
{
if (!entry.IsDirectory && fileIsMatch(entry.FileName))
if (!entry.IsDirectory && (fileIsMatch == null || fileIsMatch.IsMatch(entry.FileName)))
{
return true;
}
Expand Down Expand Up @@ -210,46 +207,26 @@ public void Initialize(DirectoryInfo root, ExcludeDirectoryPredicate directoryEx

public IObservable<FileSystemInfo> Subscribe(DirectoryInfo root, IEnumerable<string> patterns)
{
var patternArray = patterns.ToArray();

if (this.pendingScans.TryGetValue(root, out var scannerObservable))
{
this.logger.LogDebug("Logging patterns {Patterns} for {Root}", string.Join(":", patterns), root.FullName);

var inner = scannerObservable.Value.Where(fsi =>
{
if (fsi is FileInfo fi)
{
return this.MatchesAnyPattern(fi, patternArray);
}
else
{
return true;
}
});

return inner;
}

throw new InvalidOperationException("Subscribe called without initializing scanner");
var patternsArray = patterns as string[] ?? patterns.ToArray();
var compiled = PatternMatchingUtility.Compile(patternsArray);
return this.Subscribe(root, patternsArray, compiled);
}

public IObservable<ProcessRequest> GetFilteredComponentStreamObservable(DirectoryInfo root, IEnumerable<string> patterns, IComponentRecorder componentRecorder)
{
var observable = this.Subscribe(root, patterns).OfType<FileInfo>().SelectMany(f => patterns.Select(sp => new
{
SearchPattern = sp,
File = f,
})).Where(x =>
{
var searchPattern = x.SearchPattern;
var fileName = x.File.Name;
var patternsArray = patterns as string[] ?? patterns.ToArray();
var compiled = PatternMatchingUtility.Compile(patternsArray);

return this.pathUtilityService.MatchesPattern(searchPattern, fileName);
}).Where(x => x.File.Exists)
var observable = this.Subscribe(root, patternsArray, compiled).OfType<FileInfo>()
.Select(f => new
{
File = f,
MatchedPattern = compiled.GetMatchingPattern(f.Name),
})
.Where(x => x.MatchedPattern != null && x.File.Exists)
.Select(x =>
{
var lazyComponentStream = new LazyComponentStream(x.File, x.SearchPattern, this.logger);
var lazyComponentStream = new LazyComponentStream(x.File, x.MatchedPattern, this.logger);
return new ProcessRequest
{
ComponentStream = lazyComponentStream,
Expand Down Expand Up @@ -280,14 +257,31 @@ private FileSystemInfo Transform(ref FileSystemEntry entry)
return entry.ToFileSystemInfo();
}

private IObservable<FileSystemInfo> CreateDirectoryWalker(DirectoryInfo di, ExcludeDirectoryPredicate directoryExclusionPredicate, int minimumConnectionCount, IEnumerable<string> filePatterns)
private IObservable<FileSystemInfo> Subscribe(DirectoryInfo root, string[] patterns, PatternMatchingUtility.CompiledMatcher compiled)
{
return this.GetDirectoryScanner(di, new ConcurrentDictionary<string, bool>(), directoryExclusionPredicate, filePatterns, true).Replay() // Returns a replay subject which will republish anything found to new subscribers.
.AutoConnect(minimumConnectionCount); // Specifies that this connectable observable should start when minimumConnectionCount subscribe.
if (this.pendingScans.TryGetValue(root, out var scannerObservable))
{
this.logger.LogDebug("Logging patterns {Patterns} for {Root}", string.Join(":", patterns), root.FullName);

var inner = scannerObservable.Value.Where(fsi =>
{
if (fsi is FileInfo fi)
{
return compiled.IsMatch(fi.Name.AsSpan());
}

return true;
});

return inner;
}

throw new InvalidOperationException("Subscribe called without initializing scanner");
}

private bool MatchesAnyPattern(FileInfo fi, params string[] searchPatterns)
private IObservable<FileSystemInfo> CreateDirectoryWalker(DirectoryInfo di, ExcludeDirectoryPredicate directoryExclusionPredicate, int minimumConnectionCount, IEnumerable<string> filePatterns)
{
return searchPatterns != null && searchPatterns.Any(sp => this.pathUtilityService.MatchesPattern(sp, fi.Name));
return this.GetDirectoryScanner(di, new ConcurrentDictionary<string, bool>(), directoryExclusionPredicate, filePatterns, true).Replay() // Returns a replay subject which will republish anything found to new subscribers.
.AutoConnect(minimumConnectionCount); // Specifies that this connectable observable should start when minimumConnectionCount subscribe.
}
}
34 changes: 3 additions & 31 deletions src/Microsoft.ComponentDetection.Common/PathUtilityService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ namespace Microsoft.ComponentDetection.Common;

using System;
using System.IO;
using System.IO.Enumeration;
using Microsoft.ComponentDetection.Contracts;
using Microsoft.Extensions.Logging;

Expand All @@ -22,21 +21,6 @@ internal class PathUtilityService : IPathUtilityService

public PathUtilityService(ILogger<PathUtilityService> logger) => this.logger = logger;

/// <summary>
/// Tests the file name of a file system entry against a single search pattern, ignoring case.
/// </summary>
/// <param name="searchPattern">
/// The pattern to test. A leading <c>*</c> turns the remainder into a required suffix, a trailing
/// <c>*</c> turns the remainder into a required prefix; otherwise the whole name must equal the pattern.
/// </param>
/// <param name="fse">The entry whose <c>FileName</c> is compared.</param>
/// <returns><see langword="true"/> when any of the three comparisons succeeds.</returns>
public static bool MatchesPattern(string searchPattern, ref FileSystemEntry fse)
{
    const StringComparison IgnoreCase = StringComparison.OrdinalIgnoreCase;

    // "*suffix": everything after the leading star must end the entry name.
    var hasLeadingStar = searchPattern.StartsWith('*');
    var entryName = fse.FileName;
    if (hasLeadingStar && entryName.EndsWith(searchPattern.AsSpan(1), IgnoreCase))
    {
        return true;
    }

    // "prefix*": everything before the trailing star must start the entry name.
    var hasTrailingStar = searchPattern.EndsWith('*');
    if (hasTrailingStar && entryName.StartsWith(searchPattern.AsSpan(0, searchPattern.Length - 1), IgnoreCase))
    {
        return true;
    }

    // Fall back to a whole-name comparison against the pattern text itself.
    return entryName.Equals(searchPattern.AsSpan(), IgnoreCase);
}

/// <summary>
/// Returns the directory portion of <paramref name="path"/> as computed by
/// <see cref="Path.GetDirectoryName(string)"/>.
/// </summary>
/// <param name="path">The path to inspect.</param>
/// <returns>Whatever <see cref="Path.GetDirectoryName(string)"/> yields for the given path.</returns>
public string GetParentDirectory(string path)
{
    var parent = Path.GetDirectoryName(path);
    return parent;
}

public bool IsFileBelowAnother(string aboveFilePath, string belowFilePath)
Expand All @@ -48,21 +32,6 @@ public bool IsFileBelowAnother(string aboveFilePath, string belowFilePath)
return (aboveDirectoryPath.Length != belowDirectoryPath.Length) && belowDirectoryPath.StartsWith(aboveDirectoryPath);
}

/// <summary>
/// Tests a file name against a single search pattern, ignoring case.
/// </summary>
/// <param name="searchPattern">
/// The pattern to test. A leading <c>*</c> turns the remainder into a required suffix, a trailing
/// <c>*</c> turns the remainder into a required prefix; otherwise the name must equal the pattern.
/// </param>
/// <param name="fileName">The file name to compare.</param>
/// <returns><see langword="true"/> when the suffix, prefix, or whole-name comparison succeeds.</returns>
public bool MatchesPattern(string searchPattern, string fileName)
{
    const StringComparison IgnoreCase = StringComparison.OrdinalIgnoreCase;

    // The three checks are evaluated in this order and short-circuit on the first success:
    // "*suffix", then "prefix*", then a whole-name comparison against the pattern text.
    return (searchPattern.StartsWith('*') && fileName.EndsWith(searchPattern.Substring(1), IgnoreCase))
        || (searchPattern.EndsWith('*') && fileName.StartsWith(searchPattern.Substring(0, searchPattern.Length - 1), IgnoreCase))
        || string.Equals(searchPattern, fileName, IgnoreCase);
}

public string ResolvePhysicalPath(string path)
{
var directoryInfo = new DirectoryInfo(path);
Expand All @@ -75,6 +44,9 @@ public string ResolvePhysicalPath(string path)
return fileInfo.Exists ? this.ResolvePathFromInfo(fileInfo) : null;
}

/// <summary>
/// Obsolete forwarding member: hands both arguments unchanged to
/// <c>PatternMatchingUtility.MatchesPattern</c> and returns its result.
/// </summary>
/// <param name="pattern">The pattern to forward.</param>
/// <param name="fileName">The file name to forward.</param>
/// <returns>The value returned by <c>PatternMatchingUtility.MatchesPattern</c>.</returns>
[Obsolete("Use PatternMatchingUtility.MatchesPattern instead.")]
public bool MatchesPattern(string pattern, string fileName)
{
    var isMatch = PatternMatchingUtility.MatchesPattern(pattern, fileName);
    return isMatch;
}

/// <summary>
/// Picks the path to report for a file system entry: its <c>LinkTarget</c> when that is non-null,
/// otherwise its <c>FullName</c>.
/// </summary>
/// <param name="info">The file or directory entry to resolve.</param>
/// <returns>The link target if one is reported, else the entry's full name.</returns>
private string ResolvePathFromInfo(FileSystemInfo info)
{
    var linkTarget = info.LinkTarget;
    if (linkTarget is not null)
    {
        return linkTarget;
    }

    return info.FullName;
}

public string NormalizePath(string path)
Expand Down
99 changes: 76 additions & 23 deletions src/Microsoft.ComponentDetection.Common/PatternMatchingUtility.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,91 @@ namespace Microsoft.ComponentDetection.Common;

using System;
using System.Collections.Generic;
using System.IO.Enumeration;
using System.Linq;

public static class PatternMatchingUtility
{
public delegate bool FilePatternMatcher(ReadOnlySpan<char> span);
/// <summary>
/// Tests a single file name against a single pattern by delegating to <c>IsPatternMatch</c>.
/// </summary>
/// <param name="pattern">The pattern to test; must not be <see langword="null"/>.</param>
/// <param name="fileName">The file name to test; must not be <see langword="null"/>.</param>
/// <returns>The result of <c>IsPatternMatch</c> for the two arguments.</returns>
/// <exception cref="ArgumentNullException">Either argument is <see langword="null"/>.</exception>
public static bool MatchesPattern(string pattern, string fileName)
{
    if (pattern is null)
    {
        throw new ArgumentNullException(nameof(pattern));
    }

    if (fileName is null)
    {
        throw new ArgumentNullException(nameof(fileName));
    }

    ReadOnlySpan<char> name = fileName;
    return IsPatternMatch(pattern, name);
}

public static FilePatternMatcher GetFilePatternMatcher(IEnumerable<string> patterns)
/// <summary>
/// Returns the first matching pattern for <paramref name="fileName"/>.
/// Earlier patterns in <paramref name="patterns"/> have higher priority when multiple match.
/// </summary>
/// <returns>The first matching pattern, or <see langword="null"/> if no patterns match.</returns>
public static string? GetMatchingPattern(string fileName, IEnumerable<string> patterns)
{
var matchers = patterns.Select<string, FilePatternMatcher>(pattern => pattern switch
{
_ when pattern.StartsWith('*') && pattern.EndsWith('*') =>
pattern.Length <= 2
? _ => true
: span => span.Contains(pattern.AsSpan(1, pattern.Length - 2), StringComparison.Ordinal),
_ when pattern.StartsWith('*') =>
span => span.EndsWith(pattern.AsSpan(1), StringComparison.Ordinal),
_ when pattern.EndsWith('*') =>
span => span.StartsWith(pattern.AsSpan(0, pattern.Length - 1), StringComparison.Ordinal),
_ => span => span.Equals(pattern.AsSpan(), StringComparison.Ordinal),
}).ToList();

return span =>
ArgumentNullException.ThrowIfNull(fileName);
ArgumentNullException.ThrowIfNull(patterns);

return Compile(patterns).GetMatchingPattern(fileName.AsSpan());
}

/// <summary>
/// Builds a <see cref="CompiledMatcher"/> from a sequence of patterns.
/// </summary>
/// <param name="patterns">The patterns to compile; must not be <see langword="null"/>.</param>
/// <returns>A matcher over the supplied patterns, in their original order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="patterns"/> is <see langword="null"/>.</exception>
public static CompiledMatcher Compile(IEnumerable<string> patterns)
{
    ArgumentNullException.ThrowIfNull(patterns);

    // An input that is already an array goes straight to the array overload;
    // anything else is materialized into an array first.
    if (patterns is string[] array)
    {
        return Compile(array);
    }

    return new CompiledMatcher(patterns.ToArray());
}

/// <summary>
/// Builds a <see cref="CompiledMatcher"/> from an array of patterns.
/// </summary>
/// <param name="patterns">The patterns to compile; must not be <see langword="null"/>.</param>
/// <returns>A matcher constructed from <paramref name="patterns"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="patterns"/> is <see langword="null"/>.</exception>
public static CompiledMatcher Compile(string[] patterns)
{
    if (patterns is null)
    {
        throw new ArgumentNullException(nameof(patterns));
    }

    var matcher = new CompiledMatcher(patterns);
    return matcher;
}

private static string? GetFirstMatchingPattern(ReadOnlySpan<char> fileName, string[] patterns)
{
foreach (var pattern in patterns)
{
foreach (var matcher in matchers)
if (IsPatternMatch(pattern, fileName))
{
if (matcher(span))
{
return true;
}
return pattern;
}
}

return false;
};
return null;
}

/// <summary>
/// Evaluates one pattern against one file name with
/// <see cref="FileSystemName.MatchesSimpleExpression"/>, ignoring case.
/// </summary>
/// <param name="pattern">The expression handed to the matcher.</param>
/// <param name="fileName">The name being tested.</param>
/// <returns>The matcher's verdict for this pattern/name pair.</returns>
private static bool IsPatternMatch(string pattern, ReadOnlySpan<char> fileName)
{
    var matched = FileSystemName.MatchesSimpleExpression(pattern, fileName, ignoreCase: true);
    return matched;
}

/// <summary>
/// Holds a private copy of a pattern list and answers match queries against it by calling
/// <c>GetFirstMatchingPattern</c> on the enclosing type.
/// </summary>
public sealed class CompiledMatcher
{
    private readonly string[] patterns;

    /// <summary>
    /// Creates a matcher from any pattern sequence.
    /// </summary>
    /// <param name="patterns">The patterns to match against; must not be <see langword="null"/>.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="patterns"/> or one of its elements is <see langword="null"/>.
    /// </exception>
    public CompiledMatcher(IEnumerable<string> patterns)
        : this(AsArray(patterns))
    {
    }

    /// <summary>
    /// Creates a matcher from an array; the array is copied so later changes by the caller
    /// do not affect this instance.
    /// </summary>
    /// <param name="patterns">The patterns to match against; must not be <see langword="null"/>.</param>
    internal CompiledMatcher(string[] patterns)
    {
        if (patterns is null)
        {
            throw new ArgumentNullException(nameof(patterns));
        }

        var snapshot = new string[patterns.Length];
        Array.Copy(patterns, snapshot, patterns.Length);

        this.patterns = snapshot;
        ValidatePatternElements(snapshot);
    }

    /// <summary>
    /// Reports whether at least one stored pattern matches <paramref name="fileName"/>.
    /// </summary>
    /// <param name="fileName">The file name to test.</param>
    /// <returns><see langword="true"/> when a matching pattern is found.</returns>
    public bool IsMatch(ReadOnlySpan<char> fileName)
    {
        var firstMatch = GetFirstMatchingPattern(fileName, this.patterns);
        return firstMatch is not null;
    }

    /// <summary>
    /// Returns the first matching pattern for <paramref name="fileName"/>.
    /// Earlier patterns in the compiled set have higher priority when multiple match.
    /// </summary>
    /// <param name="fileName">The file name to test.</param>
    /// <returns>The first matching pattern, or <see langword="null"/> if no patterns match.</returns>
    public string? GetMatchingPattern(ReadOnlySpan<char> fileName)
    {
        return GetFirstMatchingPattern(fileName, this.patterns);
    }

    // Returns the input itself when it is already an array, otherwise a new array of its items.
    private static string[] AsArray(IEnumerable<string> patterns)
    {
        if (patterns is string[] existing)
        {
            return existing;
        }

        if (patterns is null)
        {
            throw new ArgumentNullException(nameof(patterns));
        }

        return patterns.ToArray();
    }

    // Rejects pattern lists that contain a null element.
    private static void ValidatePatternElements(string[] patterns)
    {
        for (var index = 0; index < patterns.Length; index++)
        {
            var pattern = patterns[index];
            if (pattern is null)
            {
                throw new ArgumentNullException(nameof(pattern));
            }
        }
    }
}
}
26 changes: 7 additions & 19 deletions src/Microsoft.ComponentDetection.Common/SafeFileEnumerable.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ namespace Microsoft.ComponentDetection.Common;

public class SafeFileEnumerable : IEnumerable<MatchedFile>
{
private readonly IEnumerable<string> searchPatterns;
private readonly ExcludeDirectoryPredicate directoryExclusionPredicate;
private readonly DirectoryInfo directory;
private readonly IPathUtilityService pathUtilityService;
private readonly bool recursivelyScanDirectories;
private readonly Func<FileInfo, bool> fileMatchingPredicate;
private readonly PatternMatchingUtility.CompiledMatcher compiledMatcher;

private readonly EnumerationOptions enumerationOptions;

Expand All @@ -27,11 +27,11 @@ public SafeFileEnumerable(DirectoryInfo directory, IEnumerable<string> searchPat
{
this.directory = directory;
this.logger = logger;
this.searchPatterns = searchPatterns;
this.directoryExclusionPredicate = directoryExclusionPredicate;
this.recursivelyScanDirectories = recursivelyScanDirectories;
this.pathUtilityService = pathUtilityService;
this.enumeratedDirectories = previouslyEnumeratedDirectories;
this.compiledMatcher = PatternMatchingUtility.Compile(searchPatterns);

this.enumerationOptions = new EnumerationOptions()
{
Expand All @@ -58,14 +58,10 @@ public IEnumerator<MatchedFile> GetEnumerator()
throw new InvalidOperationException("Encountered directory when expecting a file");
}

var foundPattern = entry.FileName.ToString();
foreach (var searchPattern in this.searchPatterns)
{
if (PathUtilityService.MatchesPattern(searchPattern, ref entry))
{
foundPattern = searchPattern;
}
}
// Pattern priority is first-match-wins: earlier entries in searchPatterns
// are treated as higher priority when multiple patterns match.
var foundPattern = this.compiledMatcher.GetMatchingPattern(entry.FileName)
?? entry.FileName.ToString();
Comment on lines +61 to +64
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd mention it in the PR description alongside the case sensitivity note.


return new MatchedFile() { File = fi, Pattern = foundPattern };
},
Expand All @@ -78,15 +74,7 @@ public IEnumerator<MatchedFile> GetEnumerator()
return false;
}

foreach (var searchPattern in this.searchPatterns)
{
if (PathUtilityService.MatchesPattern(searchPattern, ref entry))
{
return true;
}
}

return false;
return this.compiledMatcher.IsMatch(entry.FileName);
},
ShouldRecursePredicate = (ref FileSystemEntry entry) =>
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@ public abstract class FileComponentDetector : IComponentDetector
/// <inheritdoc />
public abstract string Id { get; }

/// <summary> Gets the search patterns used to produce the list of valid folders to scan. These patterns are evaluated with .Net's Directory.EnumerateFiles function. </summary>
/// <summary>
/// Gets the search patterns used to produce the list of valid files to scan.
/// The first pattern that matches a given file will be used to determine how that file is processed, so more specific patterns should be listed before more general ones. Wildcards are accepted.
/// </summary>
public abstract IList<string> SearchPatterns { get; }

/// <summary>Gets the categories this detector is considered a member of. Used by the DetectorCategories arg to include detectors.</summary>
Expand Down
Loading
Loading