diff --git a/src/CodeClash.API/Controllers/Contests/ContestController.cs b/src/CodeClash.API/Controllers/Contests/ContestController.cs
index 89aee9e..9f4429f 100644
--- a/src/CodeClash.API/Controllers/Contests/ContestController.cs
+++ b/src/CodeClash.API/Controllers/Contests/ContestController.cs
@@ -3,6 +3,7 @@
 using CodeClash.Application.Contests.GetAllContests;
 using CodeClash.Application.Contests.GetContest;
 using CodeClash.Application.Contests.RegisterInContest;
+using CodeClash.Application.Plagiarism.GetContestPlagiarismCases;
 using CodeClash.Domain.Premitives.Responses;
 using MediatR;
 using Microsoft.AspNetCore.Authorization;
@@ -36,7 +37,7 @@ public async Task<IActionResult> GetAllContests(
         var result = await sender.Send(new GetAllContestsQuery(), cancellationToken);
 
         return result.IsSuccess
-            ? Ok(result.Value)
+            ? Ok(result)
             : BadRequest(result.Error);
     }
 
@@ -44,7 +45,7 @@ public async Task<IActionResult> GetAllContests(
     [Authorize]
     [HttpPost]
     public async Task<IActionResult> CreateContest(
-        CreateContestCommand command,
+        [FromBody] CreateContestCommand command,
         CancellationToken cancellationToken)
     {
         var result = await sender.Send(command, cancellationToken);
@@ -69,11 +70,11 @@ public async Task<IActionResult> RegisterInContest(
             : BadRequest(result);
     }
 
-    [HttpPost("{contestId:guid}/problems/{problemId:guid}")]
     [Authorize]
+    [HttpPost("{contestId:guid}/problems/{problemId:guid}")]
     public async Task<IActionResult> AddProblem(
-        Guid contestId,
-        Guid problemId,
+        [FromRoute] Guid contestId,
+        [FromRoute] Guid problemId,
         CancellationToken cancellationToken)
     {
         var result = await sender.Send(
@@ -97,4 +98,30 @@ public async Task<IActionResult> AddProblem(
             _ => BadRequest(result)
         };
     }
+
+    /// <summary>
+    /// Lists pairs of accepted submissions in a contest whose similarity
+    /// percentage reaches <paramref name="threshold"/>.
+    /// </summary>
+    /// <param name="contestId">Contest whose submissions are compared.</param>
+    /// <param name="threshold">Minimum similarity percentage (0-100) for a pair to be reported.</param>
+    /// <param name="problemIds">Problems to inspect, bound from the query string.</param>
+    /// <param name="cancellationToken">Passed on to the mediator.</param>
+    [Authorize]
+    [HttpGet("{contestId:guid}/plagiarisms")]
+    [ProducesResponseType(typeof(GetContestPlagiarismCasesResponse), StatusCodes.Status200OK)]
+    [ProducesResponseType(StatusCodes.Status404NotFound)]
+    public async Task<IActionResult> GetPlagiarismCases(
+        [FromRoute] Guid contestId,
+        [FromQuery] decimal threshold,
+        [FromQuery] List<Guid> problemIds,
+        CancellationToken cancellationToken)
+    {
+        var query = new GetContestPlagiarismCasesQuery(contestId, threshold, problemIds);
+        var result = await sender.Send(query, cancellationToken);
+
+        return result.IsSuccess
+            ? Ok(result)
+            : NotFound(result);
+    }
 }
diff --git a/src/CodeClash.API/Controllers/Plagiarisms/PlagiarismController.cs b/src/CodeClash.API/Controllers/Plagiarisms/PlagiarismController.cs
new file mode 100644
index 0000000..605d743
--- /dev/null
+++ b/src/CodeClash.API/Controllers/Plagiarisms/PlagiarismController.cs
@@ -0,0 +1,28 @@
+using CodeClash.Application.Abstractions.Plagiarism;
+using CodeClash.Domain.Requests;
+using Microsoft.AspNetCore.Mvc;
+
+namespace CodeClash.API.Controllers.Plagiarisms;
+
+/// <summary>
+/// Exposes the plagiarism similarity calculation over HTTP.
+/// </summary>
+[ApiController]
+[Route("plagiarisms")]
+public sealed class PlagiarismController(
+    IPlagiarismService plagiarismService) : ControllerBase
+{
+    /// <summary>
+    /// Returns the similarity percentage between the two code samples in the request body.
+    /// </summary>
+    /// <param name="request">The two source texts to compare.</param>
+    // NOTE(review): unlike the contest endpoints this action carries no [Authorize]
+    // attribute - confirm that anonymous access is intended.
+    [HttpPost("similarity")]
+    [ProducesResponseType(typeof(decimal), StatusCodes.Status200OK)]
+    public IActionResult GetSimilarity([FromBody] CodeSimilarityRequest request)
+    {
+        var similarity = plagiarismService.GetSimilarity(request.Code1, request.Code2);
+        return Ok(similarity);
+    }
+}
diff --git a/src/CodeClash.API/Extensions/SeedDataExtension.cs b/src/CodeClash.API/Extensions/SeedDataExtension.cs
index 2d069c1..5db29be 100644
--- a/src/CodeClash.API/Extensions/SeedDataExtension.cs
+++ b/src/CodeClash.API/Extensions/SeedDataExtension.cs
@@ -205,7 +205,10 @@ private static async Task SeedElasticAsync(
 
         if (success)
         {
-            logger.LogInformation("Seeded {Count} problems into Elasticsearch.", documents.Count);
+            if (logger.IsEnabled(LogLevel.Information))
+            {
+                logger.LogInformation("Seeded {Count} problems into Elasticsearch.", documents.Count);
+            }
         }
         else
         {
diff --git a/src/CodeClash.Application/Abstractions/Plagiarism/IPlagiarismService.cs b/src/CodeClash.Application/Abstractions/Plagiarism/IPlagiarismService.cs
new file mode 100644
index 0000000..3e33008
--- /dev/null
+++ 
b/src/CodeClash.Application/Abstractions/Plagiarism/IPlagiarismService.cs
@@ -0,0 +1,27 @@
+using CodeClash.Application.DTO;
+
+namespace CodeClash.Application.Abstractions.Plagiarism;
+
+/// <summary>
+/// Scores how similar submitted source code is, to surface possible plagiarism.
+/// </summary>
+public interface IPlagiarismService
+{
+    /// <summary>
+    /// Finds pairs of submissions to the same problem whose similarity reaches the threshold.
+    /// </summary>
+    /// <param name="contestId">Contest whose submissions are inspected.</param>
+    /// <param name="ProblemIds">Problems to restrict the comparison to.</param>
+    /// <param name="threshold">Minimum similarity for a pair to be reported.</param>
+    Task<IEnumerable<PlagiarismCaseDto>> GetPlagiarismCases(
+        Guid contestId,
+        List<Guid> ProblemIds,
+        decimal threshold);
+
+    /// <summary>
+    /// Returns the similarity score of two source texts; exposed for testing purposes.
+    /// </summary>
+    decimal GetSimilarity(
+        string code1,
+        string code2);
+}
diff --git a/src/CodeClash.Application/Behaviors/LoggingBehavior.cs b/src/CodeClash.Application/Behaviors/LoggingBehavior.cs
index 30640aa..5781360 100644
--- a/src/CodeClash.Application/Behaviors/LoggingBehavior.cs
+++ b/src/CodeClash.Application/Behaviors/LoggingBehavior.cs
@@ -39,6 +39,7 @@ public async Task Handle(
 
         try
         {
+#pragma warning disable CA1873
             // Log command execution start
             logger.LogInformation("Executing request {Request}", name);
 
diff --git a/src/CodeClash.Application/DTO/PlagiarismCaseDto.cs b/src/CodeClash.Application/DTO/PlagiarismCaseDto.cs
new file mode 100644
index 0000000..aa84328
--- /dev/null
+++ b/src/CodeClash.Application/DTO/PlagiarismCaseDto.cs
@@ -0,0 +1,12 @@
+namespace CodeClash.Application.DTO;
+
+/// <summary>
+/// A pair of submissions to one problem together with their similarity score.
+/// </summary>
+public sealed class PlagiarismCaseDto
+{
+    public SubmitDto FirstSubmission { get; set; }
+    public SubmitDto SecondSubmission { get; set; }
+    public decimal Similarity { get; set; }
+    public Guid ProblemId { get; set; }
+}
diff --git a/src/CodeClash.Application/DTO/SubmissionDto.cs b/src/CodeClash.Application/DTO/SubmissionDto.cs
new file mode 100644
index 0000000..b8b4735
--- /dev/null
+++ b/src/CodeClash.Application/DTO/SubmissionDto.cs
@@ -0,0 +1,15 @@
+using CodeClash.Domain.Premitives;
+
+namespace CodeClash.Application.DTO;
+
+/// <summary>
+/// Carries a submission's author, source code, timestamp and language.
+/// </summary>
+// NOTE(review): nothing in this change references this type, and SubmitDto repeats its fields - confirm it is needed.
+public sealed class SubmissionDto
+{
+    public string UserId { get; set; }
+    public string Code { get; set; }
+    public DateTime SubmissionDate { get; set; }
+    public Language Language { get; set; }
+}
diff --git a/src/CodeClash.Application/DTO/SubmitDto.cs b/src/CodeClash.Application/DTO/SubmitDto.cs
new file mode
100644
index 0000000..6cc7290
--- /dev/null
+++ b/src/CodeClash.Application/DTO/SubmitDto.cs
@@ -0,0 +1,17 @@
+using CodeClash.Domain.Premitives;
+
+namespace CodeClash.Application.DTO;
+
+/// <summary>
+/// Flat copy of a submission as it is returned inside a plagiarism case.
+/// </summary>
+public sealed class SubmitDto
+{
+    public string UserId { get; set; }
+    public Guid ProblemId { get; set; }
+    public Guid? ContestId { get; set; }
+    public string Code { get; set; }
+    public Language Language { get; set; }
+    public DateTime SubmissionDate { get; set; }
+    public SubmissionResult Result { get; set; }
+}
diff --git a/src/CodeClash.Application/Mapping/PlagiarismMappings.cs b/src/CodeClash.Application/Mapping/PlagiarismMappings.cs
new file mode 100644
index 0000000..54ae968
--- /dev/null
+++ b/src/CodeClash.Application/Mapping/PlagiarismMappings.cs
@@ -0,0 +1,24 @@
+using CodeClash.Application.DTO;
+using CodeClash.Domain.Models.Submits;
+
+namespace CodeClash.Application.Mapping;
+
+/// <summary>
+/// Mapping helpers used when building plagiarism results.
+/// </summary>
+public static class PlagiarismMappings
+{
+    /// <summary>
+    /// Copies the fields of a <see cref="Submit"/> into a new <see cref="SubmitDto"/>.
+    /// </summary>
+    public static SubmitDto ToDto(this Submit submit) => new()
+    {
+        UserId = submit.UserId,
+        ProblemId = submit.ProblemId,
+        ContestId = submit.ContestId,
+        Code = submit.Code,
+        Language = submit.Language,
+        SubmissionDate = submit.SubmissionDate,
+        Result = submit.Result
+    };
+}
diff --git a/src/CodeClash.Application/Plagiarism/GetContestPlagiarismCases/GetContestPlagiarismCasesHandler.cs b/src/CodeClash.Application/Plagiarism/GetContestPlagiarismCases/GetContestPlagiarismCasesHandler.cs
new file mode 100644
index 0000000..e909426
--- /dev/null
+++ b/src/CodeClash.Application/Plagiarism/GetContestPlagiarismCases/GetContestPlagiarismCasesHandler.cs
@@ -0,0 +1,31 @@
+using CodeClash.Application.Abstractions.Messaging;
+using CodeClash.Application.Abstractions.Plagiarism;
+using CodeClash.Domain.Premitives;
+
+namespace CodeClash.Application.Plagiarism.GetContestPlagiarismCases;
+
+/// <summary>
+/// Handles <see cref="GetContestPlagiarismCasesQuery"/> by delegating to the plagiarism service.
+/// </summary>
+internal sealed class GetContestPlagiarismCasesHandler(
+    IPlagiarismService plagiarismService) : IQueryHandler<GetContestPlagiarismCasesQuery, GetContestPlagiarismCasesResponse>
+{
+    /// <summary>
+    /// Runs the comparison and echoes the query parameters back alongside the detected cases.
+    /// </summary>
+    public async Task<Result<GetContestPlagiarismCasesResponse>> Handle(
+        GetContestPlagiarismCasesQuery request,
+        CancellationToken cancellationToken)
+    {
+        var cases = (await plagiarismService.GetPlagiarismCases(
+            request.ContestId,
+            request.ProblemIds,
+            request.Threshold)).ToList();
+
+        return Result.Success(new GetContestPlagiarismCasesResponse(
+            request.ContestId,
+            request.Threshold,
+            request.ProblemIds,
+            cases));
+    }
+}
diff --git a/src/CodeClash.Application/Plagiarism/GetContestPlagiarismCases/GetContestPlagiarismCasesQuery.cs b/src/CodeClash.Application/Plagiarism/GetContestPlagiarismCases/GetContestPlagiarismCasesQuery.cs
new file mode 100644
index 0000000..0db6ec6
--- /dev/null
+++ b/src/CodeClash.Application/Plagiarism/GetContestPlagiarismCases/GetContestPlagiarismCasesQuery.cs
@@ -0,0 +1,11 @@
+using CodeClash.Application.Abstractions.Messaging;
+
+namespace CodeClash.Application.Plagiarism.GetContestPlagiarismCases;
+
+/// <summary>
+/// Requests the plagiarism cases of a contest for the given problems and similarity threshold.
+/// </summary>
+public sealed record GetContestPlagiarismCasesQuery(
+    Guid ContestId,
+    decimal Threshold,
+    List<Guid> ProblemIds) : IQuery<GetContestPlagiarismCasesResponse>;
diff --git a/src/CodeClash.Application/Plagiarism/GetContestPlagiarismCases/GetContestPlagiarismCasesResponse.cs b/src/CodeClash.Application/Plagiarism/GetContestPlagiarismCases/GetContestPlagiarismCasesResponse.cs
new file mode 100644
index 0000000..f5bc87e
--- /dev/null
+++ b/src/CodeClash.Application/Plagiarism/GetContestPlagiarismCases/GetContestPlagiarismCasesResponse.cs
@@ -0,0 +1,12 @@
+using CodeClash.Application.DTO;
+
+namespace CodeClash.Application.Plagiarism.GetContestPlagiarismCases;
+
+/// <summary>
+/// The query parameters that were used, together with the plagiarism cases found.
+/// </summary>
+public sealed record GetContestPlagiarismCasesResponse(
+    Guid ContestId,
+    decimal Threshold,
+    List<Guid> ProblemIds,
+    List<PlagiarismCaseDto> Cases);
diff --git a/src/CodeClash.Domain/Abstractions/ISubmissionRepository.cs b/src/CodeClash.Domain/Abstractions/ISubmissionRepository.cs
index 5ac8fcf..f0a3c17 100644
--- a/src/CodeClash.Domain/Abstractions/ISubmissionRepository.cs
+++ b/src/CodeClash.Domain/Abstractions/ISubmissionRepository.cs
@@ -2,6 +2,7 @@
 using CodeClash.Domain.Premitives;
 
 namespace CodeClash.Domain.Abstractions;
+
 public interface ISubmissionRepository
 {
     Task GetByIdAsync(Guid id);
@@ -17,4 +18,9 @@ public interface ISubmissionRepository
     Task> GetUserSubmissionsAsync(string userId);
 
     Task GetSubmissionIfAuthorized(string userId, Guid submissionId);
+
+    /// <summary>
+    /// Loads the accepted submissions of a contest for the given problems.
+    /// </summary>
+    Task<List<Submit>> GetContestACSubmissionsByProblemIdsAsync(Guid contestId, List<Guid> problemIds);
 }
diff --git a/src/CodeClash.Domain/Requests/CodeSimilarityRequest.cs b/src/CodeClash.Domain/Requests/CodeSimilarityRequest.cs
new file mode 100644
index 0000000..e6eea5f
--- /dev/null
+++ b/src/CodeClash.Domain/Requests/CodeSimilarityRequest.cs
@@ -0,0 +1,10 @@
+namespace CodeClash.Domain.Requests;
+
+/// <summary>
+/// Request body holding the two source texts to compare.
+/// </summary>
+public sealed class CodeSimilarityRequest
+{
+    public string Code1 { get; set; } = string.Empty;
+    public string Code2 { get; set; } = string.Empty;
+}
diff --git a/src/CodeClash.Infrastructure/DependencyInjection.cs b/src/CodeClash.Infrastructure/DependencyInjection.cs
index 3dc9893..1529b2e 100644
--- a/src/CodeClash.Infrastructure/DependencyInjection.cs
+++ b/src/CodeClash.Infrastructure/DependencyInjection.cs
@@ -7,6 +7,7 @@
 using CodeClash.Application.Abstractions.Execution;
 using CodeClash.Application.Abstractions.File;
 using CodeClash.Application.Abstractions.Identity;
+using CodeClash.Application.Abstractions.Plagiarism;
 using CodeClash.Application.Abstractions.Roles;
 using CodeClash.Application.Helpers;
 using CodeClash.Domain.Abstractions;
@@ -26,6 +27,7 @@
 using StackExchange.Redis;
 
 namespace CodeClash.Infrastructure;
+
 public static class DependencyInjection
 {
     public static IServiceCollection AddInfrastructure(
@@ -64,6 +66,8 @@ public static IServiceCollection AddInfrastructure(
 
         services.AddScoped();
 
+        services.AddScoped<IPlagiarismService, PlagiarismService>();
+
         services.AddScoped();
 
         services.AddScoped();
diff --git a/src/CodeClash.Infrastructure/Implementation/ElasticService.cs b/src/CodeClash.Infrastructure/Implementation/ElasticService.cs
index e588d15..0e95474 100644
--- a/src/CodeClash.Infrastructure/Implementation/ElasticService.cs
+++ 
b/src/CodeClash.Infrastructure/Implementation/ElasticService.cs
@@ -7,6 +7,7 @@
 using Nest;
 
 namespace CodeClash.Infrastructure.Implementation;
+
 internal sealed class ElasticService : IElasticService
 {
     private readonly ElasticClient _client;
@@ -98,6 +99,7 @@ private async Task EnsureIndexAsync<T>(string indexName) where T : class
         }
         else
         {
+#pragma warning disable CA1873
             _logger.LogInformation("Index '{IndexName}' created successfully.", indexName);
         }
     }
diff --git a/src/CodeClash.Infrastructure/Implementation/PlagiarismService.cs b/src/CodeClash.Infrastructure/Implementation/PlagiarismService.cs
new file mode 100644
index 0000000..8ebea5f
--- /dev/null
+++ b/src/CodeClash.Infrastructure/Implementation/PlagiarismService.cs
@@ -0,0 +1,257 @@
+using System.Text.RegularExpressions;
+using CodeClash.Application.Abstractions.Plagiarism;
+using CodeClash.Application.DTO;
+using CodeClash.Application.Mapping;
+using CodeClash.Domain.Abstractions;
+
+namespace CodeClash.Infrastructure.Implementation;
+
+/// <summary>
+/// Detects potential plagiarism between accepted submissions
+/// using N-Grams, FNV-1a hashing, Winnowing fingerprinting,
+/// and Jaccard similarity.
+///
+/// Reference: Fowler–Noll–Vo Hash Function specification.
+/// https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV_hash_parameters
+/// </summary>
+internal sealed partial class PlagiarismService(
+    ISubmissionRepository submissionRepository) : IPlagiarismService
+{
+    // 32-bit FNV prime.
+    private const uint FnvPrime = 16777619;
+
+    // 32-bit FNV offset basis: the value every hash starts from.
+    private const uint OffsetBasis = 2166136261;
+
+    // Size of each N-Gram token.
+    // Ex: 6 => "abcdefghi" => "abcdef", "bcdefg", "cdefgh", "defghi".
+    private const int NGramSize = 6;
+
+    // Winnowing sliding window size.
+    // The minimum hash in each window becomes a fingerprint.
+    private const int WindowSize = 5;
+
+    // Matches "// ..." up to the end of its line, or "/* ... */" spanning any number of lines.
+    // [\s\S] is used for the block form because '.' stops at a newline.
+    private static readonly Regex CommentPattern = new(
+        @"(//.*?$)|(/\*[\s\S]*?\*/)",
+        RegexOptions.Multiline | RegexOptions.Compiled);
+
+    private static readonly Regex WhitespacePattern = new(@"\s+", RegexOptions.Compiled);
+
+    /// <summary>
+    /// Compares accepted submissions belonging to the same problem
+    /// and reports pairs whose similarity is at least the given threshold.
+    /// </summary>
+    /// <param name="contestId">Contest whose submissions are loaded.</param>
+    /// <param name="ProblemIds">Problems whose accepted submissions are compared.</param>
+    /// <param name="threshold">Minimum similarity percentage (0-100) for a pair to be reported.</param>
+    /// <returns>One entry per submission pair from different users that reaches the threshold.</returns>
+    public async Task<IEnumerable<PlagiarismCaseDto>> GetPlagiarismCases(
+        Guid contestId,
+        List<Guid> ProblemIds,
+        decimal threshold)
+    {
+        var submissions = await submissionRepository
+            .GetContestACSubmissionsByProblemIdsAsync(contestId, ProblemIds);
+
+        var plagiarismCases = new List<PlagiarismCaseDto>();
+
+        // Compare submissions only within the same problem.
+        foreach (var group in submissions.GroupBy(s => s.ProblemId))
+        {
+            // Fingerprint each submission once instead of once per pair it takes part in.
+            var candidates = group
+                .Select(s => (Submission: s, Fingerprints: GetFingerprintSet(s.Code)))
+                .ToList();
+
+            // Compare every unique pair once.
+            // Example: A-B, A-C, B-C
+            for (var i = 0; i < candidates.Count; i++)
+            {
+                for (var j = i + 1; j < candidates.Count; j++)
+                {
+                    var first = candidates[i];
+                    var second = candidates[j];
+
+                    // Ignore comparisons from the same user.
+                    if (first.Submission.UserId == second.Submission.UserId)
+                    {
+                        continue;
+                    }
+
+                    var similarity = CalculateJaccardSimilarity(
+                        first.Fingerprints,
+                        second.Fingerprints);
+
+                    // Record the pair if similarity reaches the threshold.
+                    if (similarity >= threshold)
+                    {
+                        plagiarismCases.Add(new PlagiarismCaseDto
+                        {
+                            FirstSubmission = first.Submission.ToDto(),
+                            SecondSubmission = second.Submission.ToDto(),
+                            Similarity = similarity,
+                            ProblemId = first.Submission.ProblemId
+                        });
+                    }
+                }
+            }
+        }
+
+        return plagiarismCases;
+    }
+
+    /// <summary>
+    /// Computes the similarity percentage between two source codes.
+    /// </summary>
+    /// <returns>A value from 0 to 100.</returns>
+    public decimal GetSimilarity(
+        string code1,
+        string code2)
+    {
+        return CalculateJaccardSimilarity(
+            GetFingerprintSet(code1),
+            GetFingerprintSet(code2));
+    }
+
+    /// <summary>
+    /// Runs the whole pipeline for one source text:
+    /// normalize, hash the N-Grams, then winnow the hashes into a set of fingerprints.
+    /// </summary>
+    private static HashSet<uint> GetFingerprintSet(string code)
+    {
+        var hashes = HashN_grams(PreProcess(code));
+        return new HashSet<uint>(GetFingerPrints(hashes));
+    }
+
+    /// <summary>
+    /// Jaccard similarity of two fingerprint sets, as a percentage:
+    /// |Intersection| / |Union| * 100.
+    /// </summary>
+    private static decimal CalculateJaccardSimilarity(
+        HashSet<uint> fingerprints1,
+        HashSet<uint> fingerprints2)
+    {
+        var intersection = fingerprints1.Count(fingerprints2.Contains);
+        var union = fingerprints1.Count + fingerprints2.Count - intersection;
+
+        // Neither input produced a fingerprint (both normalize to fewer than
+        // NGramSize characters): report no similarity rather than divide by zero.
+        if (union == 0)
+        {
+            return 0m;
+        }
+
+        return (decimal)intersection / union * 100;
+    }
+
+    /// <summary>
+    /// Normalizes source code before comparison.
+    /// - Converts to lowercase.
+    /// - Removes // and /* */ comments.
+    /// - Removes whitespace.
+    /// A null or empty input yields an empty string.
+    /// </summary>
+    private static string PreProcess(
+        string code)
+    {
+        if (string.IsNullOrEmpty(code))
+        {
+            return string.Empty;
+        }
+
+        // Invariant casing keeps the result independent of the server culture.
+        code = code.ToLowerInvariant();
+        code = CommentPattern.Replace(code, string.Empty);
+
+        return WhitespacePattern.Replace(code, string.Empty);
+    }
+
+    /// <summary>
+    /// Computes the 32-bit FNV-1a hash of an N-Gram.
+    /// Example: "abc" -> 440920331
+    /// </summary>
+    private static uint Fnv1a(ReadOnlySpan<char> word)
+    {
+        var hash = OffsetBasis;
+
+        // FNV relies on wrap-around multiplication, so overflow checking
+        // must stay off even if the project enables it globally.
+        unchecked
+        {
+            foreach (var c in word)
+            {
+                hash ^= c;
+                hash *= FnvPrime;
+            }
+        }
+
+        return hash;
+    }
+
+    /// <summary>
+    /// Hashes every overlapping N-Gram of the normalized code.
+    /// Example with a size of 3: "abcdef" -> hashes of "abc", "bcd", "cde", "def".
+    /// Returns an empty list when the code is shorter than one N-Gram.
+    /// </summary>
+    private static List<uint> HashN_grams(string code)
+    {
+        var hashes = new List<uint>(Math.Max(0, code.Length - NGramSize + 1));
+
+        for (var start = 0; start + NGramSize <= code.Length; start++)
+        {
+            hashes.Add(Fnv1a(code.AsSpan(start, NGramSize)));
+        }
+
+        return hashes;
+    }
+
+    /// <summary>
+    /// Applies the Winnowing algorithm.
+    /// For each sliding window of WindowSize hashes the minimum hash is selected,
+    /// and each selected (hash, position) contributes one fingerprint.
+    /// Inputs shorter than one window are treated as a single window.
+    /// </summary>
+    private static List<uint> GetFingerPrints(List<uint> hashs)
+    {
+        var fingerPrints = new List<uint>();
+
+        if (hashs.Count == 0)
+        {
+            return fingerPrints;
+        }
+
+        // Ordered by hash value, then by position, so Min is the window's smallest hash.
+        var window = new SortedSet<(uint Value, int Index)>();
+
+        // (hash, position) pairs already emitted, so a minimum that stays
+        // in place across several windows is recorded only once.
+        var selected = new HashSet<(uint Value, int Index)>();
+
+        // Build the initial window.
+        var i = 0;
+        for (; i < WindowSize && i < hashs.Count; i++)
+        {
+            window.Add((hashs[i], i));
+        }
+
+        selected.Add(window.Min);
+        fingerPrints.Add(window.Min.Value);
+
+        // Slide the window one hash at a time.
+        for (; i < hashs.Count; i++)
+        {
+            window.Add((hashs[i], i));
+            window.Remove((hashs[i - WindowSize], i - WindowSize));
+
+            var min = window.Min;
+            if (selected.Add(min))
+            {
+                fingerPrints.Add(min.Value);
+            }
+        }
+
+        return fingerPrints;
+    }
+}
diff --git a/src/CodeClash.Infrastructure/Repositories/SubmissionRepository.cs b/src/CodeClash.Infrastructure/Repositories/SubmissionRepository.cs
index cc164a4..452b93b 100644
--- a/src/CodeClash.Infrastructure/Repositories/SubmissionRepository.cs
+++ b/src/CodeClash.Infrastructure/Repositories/SubmissionRepository.cs
@@ -4,6 +4,7 @@
 using Microsoft.EntityFrameworkCore;
 
 namespace CodeClash.Infrastructure.Repositories;
+
 internal sealed class SubmissionRepository : ISubmissionRepository
 {
     private readonly ApplicationDbContext _context;
@@ -98,4 +99,19 @@ public async Task> GetUserSubmissionsAsync(
 
         return submission;
     }
+
+    /// <summary>
+    /// Loads the accepted submissions of a contest for the given problems.
+    /// </summary>
+    public async Task<List<Submit>> GetContestACSubmissionsByProblemIdsAsync(
+        Guid contestId,
+        List<Guid> problemIds)
+    {
+        return await _context.Submits
+            .AsNoTracking()
+            .Where(s => s.ContestId == contestId &&
+                        problemIds.Contains(s.ProblemId) &&
+                        s.Result == SubmissionResult.Accepted)
+            .ToListAsync();
+    }
 }