Skip to content

Commit 2691c49

Browse files
committed
Improve scan_for_virus robustness, avoid per-file DB lookups, fix typo, and add missing-resource test
Signed-off-by: dikshaa2909 <dikshadeware@gmail.com>
1 parent 807b070 commit 2691c49

File tree

2 files changed

+82
-10
lines changed

2 files changed

+82
-10
lines changed

scanpipe/pipes/clamav.py

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,22 @@
2020
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
2121
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
2222

23+
import logging
2324
from pathlib import Path
2425

2526
from django.conf import settings
2627

2728
import clamd
2829

30+
logger = logging.getLogger(__name__)
31+
2932

3033
def scan_for_virus(project):
3134
"""
3235
Run a ClamAV scan to detect virus infection.
33-
Create one Project error message per found virus and store the detection data
34-
on the related codebase resource ``extra_data`` field.
36+
- Avoid crashes when ClamAV reports files not indexed in CodebaseResource.
37+
- Avoid per-file DB queries by preloading valid resource paths.
38+
- Record a project-level error for any detected virus.
3539
"""
3640
if settings.CLAMD_USE_TCP:
3741
clamd_socket = clamd.ClamdNetworkSocket(settings.CLAMD_TCP_ADDR)
@@ -43,17 +47,37 @@ def scan_for_virus(project):
4347
except clamd.ClamdError as e:
4448
raise Exception(f"Error with the ClamAV service: {e}")
4549

50+
# Preload all valid indexed resource paths
51+
valid_paths = set(project.codebaseresources.values_list("path", flat=True))
52+
53+
missing_resources = []
54+
4655
for resource_location, results in scan_response.items():
4756
status, reason = results
57+
58+
if status != "FOUND":
59+
continue
60+
4861
resource_path = Path(resource_location).relative_to(project.codebase_path)
62+
resource_path_str = str(resource_path)
63+
64+
if resource_path_str not in valid_paths:
65+
missing_resources.append(resource_path_str)
66+
logger.warning(
67+
"ClamAV detected virus in non-indexed file: %s",
68+
resource_path_str,
69+
)
70+
continue
71+
72+
resource = project.codebaseresources.filter(path=resource_path_str).first()
4973

50-
resource = project.codebaseresources.get(path=resource_path)
5174
virus_report = {
52-
"calmav": {
75+
"clamav": {
5376
"status": status,
5477
"reason": reason,
5578
}
5679
}
80+
5781
resource.update_extra_data({"virus_report": virus_report})
5882

5983
project.add_error(
@@ -62,6 +86,13 @@ def scan_for_virus(project):
6286
details={
6387
"status": status,
6488
"reason": reason,
65-
"resource_path": str(resource_path),
89+
"resource_path": resource_path_str,
6690
},
6791
)
92+
93+
if missing_resources:
94+
project.add_error(
95+
description="ClamAV detected virus in files not indexed in DB",
96+
model="ScanForVirus",
97+
details={"missing_resources": missing_resources},
98+
)

scanpipe/tests/pipes/test_clamav.py

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,19 +37,31 @@ class ScanPipeClamAVPipesTest(TestCase):
3737
def test_scanpipe_pipes_clamav_scan_for_virus(self, mock_multiscan):
3838
project = Project.objects.create(name="project")
3939
r1 = make_resource_file(project=project, path="eicar.zip")
40-
r2 = make_resource_file(project=project, path="eicar.zip-extract/eicar.com")
40+
r2 = make_resource_file(
41+
project=project,
42+
path="eicar.zip-extract/eicar.com",
43+
)
4144

4245
mock_multiscan.return_value = {
4346
r1.location: ("FOUND", "Win.Test.EICAR_HDB-1"),
4447
r2.location: ("FOUND", "Win.Test.EICAR_HDB-1"),
4548
}
4649

4750
clamav.scan_for_virus(project)
51+
4852
self.assertEqual(2, len(project.projectmessages.all()))
53+
4954
error_message = project.projectmessages.all()[0]
5055
self.assertEqual("error", error_message.severity)
51-
self.assertEqual("Virus detected", error_message.description)
52-
self.assertEqual("ScanForVirus", error_message.model)
56+
self.assertEqual(
57+
"Virus detected",
58+
error_message.description,
59+
)
60+
self.assertEqual(
61+
"ScanForVirus",
62+
error_message.model,
63+
)
64+
5365
expected_details = {
5466
"reason": "Win.Test.EICAR_HDB-1",
5567
"status": "FOUND",
@@ -60,10 +72,39 @@ def test_scanpipe_pipes_clamav_scan_for_virus(self, mock_multiscan):
6072
resource1 = project.codebaseresources.first()
6173
expected_virus_report_extra_data = {
6274
"virus_report": {
63-
"calmav": {
75+
"clamav": {
6476
"status": "FOUND",
6577
"reason": "Win.Test.EICAR_HDB-1",
6678
}
6779
}
6880
}
69-
self.assertEqual(expected_virus_report_extra_data, resource1.extra_data)
81+
self.assertEqual(
82+
expected_virus_report_extra_data,
83+
resource1.extra_data,
84+
)
85+
86+
@mock.patch("clamd.ClamdNetworkSocket.multiscan")
87+
def test_scanpipe_pipes_clamav_missing_resource_does_not_crash(
88+
self, mock_multiscan
89+
):
90+
project = Project.objects.create(name="project")
91+
92+
r1 = make_resource_file(
93+
project=project,
94+
path="indexed.txt",
95+
)
96+
97+
mock_multiscan.return_value = {
98+
r1.location: ("FOUND", "Test.Virus"),
99+
str(project.codebase_path / "non_indexed.txt"): (
100+
"FOUND",
101+
"Test.Virus",
102+
),
103+
}
104+
105+
clamav.scan_for_virus(project)
106+
107+
self.assertEqual(
108+
2,
109+
len(project.projectmessages.all()),
110+
)

0 commit comments

Comments
 (0)