66import logging
77
88import requests
9+ from problog import get_evaluatable
10+ from problog .logic import Term
11+ from problog .program import PrologString
912from sqlalchemy import ForeignKey , String
1013from sqlalchemy .orm import Mapped , mapped_column
1114
@@ -65,184 +68,6 @@ class MaliciousMetadataFacts(CheckFacts):
6568 }
6669
6770
# Heuristic analyzer classes, in the order they are run.
# To add a new analyzer, append its class to the end of this list.
ANALYZERS: list = [
    EmptyProjectLinkAnalyzer,
    SourceCodeRepoAnalyzer,
    OneReleaseAnalyzer,
    HighReleaseFrequencyAnalyzer,
    UnchangedReleaseAnalyzer,
    CloserReleaseJoinDateAnalyzer,
    SuspiciousSetupAnalyzer,
    WheelAbsenceAnalyzer,
    AnomalousVersionAnalyzer,
]
81-
82-
# Short aliases so that each suspicious combination fits on a single row below.
_FAIL = HeuristicResult.FAIL
_PASS = HeuristicResult.PASS
_SKIP = HeuristicResult.SKIP

# Maps a full tuple of heuristic results to the confidence that the package is malicious.
# The position of each HeuristicResult is aligned with the order of the ANALYZERS list:
#   1. Empty Project              4. High Release Frequency     7. Suspicious Setup
#   2. Source Code Repo           5. Unchanged Release          8. Wheel Absence
#   3. One Release                6. Closer Release Join Date   9. Anomalous Version
SUSPICIOUS_COMBO: dict[
    tuple[
        HeuristicResult,
        HeuristicResult,
        HeuristicResult,
        HeuristicResult,
        HeuristicResult,
        HeuristicResult,
        HeuristicResult,
        HeuristicResult,
        HeuristicResult,
    ],
    float,
] = {
    # No project link, only one release, and the maintainer released it shortly
    # after account registration.
    # The setup.py file contains suspicious imports and .whl file isn't present.
    # Anomalous version has no effect (both FAIL and PASS are listed).
    (_FAIL, _SKIP, _FAIL, _SKIP, _SKIP, _FAIL, _FAIL, _FAIL, _FAIL): Confidence.HIGH,
    (_FAIL, _SKIP, _FAIL, _SKIP, _SKIP, _FAIL, _FAIL, _FAIL, _PASS): Confidence.HIGH,
    # No project link, frequent releases of multiple versions without modifying the content,
    # and the maintainer released it shortly after account registration.
    # The setup.py file contains suspicious imports and .whl file isn't present.
    (_FAIL, _SKIP, _PASS, _FAIL, _FAIL, _FAIL, _FAIL, _FAIL, _SKIP): Confidence.HIGH,
    # No project link, frequent releases of multiple versions,
    # and the maintainer released it shortly after account registration.
    # The setup.py file contains suspicious imports and .whl file isn't present.
    (_FAIL, _SKIP, _PASS, _FAIL, _PASS, _FAIL, _FAIL, _FAIL, _SKIP): Confidence.HIGH,
    # No project link, frequent releases of multiple versions without modifying the content,
    # and the maintainer released it shortly after account registration. Presence/Absence of
    # .whl file has no effect (both PASS and FAIL are listed).
    (_FAIL, _SKIP, _PASS, _FAIL, _FAIL, _FAIL, _PASS, _PASS, _SKIP): Confidence.MEDIUM,
    (_FAIL, _SKIP, _PASS, _FAIL, _FAIL, _FAIL, _PASS, _FAIL, _SKIP): Confidence.MEDIUM,
    # No source code repo, frequent releases of multiple versions,
    # and the maintainer released it shortly after account registration.
    # The setup.py file contains suspicious imports and .whl file isn't present.
    (_PASS, _FAIL, _PASS, _FAIL, _PASS, _FAIL, _FAIL, _FAIL, _SKIP): Confidence.HIGH,
    # No project link, only one release, and the maintainer released it shortly
    # after account registration.
    # The setup.py file has no effect (PASS, FAIL and SKIP are listed) and .whl file is present.
    # The version number is anomalous.
    (_FAIL, _SKIP, _FAIL, _SKIP, _SKIP, _FAIL, _PASS, _PASS, _FAIL): Confidence.MEDIUM,
    (_FAIL, _SKIP, _FAIL, _SKIP, _SKIP, _FAIL, _FAIL, _PASS, _FAIL): Confidence.MEDIUM,
    (_FAIL, _SKIP, _FAIL, _SKIP, _SKIP, _FAIL, _SKIP, _PASS, _FAIL): Confidence.MEDIUM,
}
244-
245-
24671class DetectMaliciousMetadataCheck (BaseCheck ):
24772 """This check analyzes the metadata of a package for malicious behavior."""
24873
@@ -303,6 +128,41 @@ def validate_malware(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[boo
303128 is_malware , detail_info = sourcecode_analyzer .analyze ()
304129 return is_malware , detail_info
305130
131+ def evaluate_heuristic_results (self , heuristic_results : dict [Heuristics , HeuristicResult ]) -> float | None :
132+ """Analyse the heuristic results to determine the maliciousness of the package.
133+
134+ Parameters
135+ ----------
136+ heuristic_results: dict[Heuristics, HeuristicResult]
137+ Dictionary of Heuristic keys with HeuristicResult values, results of each heuristic scan.
138+
139+ Returns
140+ -------
141+ float | None
142+ Returns the confidence associated with the detected malicious combination, otherwise None if no associated
143+ malicious combination was triggered.
144+ """
145+ facts_list : list [str ] = []
146+ for heuristic , result in heuristic_results .items ():
147+ if result == HeuristicResult .SKIP :
148+ facts_list .append (f"0.0::{ heuristic .value } ." )
149+ elif result == HeuristicResult .PASS :
150+ facts_list .append (f"{ heuristic .value } :- true." )
151+ else : # HeuristicResult.FAIL
152+ facts_list .append (f"{ heuristic .value } :- false." )
153+
154+ facts = "\n " .join (facts_list )
155+ problog_code = f"{ facts } \n \n { self .malware_rules_problog_model } "
156+ logger .debug ("Problog model used for evaluation:\n %s" , problog_code )
157+
158+ problog_model = PrologString (problog_code )
159+ problog_results : dict [Term , float ] = get_evaluatable ().create_from (problog_model ).evaluate ()
160+
161+ confidence : float | None = problog_results .get (Term (self .problog_result_access ))
162+ if confidence == 0.0 :
163+ return None # no rules were triggered
164+ return confidence
165+
306166 def run_heuristics (
307167 self , pypi_package_json : PyPIPackageJsonAsset
308168 ) -> tuple [dict [Heuristics , HeuristicResult ], dict [str , JsonType ]]:
@@ -326,7 +186,7 @@ def run_heuristics(
326186 results : dict [Heuristics , HeuristicResult ] = {}
327187 detail_info : dict [str , JsonType ] = {}
328188
329- for _analyzer in ANALYZERS :
189+ for _analyzer in self . analyzers :
330190 analyzer : BaseHeuristicAnalyzer = _analyzer ()
331191 logger .debug ("Instantiating %s" , _analyzer .__name__ )
332192
@@ -418,8 +278,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
418278 except HeuristicAnalyzerValueError :
419279 return CheckResultData (result_tables = [], result_type = CheckResultType .UNKNOWN )
420280
421- result_combo : tuple = tuple (result .values ())
422- confidence : float | None = SUSPICIOUS_COMBO .get (result_combo , None )
281+ confidence = self .evaluate_heuristic_results (result )
423282 result_type = CheckResultType .FAILED
424283 if confidence is None :
425284 confidence = Confidence .HIGH
@@ -448,5 +307,66 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
448307 # Return UNKNOWN result for unsupported ecosystems.
449308 return CheckResultData (result_tables = [], result_type = CheckResultType .UNKNOWN )
450309
310+ # This list contains the heuristic analyzer classes
311+ # When implementing new analyzer, appending the classes to this list
312+ analyzers : list = [
313+ EmptyProjectLinkAnalyzer ,
314+ SourceCodeRepoAnalyzer ,
315+ OneReleaseAnalyzer ,
316+ HighReleaseFrequencyAnalyzer ,
317+ UnchangedReleaseAnalyzer ,
318+ CloserReleaseJoinDateAnalyzer ,
319+ SuspiciousSetupAnalyzer ,
320+ WheelAbsenceAnalyzer ,
321+ AnomalousVersionAnalyzer ,
322+ ]
323+
    # Name of the ProbLog atom that is queried at the end of the model below;
    # evaluate_heuristic_results looks the evaluation result up under this term.
    problog_result_access = "result"

    # ProbLog rules that evaluate_heuristic_results appends to the generated heuristic facts.
    # In those facts an atom named after a heuristic's value is true when the heuristic passed,
    # so `not <heuristic>` in a rule body matches a heuristic that did not pass.
    # NOTE(review): a SKIP result is emitted as `0.0::<heuristic>.`, so `not <heuristic>` presumably
    # also holds for skipped heuristics, not only failed ones -- confirm this is intended.
    # NOTE(review): several rules share the annotated heads `high` and `medium`; verify how ProbLog
    # combines their probabilities when more than one rule body holds.
    malware_rules_problog_model = f"""
    % Heuristic groupings
    % These are common combinations of heuristics that are used in many of the rules, thus themselves representing
    % certain behaviors. When changing or adding rules here, if there are frequent combinations of particular
    % heuristics, group them together here.

    % Maintainer has recently joined, publishing an undetailed page with no links.
    quickUndetailed :- not {Heuristics.EMPTY_PROJECT_LINK.value}, not {Heuristics.CLOSER_RELEASE_JOIN_DATE.value}.

    % Maintainer releases a suspicious setup.py and forces it to run by omitting a .whl file.
    forceSetup :- not {Heuristics.SUSPICIOUS_SETUP.value}, not {Heuristics.WHEEL_ABSENCE.value}.

    % Suspicious Combinations

    % Package released recently with little detail, forcing the setup.py to run.
    {Confidence.HIGH.value}::high :- quickUndetailed, forceSetup, not {Heuristics.ONE_RELEASE.value}.
    {Confidence.HIGH.value}::high :- quickUndetailed, forceSetup, not {Heuristics.HIGH_RELEASE_FREQUENCY.value}.

    % Package released recently with little detail, with some more refined trust markers introduced: project links,
    % multiple different releases, but there is no source code repository matching it and the setup is suspicious.
    {Confidence.HIGH.value}::high :- not {Heuristics.SOURCE_CODE_REPO.value},
        not {Heuristics.HIGH_RELEASE_FREQUENCY.value},
        not {Heuristics.CLOSER_RELEASE_JOIN_DATE.value},
        {Heuristics.UNCHANGED_RELEASE.value},
        forceSetup.

    % Package released recently with little detail, with multiple releases as a trust marker, but frequent and with
    % the same code.
    {Confidence.MEDIUM.value}::medium :- quickUndetailed,
        not {Heuristics.HIGH_RELEASE_FREQUENCY.value},
        not {Heuristics.UNCHANGED_RELEASE.value},
        {Heuristics.SUSPICIOUS_SETUP.value}.

    % Package released recently with little detail and an anomalous version number for a single-release package.
    {Confidence.MEDIUM.value}::medium :- quickUndetailed,
        not {Heuristics.ONE_RELEASE.value},
        {Heuristics.WHEEL_ABSENCE.value},
        not {Heuristics.ANOMALOUS_VERSION.value}.

    {problog_result_access} :- high.
    {problog_result_access} :- medium.

    query({problog_result_access}).
    """
370+
451371
# Create an instance of the check and register it with the registry when this module is imported.
registry.register(DetectMaliciousMetadataCheck())
0 commit comments