1+ #
2+ # Copyright (c) nexB Inc. and others. All rights reserved.
3+ # VulnerableCode is a trademark of nexB Inc.
4+ # SPDX-License-Identifier: Apache-2.0
5+ # See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+ # See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+ # See https://aboutcode.org for more information about nexB OSS projects.
8+ #
9+ import requests
10+ from bs4 import BeautifulSoup
11+ from packageurl import PackageURL
12+
13+ from vulnerabilities .importer import AdvisoryData
14+ from vulnerabilities .importer import Importer
15+ from vulnerabilities .importer import VulnerabilityReference
16+
17+
class LiferayImporter(Importer):
    """
    Importer for Liferay advisories.

    Downloads the Liferay "Known Vulnerabilities" HTML page and turns each
    data row of the first table on that page into an ``AdvisoryData`` object.
    """

    spdx_license_identifier = "CC-BY-SA-4.0"  # License for Liferay's data

    # Seconds to wait for the server before giving up. Without an explicit
    # timeout, ``requests`` can block forever on an unresponsive host.
    request_timeout = 30

    def fetch(self):
        """
        Fetch the HTML content of the Liferay Known Vulnerabilities page.

        Returns:
            The response body decoded as text.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status.
            requests.Timeout: if the server does not answer within
                ``request_timeout`` seconds.
        """
        url = "https://liferay.dev/portal/security/known-vulnerabilities"
        response = requests.get(url, timeout=self.request_timeout)
        response.raise_for_status()
        return response.text

    def parse(self, html):
        """
        Parse the fetched HTML and extract vulnerability data.

        Only the first ``<table>`` of the document is read, and only rows
        inside its ``<tbody>``. Cells are read by position: identifier,
        comma-separated affected versions, description, severity, and a cell
        holding the reference links. Rows with fewer than five ``<td>`` cells
        (for example header rows) are skipped.

        Args:
            html: The page markup as a string.

        Returns:
            A list of AdvisoryData objects; empty when the page has no table
            or the table has no ``<tbody>``.
        """
        soup = BeautifulSoup(html, "html.parser")
        advisories = []

        # Locate the table. (Adjust the selector if the page structure changes.)
        table = soup.find("table")
        if table is None:
            return advisories

        # "html.parser" does not synthesize a missing <tbody>, so a table
        # without an explicit one yields no advisories.
        tbody = table.find("tbody")
        if tbody is None:
            return advisories

        for row in tbody.find_all("tr"):
            cells = row.find_all("td")
            if len(cells) < 5:
                continue

            # Extract each field by cell order.
            vulnerability_id = cells[0].get_text(strip=True)
            affected_versions = cells[1].get_text(strip=True)
            description = cells[2].get_text(strip=True)
            severity = cells[3].get_text(strip=True)

            # The references cell may hold several links; keep every
            # non-blank href.
            hrefs = (a["href"].strip() for a in cells[4].find_all("a", href=True))
            references = [VulnerabilityReference(url=href) for href in hrefs if href]

            # One PackageURL per non-blank entry of the comma-separated
            # version list.
            versions = (part.strip() for part in affected_versions.split(","))
            affected_packages = [
                PackageURL(
                    type="liferay",
                    name="liferay-portal",
                    version=version,
                )
                for version in versions
                if version
            ]

            # NOTE(review): confirm that AdvisoryData accepts a ``severity``
            # keyword and plain PackageURL objects in ``affected_packages``;
            # its definition is not visible from this file.
            advisories.append(
                AdvisoryData(
                    aliases=[vulnerability_id],
                    summary=description,
                    affected_packages=affected_packages,
                    references=references,
                    severity=severity,
                )
            )

        return advisories

    def advisory_data(self):
        """
        Fetch the page and parse it, returning a list of AdvisoryData objects.

        Any exception raised by ``fetch`` propagates to the caller.
        """
        html = self.fetch()
        return self.parse(html)
0 commit comments