|
| 1 | +# |
| 2 | +# Copyright (c) nexB Inc. and others. All rights reserved. |
| 3 | +# ScanCode is a trademark of nexB Inc. |
| 4 | +# SPDX-License-Identifier: Apache-2.0 |
| 5 | +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. |
| 6 | +# See https://github.com/nexB/scancode-toolkit for support or download. |
| 7 | +# See https://aboutcode.org for more information about nexB OSS projects. |
| 8 | +# |
| 9 | + |
| 10 | +import logging |
| 11 | +import os |
| 12 | +import uuid |
| 13 | +import sys |
| 14 | + |
| 15 | +from fnmatch import fnmatchcase |
| 16 | + |
| 17 | +import attr |
| 18 | +import saneyaml |
| 19 | + |
| 20 | +from commoncode import filetype |
| 21 | +from commoncode.fileutils import as_posixpath |
| 22 | +from commoncode.datautils import choices |
| 23 | +from commoncode.datautils import Boolean |
| 24 | +from commoncode.datautils import Date |
| 25 | +from commoncode.datautils import Integer |
| 26 | +from commoncode.datautils import List |
| 27 | +from commoncode.datautils import Mapping |
| 28 | +from commoncode.datautils import String |
| 29 | +from commoncode.resource import Resource |
| 30 | +from license_expression import combine_expressions |
| 31 | +from license_expression import Licensing |
| 32 | +from packageurl import normalize_qualifiers |
| 33 | +from packageurl import PackageURL |
| 34 | + |
| 35 | +try: |
| 36 | + from typecode import contenttype |
| 37 | +except ImportError: |
| 38 | + contenttype = None |
| 39 | + |
| 40 | +try: |
| 41 | + from packagedcode import licensing |
| 42 | +except ImportError: |
| 43 | + licensing = None |
| 44 | + |
| 45 | +# FIXME: what if licensing is not importable? |
| 46 | +from packagedcode.licensing import get_declared_license_expression_spdx |
| 47 | + |
| 48 | +""" |
| 49 | +This module contains data models for packages and dependencies, abstracting and |
| 50 | +normalizing the small differences that exist across different package types |
| 51 | +(aka. ecosystems), manifest file formats and tools. |
| 52 | + |
| 53 | +A package is a unit of code that is provisioned and installable. More commonly a |
| 54 | +package is stored in an archive and found in a package repository, though it can |
| 55 | +be as simple as a single file such as a script or may be stored in a VCS |
| 56 | +repository such as git. |
| 57 | + |
| 58 | +A package contains: |
| 59 | + |
| 60 | + - package information and metadata in some "manifest" file, |
| 61 | + - a payload such as code, documentation, or data. |
| 62 | + |
| 63 | + |
| 64 | +Structured package information comes in three primary kinds: |
| 65 | + |
| 66 | +- "metadata" such as a name, version or description, |
| 67 | + |
| 68 | +- "dependencies" on other packages (either potential with version requirements or |
| 69 | + resolved and locked with concrete versions), and |
| 70 | + |
| 71 | +- "build" and packaging scripts and instructions. |
| 72 | + |
| 73 | +Package types combine these in one or more manifests or scripts that we |
| 74 | +collectively call datafiles. For instance a Maven POM XML file contains combined |
| 75 | +metadata, dependencies and build instructions in an XML file while a pip |
| 76 | +requirements.txt file contains only dependencies. |
| 77 | + |
| 78 | +These package "data" files come in many different shapes: |
| 79 | + |
| 80 | +- Manifest files proper such as a Maven POM, NPM package.json and several others. |
| 81 | +- Dependency lockfiles such as pip requirements.txt or Go go.sum. |
| 82 | +- Build scripts such as Makefile. |
| 83 | +- Various structured or semi-structured metadata files in JSON, YAML or plain text |
| 84 | +- Property files that supplement manifests such as a pom.properties |
| 85 | +- Structured data headers or sections in binaries such as in an ELF, LKM or |
| 86 | + Windows PE; or the header of an RPM archive. |
| 87 | +- Code tags or conventional variables such as JavaDoc tags or Python __copyright__ |
| 88 | + magic variables and variables in Yocto/Bitbake. |
| 89 | +- JSON datafiles (or similar) fetched from registry or package repository APIs. |
| 90 | + |
| 91 | +We handle package information at two levels: |
| 92 | + |
| 93 | +- First, we parse manifests or lockfiles into a common package data model. |
| 94 | + |
| 95 | +- Second, we assemble lists of top-level Package and Dependency by aggregating |
| 96 | + the data from one or more parsed package datafiles. |
| 97 | + |
| 98 | +The key models defined here are: |
| 99 | + |
| 100 | +- PackageData: a class holding package data as parsed from a package datafile |
0 commit comments