Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,18 @@ repos:
- id: bandit
name: bandit
args: ["-c", ".bandit"]

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.17.1
hooks:
- id: mypy
name: mypy
args: []
additional_dependencies:
[
"types-jsonschema",
"types-tqdm",
"types-tabulate",
"scipy-stubs",
"matplotlib", # There are no official stubs for matplotlib
]
14 changes: 10 additions & 4 deletions codebasin/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# Copyright (C) 2019-2024 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations

import importlib.metadata
import os
import shlex
import typing
import warnings
from collections.abc import Iterable
from pathlib import Path
Expand Down Expand Up @@ -152,7 +155,7 @@ def __iter__(self):
yield from self.commands

@classmethod
def from_json(cls, instance: list):
def from_json(cls, instance: list) -> CompilationDatabase:
"""
Parameters
----------
Expand All @@ -174,7 +177,10 @@ def from_json(cls, instance: list):
return cls(commands)

@classmethod
def from_file(cls, filename: str | os.PathLike[str]):
def from_file(
cls,
filename: str | os.PathLike[str],
) -> CompilationDatabase:
"""
Parameters
----------
Expand All @@ -194,8 +200,8 @@ def from_file(cls, filename: str | os.PathLike[str]):
A CompilationDatabase corresponding to the provided JSON file.
"""
with open(filename) as f:
db = codebasin.util._load_json(f, schema_name="compiledb")
return CompilationDatabase.from_json(db)
db: object = codebasin.util._load_json(f, schema_name="compiledb")
return CompilationDatabase.from_json(typing.cast(list, db))


class CodeBase:
Expand Down
4 changes: 2 additions & 2 deletions codebasin/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@ def _help_string(*lines: str, is_long=False, is_last=False):

# argparse.HelpFormatter indents by 24 characters.
# We cannot override this directly, but can delete them with backspaces.
lines = ["\b" * 20 + x for x in lines]
modified_lines = ["\b" * 20 + x for x in lines]

# The additional space is required for argparse to respect newlines.
result += "\n".join(lines)
result += "\n".join(modified_lines)

if not is_last:
result += "\n "
Expand Down
24 changes: 13 additions & 11 deletions codebasin/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
Contains functions to build up a configuration dictionary,
defining a specific code base configuration.
"""
from __future__ import annotations

import argparse
import logging
Expand All @@ -12,16 +13,17 @@
import re
import string
import tomllib
from collections.abc import Sequence
from dataclasses import asdict, dataclass, field
from itertools import chain
from pathlib import Path
from typing import Self
from typing import Any

from codebasin import CompilationDatabase, util

log = logging.getLogger(__name__)

_compilers = None
_compilers = {}
Comment thread
Pennycook marked this conversation as resolved.


class _StoreSplitAction(argparse.Action):
Expand All @@ -45,9 +47,9 @@ def __call__(
self,
parser: argparse.ArgumentParser,
namespace: argparse.Namespace,
values: str,
option_string: str,
):
values: str | Sequence[Any] | None,
option_string: str | None = None,
) -> None:
if not isinstance(values, str):
raise TypeError("store_split expects string values")
split_values = values.split(self.sep)
Expand Down Expand Up @@ -84,9 +86,9 @@ def __call__(
self,
parser: argparse.ArgumentParser,
namespace: argparse.Namespace,
value: str,
option_string: str,
):
value: str | Sequence[Any] | None,
option_string: str | None = None,
) -> None:
if not isinstance(value, str):
raise TypeError("extend_match expects string value")
matches = re.findall(self.pattern, value)
Expand Down Expand Up @@ -118,7 +120,7 @@ class _CompilerMode:
include_files: list[str] = field(default_factory=list)

@classmethod
def from_toml(cls, toml: object) -> Self:
def from_toml(cls, toml: dict[str, Any]) -> _CompilerMode:
return _CompilerMode(**toml)


Expand All @@ -131,7 +133,7 @@ class _CompilerPass:
modes: list[str] = field(default_factory=list)

@classmethod
def from_toml(cls, toml: object) -> Self:
def from_toml(cls, toml: dict[str, Any]) -> _CompilerPass:
return _CompilerPass(**toml)


Expand All @@ -144,7 +146,7 @@ class _Compiler:
passes: dict[str, _CompilerPass] = field(default_factory=dict)

@classmethod
def from_toml(cls, toml: object) -> Self:
def from_toml(cls, toml: dict[str, Any]) -> _Compiler:
kwargs = toml.copy()
if "parser" in kwargs:
for option in kwargs["parser"]:
Expand Down
6 changes: 4 additions & 2 deletions codebasin/coverage/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,13 @@ def _compute(args: argparse.Namespace):
with open(filename, "rb") as f:
digest = hashlib.file_digest(f, "sha512")

used_lines = []
unused_lines = []
used_lines: list[int] = []
unused_lines: list[int] = []
tree = state.get_tree(filename)
association = state.get_map(filename)
for node in [n for n in tree.walk() if isinstance(n, CodeNode)]:
if not node.lines:
continue
if association[node] == frozenset([]):
unused_lines.extend(node.lines)
else:
Expand Down
2 changes: 1 addition & 1 deletion codebasin/finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def get_setmap(self, codebase: CodeBase) -> dict[frozenset, int]:
dict[frozenset, int]
The number of lines associated with each platform set.
"""
setmap = collections.defaultdict(int)
setmap: dict[frozenset, int] = collections.defaultdict(int)
for fn in codebase:
# Don't count symlinks if their target is in the code base.
# The target will be counted separately.
Expand Down
2 changes: 1 addition & 1 deletion codebasin/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ class CodeNode(Node):
end_line: int = field(default=-1, init=False)
num_lines: int = field(default=0, init=False)
source: str | None = field(default=None, init=False, repr=False)
lines: list[str] | None = field(
lines: list[int] | None = field(
Comment thread
Pennycook marked this conversation as resolved.
default_factory=list,
init=False,
repr=False,
Expand Down
40 changes: 20 additions & 20 deletions codebasin/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import sys
import warnings
from collections import defaultdict
from collections.abc import Iterable
from collections.abc import Sequence
from pathlib import Path
from typing import Self, TextIO

Expand Down Expand Up @@ -129,11 +129,11 @@ def average_coverage(
if len(platforms) == 0:
return float("nan")

total = sum([coverage(setmap, [p]) for p in platforms])
total = sum([coverage(setmap, {p}) for p in platforms])
return total / len(platforms)


def distance(setmap, p1, p2):
def distance(setmap, p1, p2) -> float:
"""
Compute distance between two platforms
"""
Expand All @@ -148,14 +148,14 @@ def distance(setmap, p1, p2):
return d


def divergence(setmap):
def divergence(setmap) -> float:
"""
Compute code divergence as defined by Harrell and Kitson
i.e. average of pair-wise distances between platform sets
"""
platforms = extract_platforms(setmap)

d = 0
d = 0.0
npairs = 0
for p1, p2 in it.combinations(platforms, 2):
d += distance(setmap, p1, p2)
Expand All @@ -166,14 +166,14 @@ def divergence(setmap):
return d / float(npairs)


def summary(setmap: defaultdict[str, int], stream: TextIO = sys.stdout):
def summary(setmap: dict[frozenset[str], int], stream: TextIO = sys.stdout):
"""
Produce a summary report for the platform set, including
a breakdown of SLOC per platform subset, code divergence, etc.

Parameters
----------
setmap: defaultdict[str, int]
setmap: dict[frozenset[str], int]
The setmap used to compute the summary report.

stream: TextIO, default: sys.stdout
Expand Down Expand Up @@ -214,7 +214,7 @@ def summary(setmap: defaultdict[str, int], stream: TextIO = sys.stdout):

def clustering(
output_name: str,
setmap: defaultdict[str, int],
setmap: dict[frozenset[str], int],
stream: TextIO = sys.stdout,
):
"""
Expand All @@ -225,7 +225,7 @@ def clustering(
output_name: str
The filename for the dendrogram.

setmap: defaultdict[str, int]
setmap: dict[frozenset[str], int]
The setmap used to compute the clustering statistics.

stream: TextIO, default: sys.stdout
Expand Down Expand Up @@ -313,7 +313,7 @@ def find_duplicates(codebase: CodeBase) -> list[set[Path]]:
A list of all sets of Paths with identical contents.
"""
# Search for possible matches using a hash, ignoring symlinks.
possible_matches = {}
possible_matches: dict[str, set] = {}
for path in codebase:
path = Path(path)
if path.is_symlink():
Expand Down Expand Up @@ -486,15 +486,15 @@ def is_symlink(self):
def _platforms_str(
self,
all_platforms: set[str],
labels: Iterable[str] = string.ascii_uppercase,
labels: Sequence[str] = string.ascii_uppercase,
) -> str:
"""
Parameters
----------
all_platforms: set[str]
The set of all platforms.

labels: Iterable[str], default: string.ascii_uppercase
labels: Sequence[str], default: string.ascii_uppercase
The labels to use in place of real platform names.

Returns
Expand Down Expand Up @@ -605,7 +605,7 @@ def __init__(self, rootdir: str | os.PathLike[str]):
def insert(
self,
filename: str | os.PathLike[str],
setmap: defaultdict[str, int],
setmap: dict[frozenset[str], int],
):
"""
Insert a new file into the tree, creating as many nodes as necessary.
Expand Down Expand Up @@ -653,7 +653,7 @@ def _print(
prefix: str = "",
connector: str = "",
fancy: bool = True,
levels: int = None,
levels: int | None = None,
):
"""
Recursive helper function to print all nodes in a FileTree.
Expand Down Expand Up @@ -740,7 +740,7 @@ def _print(

return lines

def write_to(self, stream: TextIO, levels: int = None):
def write_to(self, stream: TextIO, levels: int | None = None):
"""
Write the FileTree to the specified stream.

Expand All @@ -766,7 +766,7 @@ def files(
*,
stream: TextIO = sys.stdout,
prune: bool = False,
levels: int = None,
levels: int | None = None,
):
"""
Produce a file tree representing the code base.
Expand Down Expand Up @@ -796,7 +796,7 @@ def files(
# Build up a tree from the list of files.
tree = FileTree(codebase.directories[0])
for f in codebase:
setmap = defaultdict(int)
setmap: dict[frozenset[str], int] = defaultdict(int)
if state:
association = state.get_map(f)
for node in filter(
Expand Down Expand Up @@ -828,10 +828,10 @@ def files(
]
legend += ["[" + " | ".join(header) + "]"]
legend += [""]
legend = "\n".join(legend)
legend_string = "\n".join(legend)
if not stream.isatty():
legend = _strip_colors(legend)
print(legend, file=stream)
legend_string = _strip_colors(legend_string)
print(legend_string, file=stream)

# Print the tree.
tree.write_to(stream, levels=levels)
Loading