Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 36 additions & 3 deletions rocrate_validator/cli/commands/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from rich.rule import Rule

from rocrate_validator.utils import log as logging
from rocrate_validator import services
from rocrate_validator import constants, services
from rocrate_validator.cli.commands.errors import handle_error
from rocrate_validator.cli.main import cli
from rocrate_validator.cli.ui.text.validate import ValidationCommandView
Expand Down Expand Up @@ -205,6 +205,29 @@ def validate_uri(ctx, param, value):
show_default=True,
help="Width of the output line",
)
@click.option(
'--cache-max-age',
type=click.INT,
default=constants.DEFAULT_HTTP_CACHE_MAX_AGE,
show_default=True,
help="Maximum age of the HTTP cache in seconds ([bold green]-1[/bold green] for no expiration)",
)
Comment on lines +208 to +214
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a way to set this so that the cache never expires? Set to -1?
(I see -1 used in the code, but it should be documented if it's allowed here.)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes — the option is -1.

I’ve also added a note in the CLI help text to document that -1 is supported and means the cache never expires.

@click.option(
'--cache-path',
type=click.Path(),
default=None,
show_default=True,
help="Path to the HTTP cache directory",
)
@click.option(
'-nc',
'--no-cache',
is_flag=True,
help="Disable the HTTP cache",
default=False,
show_default=True,
hidden=True
)
@click.pass_context
def validate(ctx,
profiles_path: Path = DEFAULT_PROFILES_PATH,
Expand All @@ -223,7 +246,10 @@ def validate(ctx,
verbose: bool = False,
output_format: str = "text",
output_file: Optional[Path] = None,
output_line_width: Optional[int] = None):
output_line_width: Optional[int] = None,
cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE,
cache_path: Optional[Path] = None,
no_cache: bool = False):
"""
[magenta]rocrate-validator:[/magenta] Validate a RO-Crate against a profile
"""
Expand All @@ -247,6 +273,11 @@ def validate(ctx,
logger.debug("fail_fast: %s", fail_fast)
logger.debug("no fail fast: %s", not fail_fast)

# Cache settings
logger.debug("cache_max_age: %s", cache_max_age)
logger.debug("cache_path: %s", os.path.abspath(cache_path) if cache_path else None)
logger.debug("no_cache: %s", no_cache)

if rocrate_uri:
logger.debug("rocrate_path: %s", os.path.abspath(rocrate_uri))

Expand Down Expand Up @@ -282,7 +313,9 @@ def validate(ctx,
"rocrate_relative_root_path": relative_root_path,
"abort_on_first": fail_fast,
"skip_checks": skip_checks_list,
"metadata_only": metadata_only
"metadata_only": metadata_only,
"cache_max_age": cache_max_age if not no_cache else -1,
"cache_path": cache_path
}

# Print the application header
Expand Down
2 changes: 1 addition & 1 deletion rocrate_validator/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,5 +87,5 @@
JSON_OUTPUT_FORMAT_VERSION = "0.2"

# HTTP Cache Settings
DEFAULT_HTTP_CACHE_TIMEOUT = 60
DEFAULT_HTTP_CACHE_MAX_AGE = 300 # in seconds
DEFAULT_HTTP_CACHE_PATH_PREFIX = '/tmp/rocrate_validator_cache'
20 changes: 15 additions & 5 deletions rocrate_validator/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@
import enum_tools
from rdflib import RDF, RDFS, Graph, Namespace, URIRef

from rocrate_validator.utils import log as logging
from rocrate_validator import __version__
from rocrate_validator.constants import (DEFAULT_ONTOLOGY_FILE,
from rocrate_validator.constants import (DEFAULT_HTTP_CACHE_MAX_AGE,
DEFAULT_ONTOLOGY_FILE,
DEFAULT_PROFILE_IDENTIFIER,
DEFAULT_PROFILE_README_FILE,
IGNORED_PROFILE_DIRECTORIES,
Expand All @@ -48,11 +48,13 @@
ROCrateMetadataNotFoundError)
from rocrate_validator.events import Event, EventType, Publisher, Subscriber
from rocrate_validator.rocrate import ROCrate
from rocrate_validator.utils.collections import (MapIndex)
from rocrate_validator.utils import log as logging
from rocrate_validator.utils.collections import MapIndex, MultiIndexMap
from rocrate_validator.utils.http import HttpRequester
from rocrate_validator.utils.paths import get_profiles_path
from rocrate_validator.utils.python_helpers import get_requirement_name_from_file
from rocrate_validator.utils.python_helpers import \
get_requirement_name_from_file
from rocrate_validator.utils.uri import URI
from rocrate_validator.utils.collections import MultiIndexMap

# set the default profiles path
DEFAULT_PROFILES_PATH = get_profiles_path()
Expand Down Expand Up @@ -2388,11 +2390,19 @@ class ValidationSettings:
metadata_dict: dict = None
#: Verbose output
verbose: bool = False
#: Cache max age in seconds
cache_max_age: Optional[int] = DEFAULT_HTTP_CACHE_MAX_AGE
#: Cache path
cache_path: Optional[Path] = None

def __post_init__(self):
    """
    Post-initialisation hook for the validation settings.

    A ``requirement_severity`` given as a string is replaced by the
    ``Severity`` member of that name; afterwards the HTTP cache is set up
    through ``HttpRequester.initialize_cache`` using the ``cache_path`` and
    ``cache_max_age`` stored on these settings.
    """
    severity = self.requirement_severity
    if isinstance(severity, str):
        # a name was supplied: swap it for the corresponding enum member
        self.requirement_severity = Severity[severity]

    # hand the cache options over to the HTTP requester
    cache_path, cache_max_age = self.cache_path, self.cache_max_age
    HttpRequester.initialize_cache(cache_max_age=cache_max_age, cache_path=cache_path)
    logger.debug("HTTP cache initialized at %s with max age %s seconds",
                 cache_path, cache_max_age)

def to_dict(self):
"""
Expand Down
71 changes: 57 additions & 14 deletions rocrate_validator/utils/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import atexit
import os
import random
import string
import threading
from typing import Optional

import requests

Expand All @@ -34,8 +37,9 @@ class HttpRequester:
_instance = None
_lock = threading.Lock()

def __new__(cls):
def __new__(cls, *args, **kwargs) -> HttpRequester:
if cls._instance is None:
logger.debug(f"Creating instance of {cls.__name__} with args: {args}, kwargs: {kwargs}")
with cls._lock:
if cls._instance is None:
logger.debug(f"Creating instance of {cls.__name__}")
Expand All @@ -44,40 +48,59 @@ def __new__(cls):
logger.debug(f"Instance created: {cls._instance.__class__.__name__}")
return cls._instance

def __init__(self):
def __init__(self,
cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE,
cache_path: Optional[str] = None):
logger.debug(f"Initializing instance of {self.__class__.__name__} {self}")
# check if the instance is already initialized
if not hasattr(self, "_initialized"):
# check if the instance is already initialized
with self._lock:
if not getattr(self, "_initialized", False):
# set the initialized flag
self._initialized = False
# store the parameters
try:
logger.debug(f"Setting cache_max_age to {cache_max_age}")
self.cache_max_age = int(cache_max_age)
except ValueError:
raise TypeError("cache_max_age must be an integer")
self.cache_path_prefix = cache_path
# flag to indicate if the cache is permanent or temporary
self.permanent_cache = cache_path is not None
# initialize the session
self.__initialize_session__()
self.__initialize_session__(cache_max_age, cache_path)
# set the initialized flag
self._initialized = True
else:
logger.debug(f"Instance of {self} already initialized")

def __initialize_session__(self):
def __initialize_session__(self, cache_max_age: int, cache_path: Optional[str] = None):
# initialize the session
self.session = None
logger.debug(f"Initializing instance of {self.__class__.__name__}")
assert not self._initialized, "Session already initialized"
# check if requests_cache is installed
# and set up the cached session
try:
if constants.DEFAULT_HTTP_CACHE_TIMEOUT > 0:
if cache_max_age >= 0:
from requests_cache import CachedSession

# Generate a random path for the cache
# to avoid conflicts with other instances
random_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=8))
# If cache_path is not provided, use the default path prefix
if not cache_path:
# Generate a random path for the cache
# to avoid conflicts with other instances
random_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=8))
cache_path = constants.DEFAULT_HTTP_CACHE_PATH_PREFIX + f"_{random_suffix}"
logger.debug(f"Using default cache path: {cache_path}")
else:
logger.debug(f"Using provided cache path: {cache_path}")
self.permanent_cache = True
# Initialize the session with a cache
self.session = CachedSession(
# Cache name with random suffix
cache_name=f"{constants.DEFAULT_HTTP_CACHE_PATH_PREFIX}_{random_suffix}",
expire_after=constants.DEFAULT_HTTP_CACHE_TIMEOUT, # Cache expiration time in seconds
cache_name=cache_path,
expire_after=cache_max_age, # Cache expiration time in seconds
backend='sqlite', # Use SQLite backend
allowable_methods=('GET',), # Cache GET
allowable_codes=(200, 302, 404) # Cache responses with these status codes
Expand All @@ -86,15 +109,23 @@ def __initialize_session__(self):
logger.warning("requests_cache is not installed. Using requests instead.")
except Exception as e:
logger.error("Error initializing requests_cache: %s", e)
logger.warning("Using requests instead of requests_cache")
# if requests_cache is not installed or an error occurred, use requests
# instead of requests_cache

# if requests_cache is not installed or an error occurred,
# use requests instead of requests_cache
# and create a new session
if not self.session:
logger.debug("Using requests instead of requests_cache")
logger.debug("Cache disabled: using requests instead of requests_cache")
self.session = requests.Session()

def __del__(self):
    """
    Finalizer: run ``cleanup()`` unless the cache is flagged as permanent.
    """
    logger.debug(f"Deleting instance of {self.__class__.__name__}")
    # An instance that never received the flag is left alone, exactly as if
    # its cache were permanent.
    if not getattr(self, "permanent_cache", True):
        self.cleanup()

def cleanup(self):
"""
Destructor to clean up the cache file used by CachedSession.
"""
Expand All @@ -119,3 +150,15 @@ def __getattr__(self, name):
if name.upper() in {"GET", "POST", "PUT", "DELETE", "HEAD", "OPTIONS", "PATCH"}:
return getattr(self.session, name.lower())
raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'")

@classmethod
def initialize_cache(cls,
                     cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE,
                     cache_path: Optional[str] = None) -> HttpRequester:
    """
    Set up the HttpRequester singleton with the given cache settings.

    :param cache_max_age: The maximum age of the cache in seconds.
    :param cache_path: The path to the cache directory.
    :return: The object obtained by calling the class with these settings.
    """
    requester = cls(cache_max_age=cache_max_age, cache_path=cache_path)
    return requester
Loading