Skip to content
Draft
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ Github = "https://github.com/ad-freiburg/qlever"

[project.scripts]
"qlever" = "qlever.qlever_main:main"
"qjena" = "qlever.qlever_main:main"
"qoxigraph" = "qlever.qlever_main:main"
"qlever-old" = "qlever.qlever_old:main"

[tool.setuptools]
license-files = ["LICENSE"]
Expand Down
40 changes: 40 additions & 0 deletions src/qjena/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Image providing the Apache Jena command-line tools and Apache Jena Fuseki
# on top of OpenJDK 21.

# Use an official OpenJDK image (JDK 21, slim variant) as the parent image
FROM openjdk:21-jdk-slim

# Install the tools needed by the download step below (wget, unzip); jq is
# installed alongside them
RUN apt-get update && apt-get install -y wget unzip jq

WORKDIR /opt

# Download and extract Apache Jena Fuseki and Apache Jena. The version is not
# pinned: the highest version number found in the listing at
# https://dlcdn.apache.org/jena/binaries/ at build time is used (`sort -V`
# followed by `tail -n 1`), so builds at different times may yield different
# versions. The extracted directories are moved to the version-independent
# paths /opt/apache-jena and /opt/apache-jena-fuseki.
RUN LATEST_VERSION=$(wget -qO- https://dlcdn.apache.org/jena/binaries/ \
| grep -oP 'apache-jena-\K[0-9]+\.[0-9]+\.[0-9]+' \
| sort -V \
| tail -n 1) \
&& wget https://dlcdn.apache.org/jena/binaries/apache-jena-fuseki-${LATEST_VERSION}.zip \
&& unzip apache-jena-fuseki-${LATEST_VERSION}.zip \
&& rm -f apache-jena-fuseki-${LATEST_VERSION}.zip \
&& wget https://dlcdn.apache.org/jena/binaries/apache-jena-${LATEST_VERSION}.zip \
&& unzip apache-jena-${LATEST_VERSION}.zip \
&& rm -f apache-jena-${LATEST_VERSION}.zip \
&& mv apache-jena-${LATEST_VERSION} /opt/apache-jena \
&& mv apache-jena-fuseki-${LATEST_VERSION} /opt/apache-jena-fuseki

# Set ownership of /opt to the user passed via the build arguments UID and
# GID. Both are optional; the chown is skipped unless both are non-empty.
ARG UID
ARG GID
RUN if [ "${UID:-}" != "" ] && [ "${GID:-}" != "" ]; then \
chown -R ${UID}:${GID} /opt; \
fi

# Ensure the Jena bin folder and the Fuseki folder are in PATH
ENV JENA_HOME="/opt/apache-jena"
ENV FUSEKI_HOME="/opt/apache-jena-fuseki"
ENV PATH="${JENA_HOME}/bin:${FUSEKI_HOME}:${PATH}"

# Make sure the Jena scripts are executable
RUN chmod +x /opt/apache-jena/bin/*

# Also set the executable bit on the Fuseki server jar
RUN chmod +x /opt/apache-jena-fuseki/fuseki-server.jar

# Default command (no ENTRYPOINT is set): start a bash shell
CMD ["bash"]
Empty file added src/qjena/__init__.py
Empty file.
Empty file added src/qjena/commands/__init__.py
Empty file.
17 changes: 17 additions & 0 deletions src/qjena/commands/example_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from __future__ import annotations

from qlever.commands.example_queries import (
ExampleQueriesCommand as QleverExampleQueriesCommand,
)


class ExampleQueriesCommand(QleverExampleQueriesCommand):
    """
    The `example-queries` command for `qjena`. Reuses the implementation
    from `qlever` and only supplies a default SPARQL endpoint built from the
    Qleverfile settings when none is given explicitly.
    """

    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        """
        Return the Qleverfile sections and argument names this command reads.
        """
        return {
            "data": ["name"],
            "server": ["host_name", "port"],
            "ui": ["ui_config"],
        }

    def execute(self, args) -> bool:
        """
        Run the example queries and return the result of the parent command.

        If `args.sparql_endpoint` is empty, it is set to
        `<host_name>:<port>/<name>/query` before delegating to the parent
        implementation.
        """
        if not args.sparql_endpoint:
            args.sparql_endpoint = (
                f"{args.host_name}:{args.port}/{args.name}/query"
            )
        return super().execute(args)
1 change: 1 addition & 0 deletions src/qjena/commands/extract_queries.py
1 change: 1 addition & 0 deletions src/qjena/commands/get_data.py
140 changes: 140 additions & 0 deletions src/qjena/commands/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
from __future__ import annotations

import glob
import shlex
from pathlib import Path

from qlever.command import QleverCommand
from qlever.containerize import Containerize
from qlever.log import log
from qlever.util import binary_exists, run_command


class IndexCommand(QleverCommand):
    """
    The `index` command for `qjena`: build the index for the input files
    from the Qleverfile, either with a natively installed binary (by default
    `tdb2.xloader`) or inside a container.
    """

    def __init__(self):
        # Name of the script, used in hints shown to the user.
        self.script_name = "qjena"

    def description(self) -> str:
        return "Build the index for a given RDF dataset"

    def should_have_qleverfile(self) -> bool:
        return True

    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        """
        Return the Qleverfile sections and argument names this command reads.
        """
        return {
            "data": ["name", "format"],
            "index": ["input_files"],
            "runtime": ["system", "image", "index_container"],
        }

    def additional_arguments(self, subparser):
        """
        Register the command-line arguments specific to this command.
        """
        subparser.add_argument(
            "--index-binary",
            type=str,
            default="tdb2.xloader",
            help=(
                "The binary for building the index (default: tdb2.xloader) "
                "(this requires that you have apache-jena installed "
                "on your machine)"
            ),
        )

    @staticmethod
    def build_image(build_cmd: str, system: str, image: str) -> bool:
        """
        Run `build_cmd` to build the container image. Return `True` on
        success; on any exception, log an error mentioning `system` and
        `image` and return `False`.
        """
        try:
            run_command(build_cmd, show_output=True)
        except Exception as e:
            log.error(f"Building the {system} image {image} failed: {e}")
            return False
        return True

    @staticmethod
    def wrap_cmd_in_container(args, cmd: str) -> str:
        """
        Return `cmd` wrapped such that it runs in a container of the image
        `args.image`, with the current directory mounted at `/opt/data` and
        used as working directory.
        """
        return Containerize().containerize_command(
            cmd=cmd,
            container_system=args.system,
            run_subcommand="run --rm",
            image_name=args.image,
            container_name=args.index_container,
            volumes=[("$(pwd)", "/opt/data")],
            working_directory="/opt/data",
        )

    def execute(self, args) -> bool:
        """
        Build the index. Return `True` if the index was built (or if only
        the command line was shown because of `args.show`) and `False` if
        any precondition check, the image build, or the index build failed.
        """
        system = args.system
        input_files = args.input_files
        run_natively = system == "native"

        # The index is written to the directory `index`; the output of the
        # index binary is also written to a log file.
        index_cmd = f"{args.index_binary} --loc index {input_files}"
        index_cmd += f" | tee {args.name}.index-log.txt"

        # These two are only needed (and only set) when using a container.
        build_cmd = None
        image_id = None
        if run_natively:
            cmd_to_show = index_cmd
        else:
            index_cmd = self.wrap_cmd_in_container(args, index_cmd)
            # The Dockerfile is located in the parent directory of the
            # directory containing this file.
            dockerfile_dir = Path(__file__).parent.parent
            dockerfile_path = dockerfile_dir / "Dockerfile"
            build_cmd = (
                f"{system} build -f {dockerfile_path} -t {args.image} --build-arg "
                f"UID=$(id -u) --build-arg GID=$(id -g) {dockerfile_dir}"
            )
            # Empty output means that the image is not present yet.
            image_id = run_command(
                f"{system} images -q {args.image}", return_output=True
            )
            cmd_to_show = (
                f"{build_cmd}\n\n{index_cmd}" if not image_id else index_cmd
            )

        # Show the command line.
        self.show(cmd_to_show, only_show=args.show)
        if args.show:
            return True

        # Check if all of the input files exist.
        for pattern in shlex.split(input_files):
            if not glob.glob(pattern):
                log.error(f'No file matching "{pattern}" found')
                log.info("")
                log.info(
                    f"Did you call `{self.script_name} get-data`? If you did, "
                    "check GET_DATA_CMD and INPUT_FILES in the Qleverfile"
                )
                return False

        # When running natively, check if the binary exists and works. When
        # using a container, check that no index build is already running.
        if run_natively:
            if not binary_exists(args.index_binary, "index-binary"):
                return False
        elif Containerize().is_running(system, args.index_container):
            log.info(
                f"{args.system} container {args.index_container} is still up, "
                "which means that data loading is in progress. Please wait..."
            )
            return False

        # Refuse to overwrite a previous index. This is checked before the
        # image is built, so that no time is spent on building an image only
        # to abort afterwards.
        index_dir = Path("index/Data-0001")
        if index_dir.exists() and any(index_dir.iterdir()):
            log.error(
                "Index files found in index/Data-0001 directory "
                "which shows presence of a previous index\n"
            )
            log.info("Aborting the index operation...")
            return False

        # When using a container, build the image if it is not present yet.
        if not run_natively:
            if image_id:
                log.info(f"{args.image} image present on the system\n")
            elif not self.build_image(build_cmd, system, args.image):
                return False

        # Run the index command.
        try:
            run_command(index_cmd, show_output=True)
        except Exception as e:
            log.error(f"Building the index failed: {e}")
            return False

        return True
1 change: 1 addition & 0 deletions src/qjena/commands/log.py
13 changes: 13 additions & 0 deletions src/qjena/commands/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from __future__ import annotations

from qoxigraph.commands.query import QueryCommand as QoxigraphQueryCommand


class QueryCommand(QoxigraphQueryCommand):
    """
    The `query` command for `qjena`. Reuses the implementation from
    `qoxigraph` and only supplies a default SPARQL endpoint on localhost
    when none is given explicitly.
    """

    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        """
        Return the Qleverfile sections and argument names this command reads.
        """
        return {"data": ["name"], "server": ["port", "access_token"]}

    def execute(self, args) -> bool:
        """
        Send the query and return the result of the parent command.

        If `args.sparql_endpoint` is empty, it is set to
        `localhost:<port>/<name>/query` before delegating to the parent
        implementation.
        """
        if not args.sparql_endpoint:
            args.sparql_endpoint = f"localhost:{args.port}/{args.name}/query"
        # Pass on the result of the parent; without the `return`, this
        # method would always return `None`, which callers treat as failure.
        return super().execute(args)
14 changes: 14 additions & 0 deletions src/qjena/commands/setup_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from __future__ import annotations

from qoxigraph.commands.setup_config import (
SetupConfigCommand as QoxigraphSetupConfigCommand,
)


class SetupConfigCommand(QoxigraphSetupConfigCommand):
    """
    The `setup-config` command for `qjena`. Intended to behave exactly like
    the `setup-config` command of `qoxigraph`; the only difference is the
    name of the Docker image.
    """

    # Name of the Docker image used for qjena.
    IMAGE = "adfreiburg/qjena"
Loading