Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ Github = "https://github.com/ad-freiburg/qlever"

[project.scripts]
"qlever" = "qlever.qlever_main:main"
"qmdb" = "qlever.qlever_main:main"
"qoxigraph" = "qlever.qlever_main:main"
"qlever-old" = "qlever.qlever_old:main"

[tool.setuptools]
license-files = ["LICENSE"]
Expand Down
63 changes: 63 additions & 0 deletions src/qmdb/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Multi-stage build for MillenniumDB:
#   clone -> fetch the sources, build -> compile them, final -> minimal runtime.

# Clone the MillenniumDB repository as the first step
FROM alpine:3.18 AS clone
WORKDIR /mdb-src

RUN apk --no-cache add git
RUN git clone --depth 1 https://github.com/MillenniumDB/MillenniumDB.git .
# List the checkout in the build log (diagnostic only).
RUN ls -l /mdb-src

# Build stage
FROM alpine:3.18 AS build
WORKDIR /mdb

# Install necessary build tools and dependencies
RUN apk --no-cache add cmake \
    make \
    g++ \
    openssl-dev \
    boost1.82-dev \
    ncurses-dev \
    icu-dev

# Use files from the cloned repository
COPY --from=clone /mdb-src/src src
COPY --from=clone /mdb-src/CMakeLists.txt CMakeLists.txt
COPY --from=clone /mdb-src/third_party/antlr4-runtime-4.13.1 third_party/antlr4-runtime-4.13.1

# Build MillenniumDB with all online cores but one. The job count is clamped
# to at least 1: on a single-core builder "cores - 1" is 0, which is not a
# valid argument for -j and would abort the build.
RUN JOBS=$(( $(getconf _NPROCESSORS_ONLN) - 1 )); \
    if [ "${JOBS}" -lt 1 ]; then JOBS=1; fi; \
    cmake -B build -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=./ && \
    cmake --build build -j "${JOBS}" --target install

# Copied after the build so that it is available to the final stage below.
COPY --from=clone /mdb-src/browser browser

# Final minimal stage (to minimize image size)
FROM alpine:3.18 AS final
WORKDIR /data

# Install runtime dependencies
RUN apk --no-cache add libstdc++ \
    libgcc \
    openssl \
    musl-locales \
    libncursesw \
    less \
    bash \
    icu-libs

# Copy the binaries and browser from the build stage
COPY --from=build /mdb/build/bin /usr/bin
COPY --from=build /mdb/browser /browser

# Set ownership to the user passed by UID and GID
# (skipped unless both build arguments are given and non-empty)
ARG UID
ARG GID
RUN if [ "${UID:-}" != "" ] && [ "${GID:-}" != "" ]; then \
        chown -R ${UID}:${GID} /data; \
    fi

# Expose necessary volumes and environment variables
VOLUME /data
ENV MDB_BROWSER=/browser

# Default command to run in the container
CMD ["bash"]
Empty file added src/qmdb/__init__.py
Empty file.
Empty file added src/qmdb/commands/__init__.py
Empty file.
12 changes: 12 additions & 0 deletions src/qmdb/commands/example_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from __future__ import annotations

from qlever.commands.example_queries import (
ExampleQueriesCommand as QleverExampleQueriesCommand,
)


class ExampleQueriesCommand(QleverExampleQueriesCommand):
    """
    Variant of the qlever `example-queries` command that fills in a default
    SPARQL endpoint before handing over to the parent implementation.
    """

    def execute(self, args) -> bool:
        """
        Run the parent command, first defaulting `args.sparql_endpoint` to
        `<host_name>:<port>/sparql` when no endpoint was given.

        Returns whatever the parent's `execute` returns.
        """
        endpoint_missing = not args.sparql_endpoint
        if endpoint_missing:
            default_endpoint = f"{args.host_name}:{args.port}/sparql"
            args.sparql_endpoint = default_endpoint
        return super().execute(args)
1 change: 1 addition & 0 deletions src/qmdb/commands/extract_queries.py
1 change: 1 addition & 0 deletions src/qmdb/commands/get_data.py
140 changes: 140 additions & 0 deletions src/qmdb/commands/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
from __future__ import annotations

import glob
import shlex
from pathlib import Path

from qlever.command import QleverCommand
from qlever.containerize import Containerize
from qlever.log import log
from qlever.util import binary_exists, run_command


class IndexCommand(QleverCommand):
    """
    Command that builds an index by running the index binary (default
    `mdb-import`), either natively or inside a container whose image is
    built on demand from the Dockerfile shipped next to this package.
    """

    def __init__(self):
        # Name of the script, used in user-facing hints (see `execute`).
        self.script_name = "qmdb"

    def description(self) -> str:
        """Return the one-line description of this command."""
        return "Build the index for a given RDF dataset"

    def should_have_qleverfile(self) -> bool:
        """Return True: this command reads its settings from a Qleverfile."""
        return True

    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        """Return the Qleverfile arguments used, keyed by section name."""
        return {
            "data": ["name", "format"],
            "index": ["input_files"],
            "runtime": ["system", "image", "index_container"],
        }

    def additional_arguments(self, subparser):
        """Register the command-line-only arguments of this command."""
        subparser.add_argument(
            "--index-binary",
            type=str,
            default="mdb-import",
            help=(
                "The binary for building the index (default: mdb-import) "
                "(this requires that you have Millennium DB built from source "
                "on your machine)"
            ),
        )

    @staticmethod
    def build_image(build_cmd: str, system: str, image: str) -> bool:
        """
        Run `build_cmd` to build the container image.

        Returns True on success; on any exception, logs an error naming
        `system` and `image` and returns False.
        """
        try:
            run_command(build_cmd, show_output=True)
            return True
        except Exception as e:
            log.error(f"Building the {system} image {image} failed: {e}")
            return False

    @staticmethod
    def wrap_cmd_in_container(args, cmd: str) -> str:
        """
        Return `cmd` wrapped so that it runs in a throw-away (`run --rm`)
        container of `args.image`, with the current directory mounted at
        `/data`, which is also the working directory.
        """
        return Containerize().containerize_command(
            cmd=cmd,
            container_system=args.system,
            run_subcommand="run --rm",
            image_name=args.image,
            container_name=args.index_container,
            volumes=[("$(pwd)", "/data")],
            working_directory="/data",
        )

    def execute(self, args) -> bool:
        """
        Build the index.

        Shows the command line (and stops there if `args.show` is set),
        verifies that the input files exist, checks the binary (native) or
        the container and image (containerized), refuses to run when the
        `index` directory is non-empty, and finally runs the index command.

        Returns True on success (or when only showing), False otherwise.
        """
        system = args.system
        input_files = args.input_files

        # NOTE(review): the trailing "index" is presumably the output
        # directory (it matches the directory checked below) -- confirm
        # against the mdb-import usage.
        index_cmd = f"{args.index_binary} {input_files} index"
        index_cmd += f" | tee {args.name}.index-log.txt"

        # Only bound to real values on the container path; initialised here
        # so that they are defined on every path.
        build_cmd = None
        image_id = None

        if args.system == "native":
            cmd_to_show = index_cmd
        else:
            index_cmd = self.wrap_cmd_in_container(args, index_cmd)
            dockerfile_dir = Path(__file__).parent.parent
            dockerfile_path = dockerfile_dir / "Dockerfile"
            build_cmd = (
                f"{system} build -f {dockerfile_path} -t {args.image} --build-arg "
                f"UID=$(id -u) --build-arg GID=$(id -g) {dockerfile_dir}"
            )
            # Empty output means that the image is not present locally yet.
            image_id = run_command(
                f"{system} images -q {args.image}", return_output=True
            )
            cmd_to_show = (
                f"{build_cmd}\n\n{index_cmd}" if not image_id else index_cmd
            )

        # Show the command line.
        self.show(cmd_to_show, only_show=args.show)
        if args.show:
            return True

        # Check if all of the input files exist.
        for pattern in shlex.split(input_files):
            if not glob.glob(pattern):
                log.error(f'No file matching "{pattern}" found')
                log.info("")
                log.info(
                    f"Did you call `{self.script_name} get-data`? If you did, "
                    "check GET_DATA_CMD and INPUT_FILES in the Qleverfile"
                )
                return False

        # When running natively, check if the binary exists and works.
        if args.system == "native":
            if not binary_exists(args.index_binary, "index-binary"):
                return False
        else:
            if Containerize().is_running(args.system, args.index_container):
                log.info(
                    f"{args.system} container {args.index_container} is still up, "
                    "which means that data loading is in progress. Please wait..."
                )
                return False

            # Build the image only if it is not already present.
            if not image_id:
                build_successful = self.build_image(
                    build_cmd, system, args.image
                )
                if not build_successful:
                    return False
            else:
                log.info(f"{args.image} image present on the system\n")

        # Refuse to run over the files of a previous index.
        index_dir = Path("index")
        if index_dir.exists() and any(index_dir.iterdir()):
            log.error(
                "Index files found in index directory "
                "which shows presence of a previous index\n"
            )
            log.info("Aborting the index operation...")
            return False

        # Run the index command.
        try:
            run_command(index_cmd, show_output=True)
        except Exception as e:
            log.error(f"Building the index failed: {e}")
            return False

        return True
1 change: 1 addition & 0 deletions src/qmdb/commands/log.py
10 changes: 10 additions & 0 deletions src/qmdb/commands/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from __future__ import annotations

from qoxigraph.commands.query import QueryCommand as QoxigraphQueryCommand


class QueryCommand(QoxigraphQueryCommand):
    """
    Variant of the qoxigraph `query` command that fills in a default SPARQL
    endpoint before handing over to the parent implementation.
    """

    def execute(self, args) -> bool:
        """
        Run the parent command, first defaulting `args.sparql_endpoint` to
        `localhost:<port>/sparql` when no endpoint was given.

        Returns whatever the parent's `execute` returns.
        """
        if not args.sparql_endpoint:
            args.sparql_endpoint = f"localhost:{args.port}/sparql"
        # Propagate the parent's result: without the `return`, this method
        # always yielded None, which a caller testing the result treats as
        # failure.
        return super().execute(args)
14 changes: 14 additions & 0 deletions src/qmdb/commands/setup_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from __future__ import annotations

from qoxigraph.commands.setup_config import (
SetupConfigCommand as QoxigraphSetupConfigCommand,
)


class SetupConfigCommand(QoxigraphSetupConfigCommand):
    """
    Same as the qoxigraph `setup-config` command; the only difference is
    the name of the Docker image.
    """

    # Docker image name that replaces the one from the qoxigraph command.
    IMAGE = "adfreiburg/qmdb"
Loading