Skip to content
124 changes: 105 additions & 19 deletions appstore/api/v1/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,50 +127,77 @@ def search_for_gpu_reservation(reservations):
# we are providing minimum reservations to the front end from the spec.
return 0

def validate_request_resources(request_cpu, request_gpu, request_memory, request_ephemeral, minimum_resources, maximum_resources):
def validate_request_resources(request_cpu, request_gpu, request_memory, request_ephemeral, minimum_resources, maximum_resources, username=None, app_id=None):
"""
Validate requested resources against minimum and maximum limits.

Logs validation failures with user and app context for monitoring.
"""
# Helper to log validation failures
def log_validation_failure(resource_type, requested, limit, limit_type, reason):
context = []
if username:
context.append(f"user={username}")
if app_id:
context.append(f"app_id={app_id}")
context.append(f"resource={resource_type}")
context.append(f"requested={requested}")
context.append(f"{limit_type}={limit}")
context.append(f"reason={reason}")

logger.warning(f"Resource validation failed: {' '.join(context)}")

if request_cpu is not None and request_cpu < float(minimum_resources.cpus):
log_validation_failure("cpu", request_cpu, minimum_resources.cpus, "minimum", "below_minimum")
return Response(
f"Invalid resources requested. Cannot allocate more than {minimum_resources.cpus} cpus.",
status=drf_status.HTTP_400_BAD_REQUEST
)

if request_cpu is not None and request_cpu > float(maximum_resources.cpus):
log_validation_failure("cpu", request_cpu, maximum_resources.cpus, "maximum", "exceeds_maximum")
return Response(
f"Invalid resources requested. Cannot allocate more than {maximum_resources.cpus} cpus.",
status=drf_status.HTTP_400_BAD_REQUEST
)

if request_gpu is not None and request_gpu < int(minimum_resources.gpus):
log_validation_failure("gpu", request_gpu, minimum_resources.gpus, "minimum", "below_minimum")
return Response(
f"Invalid resources requested. Cannot allocate less than {minimum_resources.gpus} gpus.",
status=drf_status.HTTP_400_BAD_REQUEST
)

if request_gpu is not None and request_gpu > int(maximum_resources.gpus):
log_validation_failure("gpu", request_gpu, maximum_resources.gpus, "maximum", "exceeds_maximum")
return Response(
f"Invalid resources requested. Cannot allocate more than {maximum_resources.gpus} gpus.",
status=drf_status.HTTP_400_BAD_REQUEST
)

if request_memory is not None and to_bytes(request_memory) < to_bytes(minimum_resources.memory):
log_validation_failure("memory", request_memory, minimum_resources.memory, "minimum", "below_minimum")
return Response(
f"Invalid resources requested. Cannot allocate less than {minimum_resources.memory} memory.",
status=drf_status.HTTP_400_BAD_REQUEST
)

if request_memory is not None and to_bytes(request_memory) > to_bytes(maximum_resources.memory):
log_validation_failure("memory", request_memory, maximum_resources.memory, "maximum", "exceeds_maximum")
return Response(
f"Invalid resources requested. Cannot allocate more than {maximum_resources.memory} memory.",
status=drf_status.HTTP_400_BAD_REQUEST
)

if request_ephemeral is not None and to_bytes(request_ephemeral) < to_bytes(minimum_resources.ephemeralStorage):
log_validation_failure("ephemeral_storage", request_ephemeral, minimum_resources.ephemeralStorage, "minimum", "below_minimum")
return Response(
f"Invalid resources requested. Cannot allocate less than {minimum_resources.ephemeralStorage} ephemeral storage.",
status=drf_status.HTTP_400_BAD_REQUEST
)

if request_ephemeral is not None and to_bytes(request_ephemeral) > to_bytes(maximum_resources.ephemeralStorage):
log_validation_failure("ephemeral_storage", request_ephemeral, maximum_resources.ephemeralStorage, "maximum", "exceeds_maximum")
return Response(
f"Invalid resources requested. Cannot allocate more than {maximum_resources.ephemeralStorage} ephemeral storage.",
status=drf_status.HTTP_400_BAD_REQUEST
Expand Down Expand Up @@ -348,7 +375,7 @@ def list(self, request):
continue

apps = {key: value for key, value in sorted(apps.items())}
logging.debug(f"apps:\n${apps}")
logger.debug(f"apps:\n${apps}")
serializer = self.get_serializer(data=apps)
serializer.is_valid()
if serializer.errors:
Expand Down Expand Up @@ -395,7 +422,7 @@ def retrieve(self, request, app_id: Optional[str]=None):
)
)
)
logging.debug(f"app:\n${app}")
logger.debug(f"app:\n${app}")

serializer = self.get_serializer(data=asdict(app))
serializer.is_valid()
Expand Down Expand Up @@ -552,11 +579,11 @@ def create(self, request):
username = request.user.get_username()

serializer = self.get_serializer(data=request.data)
logging.debug("checking if request is valid")
logger.debug("checking if request is valid")
serializer.is_valid(raise_exception=True)
logging.debug("creating resource_request")
logger.debug("creating resource_request")
resource_request = serializer.create(serializer.validated_data)
logging.debug(f"resource_request: {resource_request}")
logger.debug(f"resource_request: {resource_request}")
irods_enabled = os.environ.get("IROD_HOST",'').strip()
# TODO update social query to fetch user.

Expand All @@ -566,7 +593,15 @@ def create(self, request):
os.environ["NFSRODS_UID"] = str(nfs_id)

# We will update this later once a system id for the app exists
identity_token = UserIdentityToken.objects.create(user=request.user)
try:
identity_token = UserIdentityToken.objects.create(user=request.user)
logger.debug(f"Created identity token for user {username}")
except Exception as e:
logger.error(f"Failed to create identity token for user {username}: {type(e).__name__}: {str(e)}")
return Response(
{"message": "Failed to create authentication token"},
status=drf_status.HTTP_500_INTERNAL_SERVER_ERROR,
)
principal = Principal(username, identity_token.token, None)

app_id = serializer.data["app_id"]
Expand Down Expand Up @@ -594,7 +629,11 @@ def create(self, request):
request_memory = to_bytes(resource_request.memory)
request_ephemeral = to_bytes(resource_request.ephemeralStorage)

validation_response = validate_request_resources(request_cpu, request_gpu, request_memory, request_ephemeral, minimum_resources, maximum_resources)
validation_response = validate_request_resources(
request_cpu, request_gpu, request_memory, request_ephemeral,
minimum_resources, maximum_resources,
username=username, app_id=app_id
)
if validation_response is not None:
return validation_response

Expand All @@ -605,8 +644,24 @@ def create(self, request):
host = get_host(request)
system = tycho.start(principal, app_id, resource_request.resources, host, env)

identity_token.consumer_id = identity_token.compute_app_consumer_id(system.identifier)
identity_token.save()
try:
identity_token.consumer_id = identity_token.compute_app_consumer_id(system.identifier)
identity_token.save()
logger.debug(f"Updated identity token with consumer_id {identity_token.consumer_id} for user {username}")
except Exception as e:
logger.error(
f"Failed to save identity token for user {username}, app {app_id}, "
f"system {system.identifier}: {type(e).__name__}: {str(e)}"
)
# Clean up the system that was started
try:
tycho.delete({"name": system.services[0].identifier})
except Exception as cleanup_error:
logger.error(f"Failed to cleanup system after token save failure: {str(cleanup_error)}")
return Response(
{"message": "Failed to save authentication token"},
status=drf_status.HTTP_500_INTERNAL_SERVER_ERROR,
)

s = InstanceSpec(
principal.username,
Expand All @@ -631,6 +686,15 @@ def create(self, request):
# for the user.
logger.error(f"Failed to launch app { app_id } for user { username }; exception: { str(e) }")
tycho.delete({"name": system.services[0].identifier})
# Clean up the identity token since instance won't be tracked
try:
identity_token.delete()
logger.debug(f"Deleted identity token for failed instance launch: user={username}, app={app_id}")
except Exception as token_error:
logger.error(
f"Failed to delete identity token after instance validation failure: "
f"user={username}, app={app_id}, error={str(token_error)}"
)
return Response(
serializer.errors, status=drf_status.HTTP_400_BAD_REQUEST
)
Expand All @@ -639,7 +703,14 @@ def create(self, request):
# potentially created instance rather than leaving it hanging.
logger.error(f"Failed to launch app { app_id } for user { username }; null instance spec")
tycho.delete({"name": system.services[0].identifier})
identity_token.delete()
try:
identity_token.delete()
logger.debug(f"Deleted identity token for null instance spec: user={username}, app={app_id}")
except Exception as token_error:
logger.error(
f"Failed to delete identity token after null instance spec: "
f"user={username}, app={app_id}, error={str(token_error)}"
)
return Response(
{"message": "failed to submit app start."},
status=drf_status.HTTP_500_INTERNAL_SERVER_ERROR,
Expand Down Expand Up @@ -694,9 +765,18 @@ def destroy(self, request, sid=None):
logger.info(f"Terminating app id { sid } for user { request.user.username }")
response = tycho.delete({"name": serializer.validated_data["sid"]})
# Delete all the tokens the user had associated with that app
consumer_id = UserIdentityToken.compute_app_consumer_id(serializer.validated_data["sid"])
tokens = UserIdentityToken.objects.filter(user=request.user, consumer_id=consumer_id)
tokens.delete()
try:
consumer_id = UserIdentityToken.compute_app_consumer_id(serializer.validated_data["sid"])
tokens = UserIdentityToken.objects.filter(user=request.user, consumer_id=consumer_id)
token_count = tokens.count()
tokens.delete()
logger.info(f"Deleted {token_count} identity token(s) for terminated instance: user={request.user.username}, sid={sid}")
except Exception as token_error:
logger.error(
f"Failed to delete identity tokens for terminated instance: "
f"user={request.user.username}, sid={sid}, error={type(token_error).__name__}: {str(token_error)}"
)
# Continue anyway since the app was terminated successfully
# TODO How can we avoid this sleep? Do we need an immediate response beyond
# a successful submission? Can we do a follow up with Web Sockets or SSE
# to the front end?
Expand Down Expand Up @@ -745,8 +825,12 @@ def partial_update(self, request, sid=None):
request_gpu = float(data["gpu"]) if "gpu" in data else None
request_memory = to_bytes(data["memory"]) if "memory" in data else None
request_ephemeral = None

validation_response = validate_request_resources(request_cpu, request_gpu, request_memory, request_ephemeral, minimum_resources, maximum_resources)

validation_response = validate_request_resources(
request_cpu, request_gpu, request_memory, request_ephemeral,
minimum_resources, maximum_resources,
username=username, app_id=app_id
)
if validation_response is not None:
return validation_response

Expand Down Expand Up @@ -815,6 +899,8 @@ def list(self, request):

@action(methods=["POST"], detail=False)
def logout(self, request):
username = request.user.username if request.user.is_authenticated else "anonymous"
logger.info(f"User {username} logged out via API")
logout(request)
data = {"success": "Successfully logged out"}
return Response(data=data, status=status.HTTP_200_OK)
Expand Down
19 changes: 16 additions & 3 deletions appstore/appstore/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from django.conf import settings
from django.forms import ValidationError

logger = logging.getLogger(__file__)
logger = logging.getLogger(__name__)

class RestrictEmailAdapter(DefaultAccountAdapter):
def clean_email(self, email):
Expand Down Expand Up @@ -50,7 +50,10 @@ def get_connect_redirect_url(self, request, socialaccount):
return self._login_url(request)

def get_logout_redirect_url(self, request):
username = request.user.username if request.user.is_authenticated else "anonymous"
frontend = request.session.get("helx_frontend", "unknown")
url = self._logout_url(request)
logger.info(f"User {username} logging out from {frontend} frontend, redirecting to {url}")
# Unset and let the frontend set it again on landing
# Using get incase the session is cleared between login and logout to prevent
# an error and returning of the route
Expand All @@ -71,7 +74,17 @@ def on_authentication_error(self, request, provider, error=None, exception=None,
error_code = error.name if error else "unknown"
exception_str = str(exception) if exception else "No exception details"

logger.info(f"User failed to login using allauth:\nprovider id: { provider_id}\nerror code: { error_code }\nexception: { exception_str }")

# Extract IP address for security tracking
x_forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR')
if x_forwarded_for:
ip_address = x_forwarded_for.split(',')[0].strip()
else:
ip_address = request.META.get('REMOTE_ADDR', 'unknown')

logger.warning(
f"Social login failed: provider={provider_id} error={error_code} "
f"ip={ip_address} exception={exception_str}"
)

# Note: this is a no-op, since this hook is unimplemented in the default (super) adapter class.
return super().on_authentication_error(request, provider, error, exception, extra_context)
63 changes: 63 additions & 0 deletions appstore/appstore/json_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import json
import logging
import traceback
from datetime import datetime


class JSONFormatter(logging.Formatter):
"""
Custom JSON formatter for structured logging.

Outputs log records as JSON objects with standardized fields for easy parsing
by log aggregation tools (ELK, Splunk, CloudWatch, etc.).
"""

def format(self, record):
"""Format log record as JSON."""
log_data = {
"timestamp": datetime.utcfromtimestamp(record.created).isoformat() + "Z",
"level": record.levelname,
"logger": record.name,
"message": record.getMessage(),
"module": record.module,
"function": record.funcName,
"line": record.lineno,
"thread": record.threadName,
"process": record.process,
}

# Add exception info if present
if record.exc_info:
log_data["exception"] = {
"type": record.exc_info[0].__name__ if record.exc_info[0] else None,
"message": str(record.exc_info[1]) if record.exc_info[1] else None,
"traceback": traceback.format_exception(*record.exc_info),
}

# Add any extra fields that were passed to the logger
# (e.g., logger.info("message", extra={"user": "john"}))
if hasattr(record, "__dict__"):
# Standard log record attributes to exclude from extra fields
standard_attrs = {
"name", "msg", "args", "created", "filename", "funcName",
"levelname", "levelno", "lineno", "module", "msecs",
"message", "pathname", "process", "processName", "relativeCreated",
"thread", "threadName", "exc_info", "exc_text", "stack_info",
}

extra_fields = {}
for key, value in record.__dict__.items():
if key not in standard_attrs and not key.startswith("_"):
try:
# Ensure value is JSON serializable
json.dumps(value)
extra_fields[key] = value
except (TypeError, ValueError):
# If not serializable, convert to string
extra_fields[key] = str(value)

if extra_fields:
log_data["extra"] = extra_fields

# Return as compact JSON (one line per log entry)
return json.dumps(log_data, default=str)
Loading
Loading