Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 106 additions & 38 deletions lib/Service/DirectoryService.php
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ function ($resolve) use ($directoryUrl) {
*/
public function getUniqueDirectories(bool $availableOnly = false, bool $defaultOnly = false): array
{
// Check cache validity (5 minute TTL)
// Check cache validity (5 minute TTL).
$currentTime = time();
if ($this->cachedUniqueDirectories !== null && ($currentTime - $this->cacheTimestamp) < 300) {
return $this->cachedUniqueDirectories;
Expand Down Expand Up @@ -231,7 +231,7 @@ public function getUniqueDirectories(bool $availableOnly = false, bool $defaultO

$listings = $objectService->searchObjects($query);

// Removed redundant logging
// Removed redundant logging.
// Build unique directory URLs using URL as key to automatically handle duplicates.
foreach ($listings as $listing) {
$listingData = $listing->jsonSerialize();
Expand All @@ -257,23 +257,15 @@ public function getUniqueDirectories(bool $availableOnly = false, bool $defaultO
// Skip non-default listings
}

// Check for publications URL in the object data (primary) or directory URL (fallback)
// Check for publications URL in the object data (primary) or directory URL (fallback).
if (isset($objectData['publications']) && !empty($objectData['publications'])) {
$uniqueDirectoryUrls[$objectData['publications']] = $objectData['publications'];
// Removed redundant logging
}
// Fallback: check for directory URL at top level (backwards compatibility)
else if (isset($listingData['directory']) && !empty($listingData['directory'])) {
$uniqueDirectoryUrls[$listingData['directory']] = $listingData['directory'];
// Removed redundant logging
} else if (isset($objectData['directory']) && !empty($objectData['directory'])) {
// Fallback: convert directory URL to publications URL
$publicationsUrl = str_replace('/api/directory', '/api/publications', rtrim($objectData['directory'], '/'));
$uniqueDirectoryUrls[$publicationsUrl] = $publicationsUrl;
// Removed redundant logging
} else {
// Removed redundant logging
// Removed redundant logging.
}
// If no publications URL found, skip this listing.
// We used to have fallback logic here that would try to use the directory field,
// but that often pointed to the source directory (where we got the listing from)
// rather than the catalog's own API, causing circular queries.
}//end foreach
} catch (\Exception $e) {
// Removed redundant logging
Expand Down Expand Up @@ -594,42 +586,67 @@ public function syncListing(array $listingData, string $sourceDirectoryUrl): arr
}

// Clean up listing data to match schema
// Keep the @self metadata for UUID handling, but clean it up
// Keep the @self metadata for UUID handling, but clean it up.
$uuid = null;
if (isset($listingData['@self']['id']) === true) {
$uuid = $listingData['@self']['id'];
} else if (isset($listingData['id']) === true) {
$uuid = $listingData['id'];
} else if (isset($listingData['catalog']) === true) {
// Use catalog as UUID if no explicit ID is provided
// Use catalog as UUID if no explicit ID is provided.
$uuid = $listingData['catalog'];
}

// Remove @self metadata from the object data (but keep UUID for saveObject)
// Extract API endpoints from @self.relations BEFORE we unset @self.
// These endpoints tell us where the actual catalog's API is hosted.
if (isset($listingData['@self']['relations']) && is_array($listingData['@self']['relations'])) {
$relations = $listingData['@self']['relations'];

// Extract publications endpoint.
if (isset($relations['publications']) && !empty($relations['publications'])) {
$listingData['publications'] = $relations['publications'];
}

// Extract search endpoint (alternative).
if (isset($relations['search']) && !empty($relations['search'])) {
$listingData['search'] = $relations['search'];
}

// Extract directory endpoint from relations (the actual catalog's directory URL).
// This is different from $sourceDirectoryUrl which is where we got the listing from.
if (isset($relations['directory']) && !empty($relations['directory'])) {
$listingData['catalogDirectory'] = $relations['directory'];
}
}

// Remove @self metadata from the object data (but keep UUID for saveObject).
unset($listingData['@self']);

// Set directory URL in listing data for reference
// Detect or generate publication endpoint BEFORE we overwrite directory field.
// If not already extracted from relations, try to detect from available data.
if (empty($listingData['publications'])) {
$listingData['publications'] = $this->detectPublicationEndpoint($listingData);
}

// Store the source directory URL (where we got this listing from) in the 'directory' field for reference.
$listingData['directory'] = $sourceDirectoryUrl;

// Set lastSync as ISO string format instead of DateTime object
// Set lastSync as ISO string format instead of DateTime object.
$listingData['lastSync'] = (new \DateTime())->format('c');

// Catalog field is already present from external listing data
// Set summary to 'unknown' if empty (required field)
// Catalog field is already present from external listing data.
// Set summary to 'unknown' if empty (required field).
if (empty($listingData['summary'])) {
$listingData['summary'] = 'unknown';
}

// Count schemas if available
// Count schemas if available.
if (isset($listingData['schemas']) && is_array($listingData['schemas'])) {
$listingData['schemaCount'] = count($listingData['schemas']);
} else {
$listingData['schemaCount'] = 0;
}

// Detect or generate publication endpoint
$listingData['publications'] = $this->detectPublicationEndpoint($listingData);

// Check if listing already exists to determine action type
$existingListings = $objectService->searchObjects(
[
Expand Down Expand Up @@ -762,10 +779,10 @@ public function syncListing(array $listingData, string $sourceDirectoryUrl): arr
*/
public function getPublications(array $guzzleConfig = [], bool $includeDefault = false): array
{
// Get directories based on criteria
// Get directories based on criteria.
$directories = $this->getUniqueDirectories(availableOnly: true, defaultOnly: $includeDefault);

// Removed redundant logging
// Removed redundant logging.
if (empty($directories)) {
// Removed redundant logging
return [
Expand Down Expand Up @@ -1007,30 +1024,30 @@ private function updateListingStatus($objectService, string $listingRegister, st
*/
private function detectPublicationEndpoint(array $listingData): ?string
{
// Check if listing already has a publication endpoint
// Check if listing already has a publication endpoint.
if (!empty($listingData['publications'])) {
return $listingData['publications'];
}

// Check if listing already has a publication endpoint (alternative field name)
// Check if listing already has a publication endpoint (alternative field name).
if (!empty($listingData['publication'])) {
return $listingData['publication'];
}

// Try to generate from search endpoint
// Try to generate from search endpoint.
if (!empty($listingData['search'])) {
// Replace 'search' with 'publications' in the URL
// Replace 'search' with 'publications' in the URL.
$publicationEndpoint = str_replace('/search', '/publications', $listingData['search']);

// Also handle cases where 'search' might be a query parameter or different pattern
// Also handle cases where 'search' might be a query parameter or different pattern.
if ($publicationEndpoint === $listingData['search']) {
// Try replacing 'search' anywhere in the URL path
// Try replacing 'search' anywhere in the URL path.
$publicationEndpoint = preg_replace('/\/search(?=\/|$)/', '/publications', $listingData['search']);
}

// If still no change, try a more generic approach
// If still no change, try a more generic approach.
if ($publicationEndpoint === $listingData['search']) {
// Parse URL and replace 'search' in path segments
// Parse URL and replace 'search' in path segments.
$urlParts = parse_url($listingData['search']);
if ($urlParts && isset($urlParts['path'])) {
$pathSegments = explode('/', trim($urlParts['path'], '/'));
Expand All @@ -1054,12 +1071,63 @@ function ($segment) {
}
}//end if

// Only return if we actually made a change
// Only return if we actually made a change.
if ($publicationEndpoint !== $listingData['search']) {
return $publicationEndpoint;
}
}//end if

// Try to construct from catalogDirectory (the actual catalog's directory endpoint from relations).
// Format: Replace /api/directory with /api/publications.
if (!empty($listingData['catalogDirectory'])) {
$catalogDir = $listingData['catalogDirectory'];
// Replace /api/directory with /api/publications.
$publicationEndpoint = str_replace('/api/directory', '/api/publications', $catalogDir);
if ($publicationEndpoint !== $catalogDir) {
return $publicationEndpoint;
}
}

// Try to construct from directory hostname (fallback for listings without proper relations).
// Format: https://{directory-host}/apps/opencatalogi/api/publications
if (!empty($listingData['directory'])) {
$directory = $listingData['directory'];

// If directory is just a hostname with no protocol (e.g., "directory.opencatalogi.nl" or "opencatalogi.nl"),
// construct the full publications URL from it.
if (strpos($directory, '://') === false) {
// No protocol, assume HTTPS and add standard OpenCatalogi API path.
return 'https://'.$directory.'/apps/opencatalogi/api/publications';
} else {
// Directory is a full URL, extract the base and construct publications endpoint.
$urlParts = parse_url($directory);
if ($urlParts && isset($urlParts['host'])) {
$publicationEndpoint = $urlParts['scheme'].'://'.$urlParts['host'];
if (isset($urlParts['port']) === true) {
$publicationEndpoint .= ':'.$urlParts['port'];
}
$publicationEndpoint .= '/apps/opencatalogi/api/publications';
return $publicationEndpoint;
}
}
}//end if

// Try to infer hostname from catalog title or name.
// For catalogs named like "OpenCatalogi.nl", "Example.com", try using that as hostname.
$title = ($listingData['title'] ?? $listingData['name'] ?? '');
if (!empty($title)) {
// Check if title looks like a domain name (contains a dot and no spaces).
if (strpos($title, '.') !== false && strpos($title, ' ') === false) {
// Looks like a domain, try using it.
$hostname = strtolower(trim($title));
// Strip everything from the first slash or backslash onward (i.e. drop any path component).
$hostname = preg_replace('#[/\\\\].*$#', '', $hostname);
if (!empty($hostname)) {
return 'https://'.$hostname.'/apps/opencatalogi/api/publications';
}
}
}

return null;

}//end detectPublicationEndpoint()
Expand Down
Loading