Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 86 additions & 1 deletion surfsense_backend/app/connectors/bookstack_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,12 +155,77 @@ def make_api_request(
except requests.exceptions.RequestException as e:
raise Exception(f"BookStack API request failed: {e!s}") from e

def get_all_pages(self, count: int = 500) -> list[dict[str, Any]]:
def get_all_shelves(self, count: int = 500) -> list[dict[str, Any]]:
    """
    Fetch all shelves from BookStack with pagination.

    Repeatedly requests the "shelves" listing, advancing the offset by the
    number of records received, until the reported total has been reached
    or the API returns an empty page.

    Args:
        count: Number of records per request (max 500)

    Returns:
        List of shelf objects

    Raises:
        Exception: If a response is not a dict containing a "data" key
    """
    # The page size never changes between requests, so clamp it once.
    page_size = min(count, 500)
    all_shelves: list[dict[str, Any]] = []
    offset = 0

    while True:
        params = {
            "count": page_size,
            "offset": offset,
        }

        result = self.make_api_request("shelves", params)

        if not isinstance(result, dict) or "data" not in result:
            raise Exception("Invalid response from BookStack API")

        shelves = result["data"]
        all_shelves.extend(shelves)

        logger.info(f"Fetched {len(shelves)} shelves (offset: {offset})")

        # An empty page cannot advance the offset. Stop here, otherwise a
        # "total" larger than what the API actually hands back would make
        # the loop request the same offset forever.
        if not shelves:
            break

        offset += len(shelves)
        if offset >= result.get("total", 0):
            break

    logger.info(f"Total shelves fetched: {len(all_shelves)}")
    return all_shelves

def build_book_to_shelf_map(self) -> dict[int, int]:
    """
    Build a mapping from book_id to shelf_id.

    Fetches all shelves, then requests each shelf's detail record to read
    its book listing, producing a lookup table used for filtering pages
    by shelf. One extra API request is made per shelf.

    Each book maps to exactly one shelf: if the same book id appears in
    several shelf listings, the shelf processed last overwrites the
    earlier entry.

    Returns:
        Dict mapping book_id -> shelf_id
    """
    book_to_shelf: dict[int, int] = {}

    for shelf in self.get_all_shelves():
        shelf_id = shelf["id"]
        shelf_detail = self.make_api_request(f"shelves/{shelf_id}")

        # Anything other than a dict carries no book listing to read.
        if not isinstance(shelf_detail, dict):
            continue

        # "books" may be present but null; .get("books", []) would return
        # None in that case and the iteration would raise TypeError.
        for book in shelf_detail.get("books") or []:
            book_to_shelf[book["id"]] = shelf_id

    return book_to_shelf

def get_all_pages(
self,
count: int = 500,
excluded_shelf_ids: list[int] | None = None,
) -> list[dict[str, Any]]:
"""
Fetch all pages from BookStack with pagination.

Args:
count: Number of records per request (max 500)
excluded_shelf_ids: Optional list of shelf IDs whose pages should be excluded

Returns:
List of page objects
Expand Down Expand Up @@ -195,6 +260,15 @@ def get_all_pages(self, count: int = 500) -> list[dict[str, Any]]:

offset += len(pages)

# Filter by excluded shelves if specified
if excluded_shelf_ids:
book_to_shelf = self.build_book_to_shelf_map()
excluded = set(excluded_shelf_ids)
all_pages = [
p for p in all_pages
if book_to_shelf.get(p.get("book_id")) not in excluded
]

logger.info(f"Total pages fetched: {len(all_pages)}")
return all_pages

Expand Down Expand Up @@ -268,6 +342,7 @@ def get_pages_by_date_range(
start_date: str,
end_date: str,
count: int = 500,
excluded_shelf_ids: list[int] | None = None,
) -> tuple[list[dict[str, Any]], str | None]:
"""
Fetch pages updated within a specific date range.
Expand All @@ -278,6 +353,7 @@ def get_pages_by_date_range(
start_date: Start date in YYYY-MM-DD format
end_date: End date in YYYY-MM-DD format (currently unused, for future use)
count: Number of records per request (max 500)
excluded_shelf_ids: Optional list of shelf IDs whose pages should be excluded

Returns:
Tuple of (list of page objects, error message or None)
Expand Down Expand Up @@ -316,6 +392,15 @@ def get_pages_by_date_range(

offset += len(pages)

# Filter by excluded shelves if specified
if excluded_shelf_ids and all_pages:
book_to_shelf = self.build_book_to_shelf_map()
excluded = set(excluded_shelf_ids)
all_pages = [
p for p in all_pages
if book_to_shelf.get(p.get("book_id")) not in excluded
]

if not all_pages:
return [], f"No pages found updated after {start_date}"

Expand Down
47 changes: 47 additions & 0 deletions surfsense_backend/app/routes/search_source_connectors_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,53 @@ async def list_github_repositories(
) from e


class BookStackCredentialsRequest(BaseModel):
    """Request body carrying the credentials used to call a BookStack API.

    All three fields are required strings.
    """

    # Where the BookStack instance lives.
    base_url: str = Field(
        ...,
        description="BookStack instance base URL",
    )
    # Identifier half of the API token pair.
    token_id: str = Field(
        ...,
        description="BookStack API Token ID",
    )
    # Secret half of the API token pair.
    token_secret: str = Field(
        ...,
        description="BookStack API Token Secret",
    )


@router.post("/bookstack/shelves", response_model=list[dict[str, Any]])
async def list_bookstack_shelves(
    creds: BookStackCredentialsRequest,
    user: User = Depends(current_active_user),
):
    """
    Fetches all shelves from a BookStack instance.
    Used by the frontend to let users select which shelves to exclude from indexing.

    Args:
        creds: Base URL and API token pair for the BookStack instance
        user: The authenticated user; referenced here only in log messages

    Returns:
        One dict per shelf with the keys "id", "name", "book_count" and
        "books" (a list of {"id", "name"} dicts).

    Raises:
        HTTPException: 400 when a ValueError is raised while building or
            using the client, 500 for any other failure.
    """
    import asyncio

    def _collect_shelves() -> list[dict[str, Any]]:
        # Imported lazily, inside the guarded call, so an import failure is
        # reported through the same error handling as any other failure.
        from app.connectors.bookstack_connector import BookStackConnector

        client = BookStackConnector(
            base_url=creds.base_url,
            token_id=creds.token_id,
            token_secret=creds.token_secret,
        )

        summaries = []
        for shelf in client.get_all_shelves():
            detail = client.make_api_request(f"shelves/{shelf['id']}")
            # Tolerate a non-dict detail and a null "books" value alike.
            books = (detail.get("books") or []) if isinstance(detail, dict) else []
            summaries.append(
                {
                    "id": shelf["id"],
                    "name": shelf["name"],
                    "book_count": len(books),
                    "books": [{"id": b["id"], "name": b["name"]} for b in books],
                }
            )
        return summaries

    try:
        # The connector issues synchronous HTTP requests (one per shelf on
        # top of the listing), so run it in a worker thread instead of
        # stalling the event loop for every other request.
        return await asyncio.to_thread(_collect_shelves)
    except ValueError as e:
        logger.error(f"BookStack credential validation failed for user {user.id}: {e!s}")
        raise HTTPException(status_code=400, detail=f"Invalid BookStack credentials: {e!s}") from e
    except Exception as e:
        logger.error(f"Failed to fetch BookStack shelves for user {user.id}: {e!s}")
        raise HTTPException(
            status_code=500, detail=f"Failed to fetch BookStack shelves: {e!s}"
        ) from e


@router.post("/search-source-connectors", response_model=SearchSourceConnectorRead)
async def create_search_source_connector(
connector: SearchSourceConnectorCreate,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ async def index_bookstack_pages(
bookstack_token_id = connector.config.get("BOOKSTACK_TOKEN_ID")
bookstack_token_secret = connector.config.get("BOOKSTACK_TOKEN_SECRET")

# Optional: shelf IDs to exclude from indexing
excluded_shelf_ids = connector.config.get("BOOKSTACK_EXCLUDED_SHELF_IDS", [])

if (
not bookstack_base_url
or not bookstack_token_id
Expand Down Expand Up @@ -148,7 +151,9 @@ async def index_bookstack_pages(
# Get pages within date range
try:
pages, error = bookstack_client.get_pages_by_date_range(
start_date=start_date_str, end_date=end_date_str
start_date=start_date_str,
end_date=end_date_str,
excluded_shelf_ids=excluded_shelf_ids,
)

if error:
Expand Down
Loading