@@ -450,7 +450,7 @@ def _compress_href(self, href: str | None, page_url: str | None = None) -> str:
450450 page_url: Current page URL (to detect same-domain links)
451451
452452 Returns:
453- Compressed href with meaningful path info (e.g., "/dp/ B0FC5SJNQX" or "/s?k= mouse")
453+ Compressed href with meaningful path info (e.g., "B0FC5SJNQX" or "mouse")
454454 """
455455 if not href :
456456 return ""
@@ -469,25 +469,25 @@ def _compress_href(self, href: str | None, page_url: str | None = None) -> str:
469469 )
470470
471471 if parsed .netloc and not is_same_domain :
472- # External link - show domain
472+ # External link - show domain (truncate to 10 chars)
473473 parts = parsed .netloc .split ("." )
474474 if len (parts ) >= 2 :
475- return parts [- 2 ][:15 ]
476- return parsed .netloc [:15 ]
475+ return parts [- 2 ][:10 ]
476+ return parsed .netloc [:10 ]
477477
478478 # Same domain or relative link - extract meaningful path
479479 path = parsed .path or ""
480480
481- # For product pages, extract key identifiers
481+ # For product pages, extract key identifiers (just the ID, not the path prefix)
482482 # Amazon: /dp/XXXXX, /gp/product/XXXXX
483483 # Generic: /product/XXX, /item/XXX, /p/XXX
484484 import re
485485 product_patterns = [
486- r"( /dp/[A-Z0-9]+)" , # Amazon product
487- r"( /gp/product/[A-Z0-9]+)" , # Amazon alt
488- r"( /product/[^/]+)" , # Generic product
489- r"( /item/[^/]+)" , # Generic item
490- r"( /p/[^/]+)" , # Short product
486+ r"/dp/( [A-Z0-9]+)" , # Amazon product
487+ r"/gp/product/( [A-Z0-9]+)" , # Amazon alt
488+ r"/product/( [^/]+)" , # Generic product
489+ r"/item/( [^/]+)" , # Generic item
490+ r"/p/( [^/]+)" , # Short product
491491 ]
492492 for pattern in product_patterns :
493493 match = re .search (pattern , path , re .IGNORECASE )
@@ -511,12 +511,11 @@ def _compress_href(self, href: str | None, page_url: str | None = None) -> str:
511511 if "/checkout" in path .lower ():
512512 return "/checkout"
513513
514- # Fallback: use last meaningful path segment
514+ # Fallback: use last meaningful path segment only (no leading slash)
515515 segments = [s for s in path .split ("/" ) if s and len (s ) > 1 ]
516516 if segments :
517- # Return last 2 segments for context (max 30 chars)
518- result = "/" + "/" .join (segments [- 2 :])
519- return result [:30 ]
517+ # Return only the last segment (max 30 chars)
518+ return segments [- 1 ][:30 ]
520519
521520 return path [:30 ] if path else ""
522521
0 commit comments