Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/lenskit/data/attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,7 @@ def cat_matrix(
shape=(len(arr), len(vocab)),
)

matrix = matrix.copy()
matrix = normalize_matrix(matrix, normalize)

return matrix, vocab
Expand Down
2 changes: 2 additions & 0 deletions src/lenskit/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from .predict import MAE, RMSE
from .ranking import (
DCG,
ILS,
NDCG,
RBP,
Entropy,
Expand Down Expand Up @@ -67,6 +68,7 @@
"quick_measure_model",
"least_item_promoted",
"rank_biased_overlap",
"ILS",
"Entropy",
"RankBiasedEntropy",
]
Expand Down
2 changes: 2 additions & 0 deletions src/lenskit/metrics/ranking/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from ._entropy import Entropy, RankBiasedEntropy
from ._gini import ExposureGini, ListGini
from ._hit import Hit
from ._ils import ILS
from ._map import AveragePrecision
from ._pop import MeanPopRank
from ._pr import Precision, Recall
Expand All @@ -39,4 +40,5 @@
"ExposureGini",
"Entropy",
"RankBiasedEntropy",
"ILS",
]
104 changes: 104 additions & 0 deletions src/lenskit/metrics/ranking/_ils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# This file is part of LensKit.
# Copyright (C) 2018-2023 Boise State University.
# Copyright (C) 2023-2025 Drexel University.
# Licensed under the MIT license, see LICENSE.md for details.
# SPDX-License-Identifier: MIT

from __future__ import annotations

import numpy as np
import scipy.sparse as sps
from scipy.sparse import csr_array

from lenskit.data import Dataset, ItemList, Vocabulary

from ._base import ListMetric, RankingMetricBase


def intra_list_similarity(items: ItemList, vectors: np.ndarray | sps.spmatrix) -> float:
    """
    Compute intra-list similarity between item vectors.

    Args:
        items: Item list to evaluate.
        vectors: Matrix (dense or sparse) where each row is a
            unit-normalized vector representing an item.

    Returns:
        Average pairwise cosine similarity, or NaN if insufficient data.
    """
    n_vecs = vectors.shape[0]

    # nothing to measure: similarity is undefined
    if len(items) == 0 or n_vecs == 0:
        return np.nan

    # a single item is trivially fully self-similar
    if n_vecs <= 1:
        return 1.0

    # with unit-normalized rows, the Gram matrix holds pairwise cosines
    gram = vectors @ vectors.T
    if sps.issparse(gram):
        gram = gram.toarray()
    sim = np.array(gram)

    # average over the n*(n-1)/2 distinct (strictly upper-triangular) pairs
    n_pairs = n_vecs * (n_vecs - 1) / 2
    return float(np.sum(np.triu(sim, 1)) / n_pairs)


class ILS(ListMetric, RankingMetricBase):
    """
    Evaluate recommendation diversity using intra-list similarity (ILS).

    This metric measures the average pairwise cosine similarity between item
    vectors in a recommendation list.  Lower values indicate more diverse
    recommendations, while higher values indicate less diverse recommendations.

    Args:
        dataset: The LensKit dataset containing item entities and their attributes.
        attribute: Name of the attribute or vector source (e.g., 'genre', 'tag').
        n: Recommendation list length to evaluate.

    Stability:
        Caller
    """

    attribute: str
    _cat_matrix: np.ndarray | csr_array
    _item_vocab: Vocabulary

    def __init__(
        self,
        dataset: Dataset,
        attribute: str,
        n: int | None = None,
    ):
        super().__init__(n)
        self.attribute = attribute

        # pre-compute the unit-normalized category matrix once, so that
        # per-list measurement is just a row lookup
        entities = dataset.entities("item")
        attr_set = entities.attribute(attribute)
        self._cat_matrix, _ = attr_set.cat_matrix(normalize="unit")
        self._item_vocab = entities.vocabulary

    @property
    def label(self):
        # attribute name always appears; list length only when configured
        suffix = "" if self.n is None else f"@{self.n}"
        return f"ILS({self.attribute}){suffix}"

    def measure_list(self, recs: ItemList, test: ItemList) -> float:
        recs = self.truncate(recs)

        # map recommended items to matrix rows, dropping items unknown
        # to the training vocabulary
        nums = recs.numbers(vocabulary=self._item_vocab, missing="negative")
        nums = nums[nums >= 0]
        if len(nums) == 0:
            return np.nan

        return intra_list_similarity(recs, self._cat_matrix[nums, :])
155 changes: 155 additions & 0 deletions tests/eval/test_ils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
# This file is part of LensKit.
# Copyright (C) 2018-2023 Boise State University.
# Copyright (C) 2023-2025 Drexel University.
# Licensed under the MIT license, see LICENSE.md for details.
# SPDX-License-Identifier: MIT

import numpy as np
import scipy.sparse as sps

from pytest import approx

from lenskit.data import ItemList
from lenskit.metrics import ILS
from lenskit.metrics.ranking._ils import intra_list_similarity


def test_ils_identical_items():
    """All items share one category vector, so ILS is maximal."""
    recs = ItemList([1, 2, 3], ordered=True)
    vectors = np.tile([1, 0, 0], (3, 1))

    assert intra_list_similarity(recs, vectors) == approx(1.0, abs=0.001)


def test_ils_orthogonal_items():
    """Pairwise-orthogonal unit vectors give zero similarity."""
    recs = ItemList([1, 2, 3], ordered=True)

    assert intra_list_similarity(recs, np.eye(3, dtype=int)) == approx(0.0, abs=0.001)


def test_ils_sparse_identical():
    """Sparse input with identical one-hot rows behaves like the dense case."""
    recs = ItemList([1, 2, 3], ordered=True)
    # three items, all in column 0
    mat = sps.csr_array(([1, 1, 1], ([0, 1, 2], [0, 0, 0])), shape=(3, 3))

    assert intra_list_similarity(recs, mat) == approx(1.0, abs=0.001)


def test_ils_sparse_orthogonal():
    """Sparse one-hot rows in distinct columns are fully dissimilar."""
    recs = ItemList([1, 2, 3], ordered=True)
    idx = np.arange(3)
    mat = sps.csr_array((np.ones(3, dtype=int), (idx, idx)), shape=(3, 3))

    assert intra_list_similarity(recs, mat) == approx(0.0, abs=0.001)


def test_ils_partial_overlap():
    """Two matching items plus one distinct item average to 1/3."""
    recs = ItemList([1, 2, 3], ordered=True)
    # items 1 and 2 share a category; item 3 is orthogonal
    vectors = np.array([[1, 0], [1, 0], [0, 1]])

    # pairwise: (1,2)=1.0, (1,3)=0.0, (2,3)=0.0 -> mean (1.0+0.0+0.0)/3
    assert intra_list_similarity(recs, vectors) == approx(0.333, abs=0.01)


def test_ils_empty_list():
    """An empty recommendation list has undefined (NaN) similarity."""
    score = intra_list_similarity(ItemList([], ordered=True), np.empty((0, 3)))

    assert np.isnan(score)


def test_ils_single_item():
    """A single item is trivially fully self-similar."""
    score = intra_list_similarity(ItemList([1], ordered=True), np.array([[1, 0, 0]]))

    assert score == 1.0


def test_ils_two_items_similar():
    """Two identical vectors yield similarity 1."""
    recs = ItemList([1, 2], ordered=True)

    assert intra_list_similarity(recs, np.tile([1, 0], (2, 1))) == approx(1.0, abs=0.001)


def test_ils_two_items_different():
    """Two orthogonal vectors yield similarity 0."""
    recs = ItemList([1, 2], ordered=True)

    assert intra_list_similarity(recs, np.eye(2, dtype=int)) == approx(0.0, abs=0.001)


# ILS class


def test_ils_class_label(ml_ds):
    """Label shows the attribute name, plus the list length when set."""
    assert ILS(ml_ds, "genres").label == "ILS(genres)"
    assert ILS(ml_ds, "genres", n=10).label == "ILS(genres)@10"


def test_ils_class_measure_list(ml_ds):
    """Measuring a real item list yields a similarity in [0, 1]."""
    ils = ILS(ml_ds, "genres", n=10)

    # take a handful of known items from the dataset
    ids = ml_ds.items.ids()[:15]
    score = ils.measure_list(ItemList(ids, ordered=True), ItemList(ids[:5]))

    assert 0.0 <= score <= 1.0


def test_ils_class_measure_list_empty(ml_ds):
    """An empty recommendation list produces NaN."""
    ils = ILS(ml_ds, "genres", n=10)

    score = ils.measure_list(ItemList([], ordered=True), ItemList([1, 2, 3]))

    assert np.isnan(score)


def test_ils_large_sparse(ml_ds):
    """Larger sparse matrix: each of 5 categories shared by exactly 2 of 10 items.

    The original test built this matrix but passed it as the *truth-list*
    argument of ``ILS.measure_list``, which ignores it — the score came from
    the ml_ds genre data instead.  Measure the matrix directly so the fixture
    is actually exercised and the expected value is known.
    """
    items = ItemList([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], ordered=True)
    # 10 items, 5 categories; item i and item i+5 share category i
    row = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    col = [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]
    data = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    categories = sps.csr_array((data, (row, col)), shape=(10, 5))

    result = intra_list_similarity(items, categories)
    # 5 of the 45 distinct pairs share a category: ILS = 5/45 = 1/9
    assert result == approx(1 / 9, abs=0.001)


def test_ils_mixed_similarity(ml_ds):
    """A mix of near-duplicate and near-orthogonal vectors gives a mid-range ILS.

    The original test passed the dense matrix as the *truth-list* argument of
    ``ILS.measure_list``, which ignores it — the matrix was never used.
    Measure it directly with the exact expected value instead.
    """
    items = ItemList([1, 2, 3, 4], ordered=True)
    dense = np.array([[1, 0, 0], [0.9, 0.1, 0], [0, 0, 1], [0, 0.1, 0.9]])

    result = intra_list_similarity(items, dense)
    # pair dot products: (1,2)=0.9, (3,4)=0.9, (2,4)=0.01, others 0
    # mean over 6 pairs = 1.81 / 6
    assert result == approx(1.81 / 6, abs=0.001)
    assert 0.0 < result < 1.0
Loading