From 28676d845c95716583b29832ebe496d4a74d0641 Mon Sep 17 00:00:00 2001
From: "warren.jouanneau"
Date: Tue, 17 Feb 2026 03:54:31 +0100
Subject: [PATCH 1/2] feat: freelancer project ranking

---
 src/workrb/tasks/__init__.py               |   6 +
 src/workrb/tasks/ranking/__init__.py       |   6 +
 .../ranking/freelancer_project_matching.py | 301 ++++++++++++++++++
 src/workrb/types.py                        |  16 +
 tests/test_freelancer_project_matching.py  |  19 ++
 5 files changed, 348 insertions(+)
 create mode 100644 src/workrb/tasks/ranking/freelancer_project_matching.py
 create mode 100644 tests/test_freelancer_project_matching.py

diff --git a/src/workrb/tasks/__init__.py b/src/workrb/tasks/__init__.py
index 7987260..8243711 100644
--- a/src/workrb/tasks/__init__.py
+++ b/src/workrb/tasks/__init__.py
@@ -8,6 +8,10 @@
 
 # Task implementations
 from .classification.job2skill import ESCOJob2SkillClassification
+from .ranking.freelancer_project_matching import (
+    ProjectCandidateRanking,
+    SearchQueryCandidateRanking,
+)
 from .ranking.job2skill import ESCOJob2SkillRanking
 from .ranking.job_similarity import JobTitleSimilarityRanking
 from .ranking.jobnorm import JobBERTJobNormRanking
@@ -39,4 +43,6 @@
     "TechSkillExtractRanking",
     "SkillSkapeExtractRanking",
     "SkillMatch1kSkillSimilarityRanking",
+    "ProjectCandidateRanking",
+    "SearchQueryCandidateRanking",
 ]
diff --git a/src/workrb/tasks/ranking/__init__.py b/src/workrb/tasks/ranking/__init__.py
index ce13977..8891e3d 100644
--- a/src/workrb/tasks/ranking/__init__.py
+++ b/src/workrb/tasks/ranking/__init__.py
@@ -7,6 +7,10 @@
 - Minimize external dependencies
 """
 
+from workrb.tasks.ranking.freelancer_project_matching import (
+    ProjectCandidateRanking,
+    SearchQueryCandidateRanking,
+)
 from workrb.tasks.ranking.job2skill import ESCOJob2SkillRanking
 from workrb.tasks.ranking.job_similarity import JobTitleSimilarityRanking
 from workrb.tasks.ranking.jobnorm import JobBERTJobNormRanking
@@ -26,6 +30,8 @@
     "HouseSkillExtractRanking",
     "JobBERTJobNormRanking",
     "JobTitleSimilarityRanking",
+    "ProjectCandidateRanking",
+    "SearchQueryCandidateRanking",
     "SkillMatch1kSkillSimilarityRanking",
     "SkillSkapeExtractRanking",
     "TechSkillExtractRanking",
diff --git a/src/workrb/tasks/ranking/freelancer_project_matching.py b/src/workrb/tasks/ranking/freelancer_project_matching.py
new file mode 100644
index 0000000..697201e
--- /dev/null
+++ b/src/workrb/tasks/ranking/freelancer_project_matching.py
@@ -0,0 +1,301 @@
+"""Freelancer candidate ranking tasks (project briefs and search queries) using a synthetic dataset provided by Malt."""
+
+from abc import ABC, abstractmethod
+from typing import Any
+
+import pandas as pd
+from datasets import load_dataset
+
+from workrb.registry import register_task
+from workrb.tasks.abstract.base import DatasetSplit, LabelType, Language
+from workrb.tasks.abstract.ranking_base import RankingDataset, RankingTask, RankingTaskGroup
+from workrb.types import ModelInputType
+
+
+class _BaseCandidateRanking(RankingTask, ABC):
+    """
+    Freelancer candidate ranking task based on Jouanneau et al. (2024) and Jouanneau et al. (2025).
+
+    This task evaluates a model's ability to rank freelancer candidates, represented by their profile,
+    by semantic similarity (skill matching fit) to a search query or project brief.
+    Given a query (search query or project brief), the model must rank the corpus of candidate profiles
+    so that well-matching profiles appear higher than poorly matching ones.
+
+    Notes
+    -----
+    HuggingFace Dataset: https://huggingface.co/datasets/MaltCompany/Freelancer-Project-Matching
+
+    Languages: en, de, es, fr, nl,
+    plus ``cross_lingual`` to test language-agnostic matching.
+
+    The dataset contains:
+    - ``queries``: 200 search queries
+    - ``briefs``: 200 project briefs (title + description)
+    - ``profiles``: 4019 candidate profiles to rank, each containing:
+        - a headline
+        - a language
+        - a description
+        - a list of skills and soft_skills
+        - a list of experiences, each with:
+            - a title
+            - a description
+            - a list of skills
+    - skill matching fit scores ``brief_scores`` and ``query_scores``, each of shape 200 x 4019
+
+    Each (query or brief, profile) pair is scored; the score indicates the skill matching fit.
+    A threshold is applied to the score to binarize the interactions into relevant and non-relevant pairs.
+    """
+
+    SUPPORTED_DATASET_LANGUAGES = [
+        Language.DE,
+        Language.EN,
+        Language.ES,
+        Language.FR,
+        Language.NL,
+        Language.CROSS,
+    ]
+
+    RELEVANCE_SCORE_THRESHOLD = 0.8
+
+    @property
+    def default_metrics(self) -> list[str]:
+        return ["map", "rp@5", "rp@10", "mrr"]
+
+    @property
+    def task_group(self) -> RankingTaskGroup:
+        """Job Title Similarity task group."""
+        return RankingTaskGroup.JOBSIM
+
+    @property
+    def supported_query_languages(self) -> list[Language]:
+        """Supported query languages."""
+        return [Language.EN]
+
+    @property
+    def supported_target_languages(self) -> list[Language]:
+        """Supported target languages."""
+        return self.SUPPORTED_DATASET_LANGUAGES
+
+    @property
+    def split_test_fraction(self) -> float:
+        """Fraction of data to use for test split."""
+        return 1.0
+
+    @property
+    def label_type(self) -> LabelType:
+        """Multi-label ranking for project-candidate skill job fit."""
+        return LabelType.MULTI_LABEL
+
+    @property
+    def target_input_type(self) -> ModelInputType:
+        """Target input type for profiles."""
+        return ModelInputType.PROFILE_STRING
+
+    @staticmethod
+    def _candidate_profile_to_str(candidate: dict[str, Any]) -> str:
+        experiences = "\n".join(
+            f"{exp['title']}\n{exp['description']}\n{', '.join(exp['skills'])}"
+            for exp in candidate["experiences"]
+        )
+        return (
+            f"{candidate['headline']}\n\n"
+            f"Bio:\n{candidate['description']}\n\n"
+            f"Skills:\n{','.join(candidate['skills'] + candidate['soft_skills'])}\n\n"
+            f"Experiences:\n{experiences}"
+        )
+
+    @staticmethod
+    @abstractmethod
+    def _input_to_str(query: dict[str, str]) -> str:
+        pass
+
+    def _load_and_format_data(
+        self,
+        split: DatasetSplit,
+        language: Language,
+        query_key: str,
+        score_key: str,
+        query_id_column: str,
+    ) -> RankingDataset:
+        if split != DatasetSplit.TEST:
+            raise ValueError(f"Split '{split}' not supported. Use TEST.")
+
+        if language not in self.SUPPORTED_DATASET_LANGUAGES:
+            raise ValueError(f"Language '{language}' not supported.")
+
+        query_df = pd.DataFrame(
+            load_dataset("MaltCompany/Freelancer-Project-Matching", query_key)["test"]
+        )
+        candidate_df = pd.DataFrame(
+            load_dataset("MaltCompany/Freelancer-Project-Matching", "profiles")["test"]
+        )
+        score_df = pd.DataFrame(
+            load_dataset("MaltCompany/Freelancer-Project-Matching", score_key)["test"]
+        )
+
+        if language != Language.CROSS:
+            candidate_df = candidate_df[candidate_df["language"] == language.value]
+
+        # assign a stable corpus index to each candidate profile
+        candidate_df = (
+            candidate_df.reset_index(drop=True).reset_index(names="idx").sort_values("idx")
+        )
+        # attach the corpus index to each (query, profile) score row
+        score_df = candidate_df[["profile_id", "idx"]].merge(score_df, how="left", on="profile_id")
+
+        # threshold scores and keep the indices of relevant profiles per query
+        score_df = (
+            score_df.sort_values("idx")
+            .groupby(by=query_id_column)
+            .apply(
+                lambda group: list(group[group["score"] >= self.RELEVANCE_SCORE_THRESHOLD]["idx"]),
+                include_groups=False,
+            )
+            .reset_index(name="relevancy_labels")
+        )
+
+        # make sure inputs coincide and build the query and document strings
+        relevancy_labels = score_df.sort_values(query_id_column)["relevancy_labels"].tolist()
+        queries = [
+            self._input_to_str(q)
+            for q in query_df.sort_values(query_id_column).to_dict(orient="records")
+        ]
+        corpus = [self._candidate_profile_to_str(p) for p in candidate_df.to_dict(orient="records")]
+
+        return RankingDataset(queries, relevancy_labels, corpus, language=language)
+
+    @property
+    def citation(self) -> str:
+        """Freelancer candidate matching task citation."""
+        return """
+@inproceedings{jouanneau2024skill,
+    title={Skill matching at scale: freelancer-project alignment for efficient multilingual candidate retrieval},
+    author={Jouanneau, Warren and Palyart, Marc and Jouffroy, Emma},
+    booktitle = {Proceedings of the 4th Workshop on Recommender Systems for Human Resources
+                 (RecSys in {HR} 2024), in conjunction with the 18th {ACM} Conference on
+                 Recommender Systems},
+    year={2024}
+}
+@inproceedings{jouanneau2025efficient,
+    title={An Efficient Long-Context Ranking Architecture With Calibrated LLM Distillation: Application to Person-Job Fit},
+    author={Jouanneau, Warren and Jouffroy, Emma and Palyart, Marc},
+    booktitle = {Proceedings of the 5th Workshop on Recommender Systems for Human Resources
+                 (RecSys in {HR} 2025), in conjunction with the 19th {ACM} Conference on
+                 Recommender Systems},
+    year={2025}
+}
+"""
+
+
+@register_task()
+class ProjectCandidateRanking(_BaseCandidateRanking):
+    """
+    Project brief freelancer candidate ranking task based on Jouanneau et al. (2024) and Jouanneau et al. (2025).
+
+    This task evaluates a model's ability to rank freelancer candidates (represented by their profile) by semantic
+    similarity (skill matching fit) to project briefs. Given a query project brief, the model must rank the corpus
+    of candidate profiles so that well-matching profiles appear higher than poorly matching ones.
+
+    Notes
+    -----
+    HuggingFace Dataset: https://huggingface.co/datasets/MaltCompany/Freelancer-Project-Matching
+
+    Languages: en, de, es, fr, nl,
+    plus ``cross_lingual`` to test language-agnostic matching.
+
+    The dataset contains:
+    - ``briefs``: 200 project briefs (title + description)
+    - ``profiles``: 4019 candidate profiles to rank, each containing:
+        - a headline
+        - a language
+        - a description
+        - a list of skills and soft_skills
+        - a list of experiences, each with:
+            - a title
+            - a description
+            - a list of skills
+    - skill matching fit scores ``brief_scores`` of shape 200 x 4019
+
+    Each (brief, profile) pair is scored; the score indicates the skill matching fit.
+    A threshold is applied to the score to binarize the interactions into relevant and non-relevant pairs.
+    """
+
+    @property
+    def name(self) -> str:
+        """Project candidate matching task name."""
+        return "Project-candidate matching"
+
+    @property
+    def description(self) -> str:
+        """Project candidate matching task description."""
+        return "Rank candidate profiles in a corpus by their skill-based job fit to query project briefs."
+
+    @property
+    def query_input_type(self) -> ModelInputType:
+        """Query input type for briefs."""
+        return ModelInputType.PROJECT_BRIEF_STRING
+
+    @staticmethod
+    def _input_to_str(input_dict: dict[str, str]) -> str:
+        return f"{input_dict['title']}\n\n{input_dict['description']}"
+
+    def load_monolingual_data(self, split: DatasetSplit, language: Language) -> RankingDataset:
+        """Load project-candidate matching data from the HuggingFace dataset."""
+        return self._load_and_format_data(split, language, "briefs", "brief_scores", "brief_id")
+
+
+@register_task()
+class SearchQueryCandidateRanking(_BaseCandidateRanking):
+    """
+    Search query freelancer candidate ranking task based on Jouanneau et al. (2024) and Jouanneau et al. (2025).
+
+    This task evaluates a model's ability to rank freelancer candidates (represented by their profile) by semantic
+    similarity (skill matching fit) to search queries. Given a search query, the model must rank the corpus
+    of candidate profiles so that well-matching profiles appear higher than poorly matching ones.
+
+    Notes
+    -----
+    HuggingFace Dataset: https://huggingface.co/datasets/MaltCompany/Freelancer-Project-Matching
+
+    Languages: en, de, es, fr, nl,
+    plus ``cross_lingual`` to test language-agnostic matching.
+
+    The dataset contains:
+    - ``queries``: 200 search queries
+    - ``profiles``: 4019 candidate profiles to rank, each containing:
+        - a headline
+        - a language
+        - a description
+        - a list of skills and soft_skills
+        - a list of experiences, each with:
+            - a title
+            - a description
+            - a list of skills
+    - skill matching fit scores ``query_scores`` of shape 200 x 4019
+
+    Each (query, profile) pair is scored; the score indicates the skill matching fit.
+    A threshold is applied to the score to binarize the interactions into relevant and non-relevant pairs.
+    """
+
+    @property
+    def name(self) -> str:
+        """Query candidate matching task name."""
+        return "Project-candidate matching"
+
+    @property
+    def description(self) -> str:
+        """Query candidate matching task description."""
+        return "Rank candidate profiles in a corpus by their skill-based job fit to a search query."
+
+    @property
+    def query_input_type(self) -> ModelInputType:
+        """Query input type for search queries."""
+        return ModelInputType.SEARCH_QUERY_STRING
+
+    @staticmethod
+    def _input_to_str(input_dict: dict[str, str]) -> str:
+        return input_dict["value"]
+
+    def load_monolingual_data(self, split: DatasetSplit, language: Language) -> RankingDataset:
+        """Load query-candidate matching data from the HuggingFace dataset."""
+        return self._load_and_format_data(split, language, "queries", "query_scores", "query_id")
diff --git a/src/workrb/types.py b/src/workrb/types.py
index 8acedc6..1ae5e83 100644
--- a/src/workrb/types.py
+++ b/src/workrb/types.py
@@ -37,6 +37,7 @@ class Language(str, Enum):
     JA = "ja"
     KO = "ko"
     ZH = "zh"
+    CROSS = "cross_lingual"
 
 
 class LabelType(str, Enum):
@@ -74,3 +75,18 @@
     Sentence describing or containing a skill.
     For example, a skill description or job ad sentence.
     """
+
+    PROJECT_BRIEF_STRING = "brief_string"
+    """
+    A few sentences describing a project.
+    """
+
+    SEARCH_QUERY_STRING = "search_query_string"
+    """
+    A few input words used to search for a profile.
+    """
+
+    PROFILE_STRING = "profile_string"
+    """
+    A few sentences describing an applicant profile.
+    """
diff --git a/tests/test_freelancer_project_matching.py b/tests/test_freelancer_project_matching.py
new file mode 100644
index 0000000..7ea7c10
--- /dev/null
+++ b/tests/test_freelancer_project_matching.py
@@ -0,0 +1,19 @@
+import workrb
+from workrb.tasks.abstract.base import Language
+
+
+def test_freelancer_project_ranking_task_loads():
+    """Test that the tasks load without errors."""
+    task = workrb.tasks.ProjectCandidateRanking(split="test", languages=[Language.EN.value])
+    dataset = task.lang_datasets[Language.EN]
+
+    assert len(dataset.query_texts) > 0
+    assert len(dataset.target_space) > 0
+    assert len(dataset.target_indices) == len(dataset.query_texts)
+
+    task = workrb.tasks.SearchQueryCandidateRanking(split="test", languages=[Language.EN.value])
+    dataset = task.lang_datasets[Language.EN]
+
+    assert len(dataset.query_texts) > 0
+    assert len(dataset.target_space) > 0
+    assert len(dataset.target_indices) == len(dataset.query_texts)
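
For reference, the relevance labels built by _load_and_format_data above are simply a thresholding of
the score matrix. Below is a minimal sketch of that step, written in plain Python with made-up scores
instead of the real pandas pipeline, and assuming only the RELEVANCE_SCORE_THRESHOLD = 0.8 value
defined in the patch:

    # Illustrative sketch only: mirrors the thresholding step of _load_and_format_data,
    # using hypothetical scores instead of the MaltCompany/Freelancer-Project-Matching data.
    RELEVANCE_SCORE_THRESHOLD = 0.8  # same value as in the patch

    # score of each (query id, candidate corpus index) pair; higher means a better skill fit
    scores = {
        "brief_0": {0: 0.91, 1: 0.42, 2: 0.83},
        "brief_1": {0: 0.10, 1: 0.95, 2: 0.79},
    }

    # one list of relevant corpus indices per query, in query order
    relevancy_labels = [
        [idx for idx, score in sorted(per_profile.items()) if score >= RELEVANCE_SCORE_THRESHOLD]
        for _, per_profile in sorted(scores.items())
    ]
    print(relevancy_labels)  # [[0, 2], [1]]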
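
Both tasks are usable as soon as this first patch is applied; the second patch below only adjusts display
names, docstrings, and the profile-string formatting. The snippet below is a sketch that follows the
included test; the cross-lingual call is an assumption based on Language.CROSS being listed in
SUPPORTED_DATASET_LANGUAGES and is not exercised by the test itself.

    import workrb
    from workrb.tasks.abstract.base import Language

    # English-only corpus, project briefs as queries (mirrors the included test)
    task = workrb.tasks.ProjectCandidateRanking(split="test", languages=[Language.EN.value])
    dataset = task.lang_datasets[Language.EN]
    print(len(dataset.query_texts), "briefs,", len(dataset.target_space), "candidate profiles")

    # Assumed usage: the cross_lingual setting keeps profiles from all five languages in the corpus
    xlang_task = workrb.tasks.SearchQueryCandidateRanking(
        split="test", languages=[Language.CROSS.value]
    )
    xlang_dataset = xlang_task.lang_datasets[Language.CROSS]
    print(len(xlang_dataset.query_texts), "queries,", len(xlang_dataset.target_space), "profiles")
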
From aae9d2d34fe151896d304fef3a96672c67278f11 Mon Sep 17 00:00:00 2001
From: "warren.jouanneau"
Date: Fri, 20 Feb 2026 10:23:49 +0100
Subject: [PATCH 2/2] docs: added comments and doc (freelancer candidate matching)

---
 README.md                                  | 30 +++++-----
 .../ranking/freelancer_project_matching.py |  8 +--
 src/workrb/types.py                        | 58 +++++++++++++++++--
 3 files changed, 74 insertions(+), 22 deletions(-)

diff --git a/README.md b/README.md
index 655f020..da4a7c3 100644
--- a/README.md
+++ b/README.md
@@ -201,20 +201,22 @@ lang_result_ci = summary["mean_per_language/en/f1_macro/ci_margin"]
 ## Supported tasks & models
 
 ### Tasks
-| Task Name | Label Type | Dataset Size (English) | Languages |
-| --- | --- | --- | --- |
-| **Ranking**
-| Job to Skills WorkBench | multi_label | 3039 queries x 13939 targets | 28 |
-| Job Title Similarity | multi_label | 105 queries x 2619 targets | 11 |
-| Job Normalization | single_label | 15463 queries x 2942 targets | 28 |
-| Skill to Job WorkBench | multi_label | 13492 queries x 3039 targets | 28 |
-| Skill Extraction House | multi_label | 262 queries x 13891 targets | 28 |
-| Skill Extraction Tech | multi_label | 338 queries x 13891 targets | 28 |
-| Skill Extraction SkillSkape | multi_label | 1191 queries x 13891 targets | 28 |
-| Skill Similarity SkillMatch-1K | single_label | 900 queries x 2648 targets | 1 |
-| Skill Normalization ESCO | multi_label | 72008 queries x 13939 targets | 28 |
-| **Classification**
-| Job-Skill Classification | multi_label | 3039 samples, 13939 classes | 28 |
+| Task Name                      | Label Type   | Dataset Size (English)              | Languages |
+|--------------------------------| --- |-------------------------------------|-----------|
+| **Ranking**
+| Job to Skills WorkBench        | multi_label  | 3039 queries x 13939 targets        | 28        |
+| Job Title Similarity           | multi_label  | 105 queries x 2619 targets          | 11        |
+| Job Normalization              | single_label | 15463 queries x 2942 targets        | 28        |
+| Skill to Job WorkBench         | multi_label  | 13492 queries x 3039 targets        | 28        |
+| Skill Extraction House         | multi_label  | 262 queries x 13891 targets         | 28        |
+| Skill Extraction Tech          | multi_label  | 338 queries x 13891 targets         | 28        |
+| Skill Extraction SkillSkape    | multi_label  | 1191 queries x 13891 targets        | 28        |
+| Skill Similarity SkillMatch-1K | single_label | 900 queries x 2648 targets          | 1         |
+| Skill Normalization ESCO       | multi_label  | 72008 queries x 13939 targets       | 28        |
+| Query-Candidate Matching       | multi_label  | 200 queries x 4019 (x-lang) targets | 5         |
+| Project-Candidate Matching     | multi_label  | 200 queries x 4019 (x-lang) targets | 5         |
+| **Classification**
+| Job-Skill Classification       | multi_label  | 3039 samples, 13939 classes         | 28        |
 
 
 ### Models
diff --git a/src/workrb/tasks/ranking/freelancer_project_matching.py b/src/workrb/tasks/ranking/freelancer_project_matching.py
index 697201e..0d6d69e 100644
--- a/src/workrb/tasks/ranking/freelancer_project_matching.py
+++ b/src/workrb/tasks/ranking/freelancer_project_matching.py
@@ -89,11 +89,11 @@ def label_type(self) -> LabelType:
     @property
     def target_input_type(self) -> ModelInputType:
         """Target input type for profiles."""
-        return ModelInputType.PROFILE_STRING
+        return ModelInputType.CANDIDATE_PROFILE_STRING
 
     @staticmethod
     def _candidate_profile_to_str(candidate: dict[str, Any]) -> str:
-        experiences = "\n".join(
+        experiences = "\n\n".join(
             f"{exp['title']}\n{exp['description']}\n{', '.join(exp['skills'])}"
             for exp in candidate["experiences"]
         )
@@ -223,7 +223,7 @@ class ProjectCandidateRanking(_BaseCandidateRanking):
     @property
     def name(self) -> str:
         """Project candidate matching task name."""
-        return "Project-candidate matching"
+        return "Project-Candidate Matching"
 
     @property
     def description(self) -> str:
@@ -280,7 +280,7 @@ class SearchQueryCandidateRanking(_BaseCandidateRanking):
     @property
     def name(self) -> str:
         """Query candidate matching task name."""
-        return "Project-candidate matching"
+        return "Query-Candidate Matching"
 
     @property
     def description(self) -> str:
diff --git a/src/workrb/types.py b/src/workrb/types.py
index 1ae5e83..2ac3068 100644
--- a/src/workrb/types.py
+++ b/src/workrb/types.py
@@ -78,15 +78,65 @@ class ModelInputType(str, Enum):
 
     PROJECT_BRIEF_STRING = "brief_string"
     """
-    A few sentences describing a project.
+    Full natural-language description of a freelance or consulting opportunity.
+
+    Context:
+        Used as the primary input to candidate-matching pipelines.
+        Represents a concrete project (not a permanent job position).
+
+    Content:
+        Typically includes business context, scope, required skills,
+        expectations, and optional soft requirements.
+
+    Example:
+        Lead Dev for Greenfield B2B Material Platform (Next.js/GraphQL)
+
+        We are Architech Innovations, a scale-up in the AEC tech space. ...
+        We are building a greenfield MVP from the ground up. ...
+        We are looking for a developer who has a proven track record ...
+        skills: Strong decision-making, ...
     """
 
     SEARCH_QUERY_STRING = "search_query_string"
     """
-    A few input words used to search for a profile.
+    Short keyword-based query used to retrieve candidate profiles.
+
+    Context:
+        Used when a full project brief is not available, or as a
+        lightweight input to narrow down candidates before deeper matching.
+
+    Content:
+        Minimal, high-signal keywords describing a role, skillset,
+        or professional focus.
+
+    Example:
+        IT financial controller
     """
 
-    PROFILE_STRING = "profile_string"
+    CANDIDATE_PROFILE_STRING = "candidate_profile_string"
     """
-    A few sentences describing an applicant profile.
+    Structured natural-language summary of a candidate's professional background.
+
+    Context:
+        Retrieved from the candidate database and evaluated for relevance
+        against PROJECT_BRIEF_STRING or SEARCH_QUERY_STRING.
+
+    Content:
+        Includes a title, bio, skills, and experience history.
+
+    Example:
+        Expert Shopify Developer
+
+        Bio:
+        I build and optimize Shopify Plus environments, ...
+
+        Skills:
+        Shopify Plus,Headless Commerce, ...
+
+        Experiences:
+        Lead Shopify Developer
+        I led the development of a headless e-commerce site ...
+        Shopify Plus, ...
+
+        ...
     """