From 28676d845c95716583b29832ebe496d4a74d0641 Mon Sep 17 00:00:00 2001
From: "warren.jouanneau"
Date: Tue, 17 Feb 2026 03:54:31 +0100
Subject: [PATCH 1/2] feat: freelancer project ranking

---
 src/workrb/tasks/__init__.py               |   6 +
 src/workrb/tasks/ranking/__init__.py       |   6 +
 .../ranking/freelancer_project_matching.py | 301 ++++++++++++++++++
 src/workrb/types.py                        |  16 +
 tests/test_freelancer_project_matching.py  |  19 ++
 5 files changed, 348 insertions(+)
 create mode 100644 src/workrb/tasks/ranking/freelancer_project_matching.py
 create mode 100644 tests/test_freelancer_project_matching.py

diff --git a/src/workrb/tasks/__init__.py b/src/workrb/tasks/__init__.py
index 7987260..8243711 100644
--- a/src/workrb/tasks/__init__.py
+++ b/src/workrb/tasks/__init__.py
@@ -8,6 +8,10 @@
 
 # Task implementations
 from .classification.job2skill import ESCOJob2SkillClassification
+from .ranking.freelancer_project_matching import (
+    ProjectCandidateRanking,
+    SearchQueryCandidateRanking,
+)
 from .ranking.job2skill import ESCOJob2SkillRanking
 from .ranking.job_similarity import JobTitleSimilarityRanking
 from .ranking.jobnorm import JobBERTJobNormRanking
@@ -39,4 +43,6 @@
     "TechSkillExtractRanking",
     "SkillSkapeExtractRanking",
     "SkillMatch1kSkillSimilarityRanking",
+    "ProjectCandidateRanking",
+    "SearchQueryCandidateRanking",
 ]
diff --git a/src/workrb/tasks/ranking/__init__.py b/src/workrb/tasks/ranking/__init__.py
index ce13977..8891e3d 100644
--- a/src/workrb/tasks/ranking/__init__.py
+++ b/src/workrb/tasks/ranking/__init__.py
@@ -7,6 +7,10 @@
 - Minimize external dependencies
 """
 
+from workrb.tasks.ranking.freelancer_project_matching import (
+    ProjectCandidateRanking,
+    SearchQueryCandidateRanking,
+)
 from workrb.tasks.ranking.job2skill import ESCOJob2SkillRanking
 from workrb.tasks.ranking.job_similarity import JobTitleSimilarityRanking
 from workrb.tasks.ranking.jobnorm import JobBERTJobNormRanking
@@ -26,6 +30,8 @@
     "HouseSkillExtractRanking",
     "JobBERTJobNormRanking",
     "JobTitleSimilarityRanking",
+    "ProjectCandidateRanking",
+    "SearchQueryCandidateRanking",
     "SkillMatch1kSkillSimilarityRanking",
     "SkillSkapeExtractRanking",
     "TechSkillExtractRanking",
diff --git a/src/workrb/tasks/ranking/freelancer_project_matching.py b/src/workrb/tasks/ranking/freelancer_project_matching.py
new file mode 100644
index 0000000..697201e
--- /dev/null
+++ b/src/workrb/tasks/ranking/freelancer_project_matching.py
@@ -0,0 +1,301 @@
+"""Freelancer candidate ranking tasks (project briefs and search queries) using a synthetic dataset provided by Malt."""
+
+from abc import ABC, abstractmethod
+from typing import Any
+
+import pandas as pd
+from datasets import load_dataset
+
+from workrb.registry import register_task
+from workrb.tasks.abstract.base import DatasetSplit, LabelType, Language
+from workrb.tasks.abstract.ranking_base import RankingDataset, RankingTask, RankingTaskGroup
+from workrb.types import ModelInputType
+
+
+class _BaseCandidateRanking(RankingTask, ABC):
+    """
+    Freelancer candidate ranking task based on Jouanneau et al. (2024) and Jouanneau et al. (2025).
+
+    This task evaluates a model's ability to rank freelancer candidates, represented by their profile,
+    by semantic similarity (skill matching fit) to a search query or project brief.
+    Given a query (search query or project brief), the model must rank the corpus of candidate profiles
+    so that well-matching profiles appear higher than poorly matching ones.
+
+    Notes
+    -----
+    HuggingFace Dataset: https://huggingface.co/datasets/MaltCompany/Freelancer-Project-Matching
+
+    Languages: en, de, es, fr, nl,
+    plus ``cross_lingual`` to test language-agnostic matching.
+
+    The dataset contains:
+    - ``queries``: 200 search queries
+    - ``briefs``: 200 project briefs (title + description)
+    - ``profiles``: 4019 candidate profiles to rank, each containing:
+        - a headline
+        - a language
+        - a description
+        - a list of skills and soft_skills
+        - a list of experiences, each with:
+            - a title
+            - a description
+            - a list of skills
+    - skill matching fit scores ``brief_scores`` and ``query_scores``, each of shape 200 x 4019
+
+    Each (query or brief, profile) pair is scored; the score indicates the skill matching fit.
+    A threshold is applied to the score to binarize the interactions into relevant and non-relevant pairs.
+    """
+
+    SUPPORTED_DATASET_LANGUAGES = [
+        Language.DE,
+        Language.EN,
+        Language.ES,
+        Language.FR,
+        Language.NL,
+        Language.CROSS,
+    ]
+
+    RELEVANCE_SCORE_THRESHOLD = 0.8
+
+    @property
+    def default_metrics(self) -> list[str]:
+        return ["map", "rp@5", "rp@10", "mrr"]
+
+    @property
+    def task_group(self) -> RankingTaskGroup:
+        """Job Title Similarity task group."""
+        return RankingTaskGroup.JOBSIM
+
+    @property
+    def supported_query_languages(self) -> list[Language]:
+        """Supported query languages."""
+        return [Language.EN]
+
+    @property
+    def supported_target_languages(self) -> list[Language]:
+        """Supported target languages."""
+        return self.SUPPORTED_DATASET_LANGUAGES
+
+    @property
+    def split_test_fraction(self) -> float:
+        """Fraction of data to use for test split."""
+        return 1.0
+
+    @property
+    def label_type(self) -> LabelType:
+        """Multi-label ranking for project-candidate skill job fit."""
+        return LabelType.MULTI_LABEL
+
+    @property
+    def target_input_type(self) -> ModelInputType:
+        """Target input type for profiles."""
+        return ModelInputType.PROFILE_STRING
+
+    @staticmethod
+    def _candidate_profile_to_str(candidate: dict[str, Any]) -> str:
+        experiences = "\n".join(
+            f"{exp['title']}\n{exp['description']}\n{', '.join(exp['skills'])}"
+            for exp in candidate["experiences"]
+        )
+        return (
+            f"{candidate['headline']}\n\n"
+            f"Bio:\n{candidate['description']}\n\n"
+            f"Skills:\n{','.join(candidate['skills'] + candidate['soft_skills'])}\n\n"
+            f"Experiences:\n{experiences}"
+        )
+
+    @staticmethod
+    @abstractmethod
+    def _input_to_str(query: dict[str, str]) -> str:
+        pass
+
+    def _load_and_format_data(
+        self,
+        split: DatasetSplit,
+        language: Language,
+        query_key: str,
+        score_key: str,
+        query_id_column: str,
+    ) -> RankingDataset:
+        if split != DatasetSplit.TEST:
+            raise ValueError(f"Split '{split}' not supported. Use TEST.")
+
+        if language not in self.SUPPORTED_DATASET_LANGUAGES:
+            raise ValueError(f"Language '{language}' not supported.")
+
+        query_df = pd.DataFrame(
+            load_dataset("MaltCompany/Freelancer-Project-Matching", query_key)["test"]
+        )
+        candidate_df = pd.DataFrame(
+            load_dataset("MaltCompany/Freelancer-Project-Matching", "profiles")["test"]
+        )
+        score_df = pd.DataFrame(
+            load_dataset("MaltCompany/Freelancer-Project-Matching", score_key)["test"]
+        )
+
+        if language != Language.CROSS:
+            candidate_df = candidate_df[candidate_df["language"] == language.value]
+
+        # assign a stable corpus index to each candidate profile
+        candidate_df = (
+            candidate_df.reset_index(drop=True).reset_index(names="idx").sort_values("idx")
+        )
+        # attach the corpus index to each (query, profile) score row
+        score_df = candidate_df[["profile_id", "idx"]].merge(score_df, how="left", on="profile_id")
+
+        # threshold scores and keep the indices of relevant profiles per query
+        score_df = (
+            score_df.sort_values("idx")
+            .groupby(by=query_id_column)
+            .apply(
+                lambda group: list(group[group["score"] >= self.RELEVANCE_SCORE_THRESHOLD]["idx"]),
+                include_groups=False,
+            )
+            .reset_index(name="relevancy_labels")
+        )
+
+        # make sure inputs coincide and build the query and document strings
+        relevancy_labels = score_df.sort_values(query_id_column)["relevancy_labels"].tolist()
+        queries = [
+            self._input_to_str(q)
+            for q in query_df.sort_values(query_id_column).to_dict(orient="records")
+        ]
+        corpus = [self._candidate_profile_to_str(p) for p in candidate_df.to_dict(orient="records")]
+
+        return RankingDataset(queries, relevancy_labels, corpus, language=language)
+
+    @property
+    def citation(self) -> str:
+        """Freelancer candidate matching task citation."""
+        return """
+@inproceedings{jouanneau2024skill,
+    title={Skill matching at scale: freelancer-project alignment for efficient multilingual candidate retrieval},
+    author={Jouanneau, Warren and Palyart, Marc and Jouffroy, Emma},
+    booktitle = {Proceedings of the 4th Workshop on Recommender Systems for Human Resources
+                 (RecSys in {HR} 2024), in conjunction with the 18th {ACM} Conference on
+                 Recommender Systems},
+    year={2024}
+}
+@inproceedings{jouanneau2025efficient,
+    title={An Efficient Long-Context Ranking Architecture With Calibrated LLM Distillation: Application to Person-Job Fit},
+    author={Jouanneau, Warren and Jouffroy, Emma and Palyart, Marc},
+    booktitle = {Proceedings of the 5th Workshop on Recommender Systems for Human Resources
+                 (RecSys in {HR} 2025), in conjunction with the 19th {ACM} Conference on
+                 Recommender Systems},
+    year={2025}
+}
+"""
+
+
+@register_task()
+class ProjectCandidateRanking(_BaseCandidateRanking):
+    """
+    Project brief freelancer candidate ranking task based on Jouanneau et al. (2024) and Jouanneau et al. (2025).
+
+    This task evaluates a model's ability to rank freelancer candidates (represented by their profile) by semantic
+    similarity (skill matching fit) to project briefs. Given a query project brief, the model must rank the corpus
+    of candidate profiles so that well-matching profiles appear higher than poorly matching ones.
+
+    Notes
+    -----
+    HuggingFace Dataset: https://huggingface.co/datasets/MaltCompany/Freelancer-Project-Matching
+
+    Languages: en, de, es, fr, nl,
+    plus ``cross_lingual`` to test language-agnostic matching.
+
+    The dataset contains:
+    - ``briefs``: 200 project briefs (title + description)
+    - ``profiles``: 4019 candidate profiles to rank, each containing:
+        - a headline
+        - a language
+        - a description
+        - a list of skills and soft_skills
+        - a list of experiences, each with:
+            - a title
+            - a description
+            - a list of skills
+    - skill matching fit scores ``brief_scores`` of shape 200 x 4019
+
+    Each (brief, profile) pair is scored; the score indicates the skill matching fit.
+    A threshold is applied to the score to binarize the interactions into relevant and non-relevant pairs.
+    """
+
+    @property
+    def name(self) -> str:
+        """Project candidate matching task name."""
+        return "Project-candidate matching"
+
+    @property
+    def description(self) -> str:
+        """Project candidate matching task description."""
+        return "Rank candidate profiles in a corpus by their skill-based job fit to query project briefs."
+
+    @property
+    def query_input_type(self) -> ModelInputType:
+        """Query input type for briefs."""
+        return ModelInputType.PROJECT_BRIEF_STRING
+
+    @staticmethod
+    def _input_to_str(input_dict: dict[str, str]) -> str:
+        return f"{input_dict['title']}\n\n{input_dict['description']}"
+
+    def load_monolingual_data(self, split: DatasetSplit, language: Language) -> RankingDataset:
+        """Load project-candidate matching data from the HuggingFace dataset."""
+        return self._load_and_format_data(split, language, "briefs", "brief_scores", "brief_id")
+
+
+@register_task()
+class SearchQueryCandidateRanking(_BaseCandidateRanking):
+    """
+    Search query freelancer candidate ranking task based on Jouanneau et al. (2024) and Jouanneau et al. (2025).
+
+    This task evaluates a model's ability to rank freelancer candidates (represented by their profile) by semantic
+    similarity (skill matching fit) to search queries. Given a search query, the model must rank the corpus
+    of candidate profiles so that well-matching profiles appear higher than poorly matching ones.
+
+    Notes
+    -----
+    HuggingFace Dataset: https://huggingface.co/datasets/MaltCompany/Freelancer-Project-Matching
+
+    Languages: en, de, es, fr, nl,
+    plus ``cross_lingual`` to test language-agnostic matching.
+
+    The dataset contains:
+    - ``queries``: 200 search queries
+    - ``profiles``: 4019 candidate profiles to rank, each containing:
+        - a headline
+        - a language
+        - a description
+        - a list of skills and soft_skills
+        - a list of experiences, each with:
+            - a title
+            - a description
+            - a list of skills
+    - skill matching fit scores ``query_scores`` of shape 200 x 4019
+
+    Each (query, profile) pair is scored; the score indicates the skill matching fit.
+    A threshold is applied to the score to binarize the interactions into relevant and non-relevant pairs.
+    """
+
+    @property
+    def name(self) -> str:
+        """Query candidate matching task name."""
+        return "Project-candidate matching"
+
+    @property
+    def description(self) -> str:
+        """Query candidate matching task description."""
+        return "Rank candidate profiles in a corpus by their skill-based job fit to a search query."
+
+    @property
+    def query_input_type(self) -> ModelInputType:
+        """Query input type for search queries."""
+        return ModelInputType.SEARCH_QUERY_STRING
+
+    @staticmethod
+    def _input_to_str(input_dict: dict[str, str]) -> str:
+        return input_dict["value"]
+
+    def load_monolingual_data(self, split: DatasetSplit, language: Language) -> RankingDataset:
+        """Load query-candidate matching data from the HuggingFace dataset."""
+        return self._load_and_format_data(split, language, "queries", "query_scores", "query_id")
diff --git a/src/workrb/types.py b/src/workrb/types.py
index 8acedc6..1ae5e83 100644
--- a/src/workrb/types.py
+++ b/src/workrb/types.py
@@ -37,6 +37,7 @@ class Language(str, Enum):
     JA = "ja"
     KO = "ko"
     ZH = "zh"
+    CROSS = "cross_lingual"
 
 
 class LabelType(str, Enum):
@@ -74,3 +75,18 @@
     Sentence describing or containing a skill.
     For example, a skill description or job ad sentence.
     """
+
+    PROJECT_BRIEF_STRING = "brief_string"
+    """
+    A few sentences describing a project.
+    """
+
+    SEARCH_QUERY_STRING = "search_query_string"
+    """
+    A few input words used to search for a profile.
+    """
+
+    PROFILE_STRING = "profile_string"
+    """
+    A few sentences describing an applicant profile.
+    """
diff --git a/tests/test_freelancer_project_matching.py b/tests/test_freelancer_project_matching.py
new file mode 100644
index 0000000..7ea7c10
--- /dev/null
+++ b/tests/test_freelancer_project_matching.py
@@ -0,0 +1,19 @@
+import workrb
+from workrb.tasks.abstract.base import Language
+
+
+def test_freelancer_project_ranking_task_loads():
+    """Test that the tasks load without errors."""
+    task = workrb.tasks.ProjectCandidateRanking(split="test", languages=[Language.EN.value])
+    dataset = task.lang_datasets[Language.EN]
+
+    assert len(dataset.query_texts) > 0
+    assert len(dataset.target_space) > 0
+    assert len(dataset.target_indices) == len(dataset.query_texts)
+
+    task = workrb.tasks.SearchQueryCandidateRanking(split="test", languages=[Language.EN.value])
+    dataset = task.lang_datasets[Language.EN]
+
+    assert len(dataset.query_texts) > 0
+    assert len(dataset.target_space) > 0
+    assert len(dataset.target_indices) == len(dataset.query_texts)
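
For reference, the relevance labels built by _load_and_format_data above are simply a thresholding of
the score matrix. Below is a minimal sketch of that step, written in plain Python with made-up scores
instead of the real pandas pipeline, and assuming only the RELEVANCE_SCORE_THRESHOLD = 0.8 value
defined in the patch:

    # Illustrative sketch only: mirrors the thresholding step of _load_and_format_data,
    # using hypothetical scores instead of the MaltCompany/Freelancer-Project-Matching data.
    RELEVANCE_SCORE_THRESHOLD = 0.8  # same value as in the patch

    # score of each (query id, candidate corpus index) pair; higher means a better skill fit
    scores = {
        "brief_0": {0: 0.91, 1: 0.42, 2: 0.83},
        "brief_1": {0: 0.10, 1: 0.95, 2: 0.79},
    }

    # one list of relevant corpus indices per query, in query order
    relevancy_labels = [
        [idx for idx, score in sorted(per_profile.items()) if score >= RELEVANCE_SCORE_THRESHOLD]
        for _, per_profile in sorted(scores.items())
    ]
    print(relevancy_labels)  # [[0, 2], [1]]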
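
Both tasks are usable as soon as this first patch is applied; the second patch below only adjusts display
names, docstrings, and the profile-string formatting. The snippet below is a sketch that follows the
included test; the cross-lingual call is an assumption based on Language.CROSS being listed in
SUPPORTED_DATASET_LANGUAGES and is not exercised by the test itself.

    import workrb
    from workrb.tasks.abstract.base import Language

    # English-only corpus, project briefs as queries (mirrors the included test)
    task = workrb.tasks.ProjectCandidateRanking(split="test", languages=[Language.EN.value])
    dataset = task.lang_datasets[Language.EN]
    print(len(dataset.query_texts), "briefs,", len(dataset.target_space), "candidate profiles")

    # Assumed usage: the cross_lingual setting keeps profiles from all five languages in the corpus
    xlang_task = workrb.tasks.SearchQueryCandidateRanking(
        split="test", languages=[Language.CROSS.value]
    )
    xlang_dataset = xlang_task.lang_datasets[Language.CROSS]
    print(len(xlang_dataset.query_texts), "queries,", len(xlang_dataset.target_space), "profiles")
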
From aae9d2d34fe151896d304fef3a96672c67278f11 Mon Sep 17 00:00:00 2001
From: "warren.jouanneau"
Date: Fri, 20 Feb 2026 10:23:49 +0100
Subject: [PATCH 2/2] docs: added comments and doc (freelancer candidate matching)

---
 README.md                                  | 30 +++++-----
 .../ranking/freelancer_project_matching.py |  8 +--
 src/workrb/types.py                        | 58 +++++++++++++++++--
 3 files changed, 74 insertions(+), 22 deletions(-)

diff --git a/README.md b/README.md
index 655f020..da4a7c3 100644
--- a/README.md
+++ b/README.md
@@ -201,20 +201,22 @@ lang_result_ci = summary["mean_per_language/en/f1_macro/ci_margin"]
 ## Supported tasks & models
 
 ### Tasks
-| Task Name | Label Type | Dataset Size (English) | Languages |
-| --- | --- | --- | --- |
-| **Ranking**
-| Job to Skills WorkBench | multi_label | 3039 queries x 13939 targets | 28 |
-| Job Title Similarity | multi_label | 105 queries x 2619 targets | 11 |
-| Job Normalization | single_label | 15463 queries x 2942 targets | 28 |
-| Skill to Job WorkBench | multi_label | 13492 queries x 3039 targets | 28 |
-| Skill Extraction House | multi_label | 262 queries x 13891 targets | 28 |
-| Skill Extraction Tech | multi_label | 338 queries x 13891 targets | 28 |
-| Skill Extraction SkillSkape | multi_label | 1191 queries x 13891 targets | 28 |
-| Skill Similarity SkillMatch-1K | single_label | 900 queries x 2648 targets | 1 |
-| Skill Normalization ESCO | multi_label | 72008 queries x 13939 targets | 28 |
-| **Classification**
-| Job-Skill Classification | multi_label | 3039 samples, 13939 classes | 28 |
+| Task Name                      | Label Type   | Dataset Size (English)              | Languages |
+|--------------------------------| --- |-------------------------------------|-----------|
+| **Ranking**
+| Job to Skills WorkBench        | multi_label  | 3039 queries x 13939 targets        | 28        |
+| Job Title Similarity           | multi_label  | 105 queries x 2619 targets          | 11        |
+| Job Normalization              | single_label | 15463 queries x 2942 targets        | 28        |
+| Skill to Job WorkBench         | multi_label  | 13492 queries x 3039 targets        | 28        |
+| Skill Extraction House         | multi_label  | 262 queries x 13891 targets         | 28        |
+| Skill Extraction Tech          | multi_label  | 338 queries x 13891 targets         | 28        |
+| Skill Extraction SkillSkape    | multi_label  | 1191 queries x 13891 targets        | 28        |
+| Skill Similarity SkillMatch-1K | single_label | 900 queries x 2648 targets          | 1         |
+| Skill Normalization ESCO       | multi_label  | 72008 queries x 13939 targets       | 28        |
+| Query-Candidate Matching       | multi_label  | 200 queries x 4019 (x-lang) targets | 5         |
+| Project-Candidate Matching     | multi_label  | 200 queries x 4019 (x-lang) targets | 5         |
+| **Classification**
+| Job-Skill Classification       | multi_label  | 3039 samples, 13939 classes         | 28        |
 
 
 ### Models
diff --git a/src/workrb/tasks/ranking/freelancer_project_matching.py b/src/workrb/tasks/ranking/freelancer_project_matching.py
index 697201e..0d6d69e 100644
--- a/src/workrb/tasks/ranking/freelancer_project_matching.py
+++ b/src/workrb/tasks/ranking/freelancer_project_matching.py
@@ -89,11 +89,11 @@ def label_type(self) -> LabelType:
     @property
     def target_input_type(self) -> ModelInputType:
         """Target input type for profiles."""
-        return ModelInputType.PROFILE_STRING
+        return ModelInputType.CANDIDATE_PROFILE_STRING
 
     @staticmethod
     def _candidate_profile_to_str(candidate: dict[str, Any]) -> str:
-        experiences = "\n".join(
+        experiences = "\n\n".join(
             f"{exp['title']}\n{exp['description']}\n{', '.join(exp['skills'])}"
             for exp in candidate["experiences"]
         )
@@ -223,7 +223,7 @@ class ProjectCandidateRanking(_BaseCandidateRanking):
     @property
     def name(self) -> str:
         """Project candidate matching task name."""
-        return "Project-candidate matching"
+        return "Project-Candidate Matching"
 
     @property
     def description(self) -> str:
@@ -280,7 +280,7 @@ class SearchQueryCandidateRanking(_BaseCandidateRanking):
     @property
     def name(self) -> str:
         """Query candidate matching task name."""
-        return "Project-candidate matching"
+        return "Query-Candidate Matching"
 
     @property
     def description(self) -> str:
diff --git a/src/workrb/types.py b/src/workrb/types.py
index 1ae5e83..2ac3068 100644
--- a/src/workrb/types.py
+++ b/src/workrb/types.py
@@ -78,15 +78,65 @@ class ModelInputType(str, Enum):
 
     PROJECT_BRIEF_STRING = "brief_string"
     """
-    A few sentences describing a project.
+    Full natural-language description of a freelance or consulting opportunity.
+
+    Context:
+        Used as the primary input to candidate-matching pipelines.
+        Represents a concrete project (not a permanent job position).
+
+    Content:
+        Typically includes business context, scope, required skills,
+        expectations, and optional soft requirements.
+
+    Example:
+        Lead Dev for Greenfield B2B Material Platform (Next.js/GraphQL)
+
+        We are Architech Innovations, a scale-up in the AEC tech space. ...
+        We are building a greenfield MVP from the ground up. ...
+        We are looking for a developer who has a proven track record ...
+        skills: Strong decision-making, ...
     """
 
     SEARCH_QUERY_STRING = "search_query_string"
     """
-    A few input words used to search for a profile.
+    Short keyword-based query used to retrieve candidate profiles.
+
+    Context:
+        Used when a full project brief is not available, or as a
+        lightweight input to narrow down candidates before deeper matching.
+
+    Content:
+        Minimal, high-signal keywords describing a role, skillset,
+        or professional focus.
+
+    Example:
+        IT financial controller
     """
 
-    PROFILE_STRING = "profile_string"
+    CANDIDATE_PROFILE_STRING = "candidate_profile_string"
     """
-    A few sentences describing an applicant profile.
+    Structured natural-language summary of a candidate's professional background.
+
+    Context:
+        Retrieved from the candidate database and evaluated for relevance
+        against PROJECT_BRIEF_STRING or SEARCH_QUERY_STRING.
+
+    Content:
+        Includes a title, bio, skills, and experience history.
+
+    Example:
+        Expert Shopify Developer
+
+        Bio:
+        I build and optimize Shopify Plus environments, ...
+
+        Skills:
+        Shopify Plus,Headless Commerce, ...
+
+        Experiences:
+        Lead Shopify Developer
+        I led the development of a headless e-commerce site ...
+        Shopify Plus, ...
+
+        ...
     """