Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 16 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -201,20 +201,22 @@ lang_result_ci = summary["mean_per_language/en/f1_macro/ci_margin"]
## Supported tasks & models

### Tasks
| Task Name | Label Type | Dataset Size (English) | Languages |
| --- | --- | --- | --- |
| **Ranking**
| Job to Skills WorkBench | multi_label | 3039 queries x 13939 targets | 28 |
| Job Title Similarity | multi_label | 105 queries x 2619 targets | 11 |
| Job Normalization | single_label | 15463 queries x 2942 targets | 28 |
| Skill to Job WorkBench | multi_label | 13492 queries x 3039 targets | 28 |
| Skill Extraction House | multi_label | 262 queries x 13891 targets | 28 |
| Skill Extraction Tech | multi_label | 338 queries x 13891 targets | 28 |
| Skill Extraction SkillSkape | multi_label | 1191 queries x 13891 targets | 28 |
| Skill Similarity SkillMatch-1K | single_label | 900 queries x 2648 targets | 1 |
| Skill Normalization ESCO | multi_label | 72008 queries x 13939 targets | 28 |
| **Classification**
| Job-Skill Classification | multi_label | 3039 samples, 13939 classes | 28 |
| Task Name | Label Type | Dataset Size (English) | Languages |
|--------------------------------| --- |-------------------------------------|-----------|
| **Ranking**
| Job to Skills WorkBench | multi_label | 3039 queries x 13939 targets | 28 |
| Job Title Similarity | multi_label | 105 queries x 2619 targets | 11 |
| Job Normalization | single_label | 15463 queries x 2942 targets | 28 |
| Skill to Job WorkBench | multi_label | 13492 queries x 3039 targets | 28 |
| Skill Extraction House | multi_label | 262 queries x 13891 targets | 28 |
| Skill Extraction Tech | multi_label | 338 queries x 13891 targets | 28 |
| Skill Extraction SkillSkape | multi_label | 1191 queries x 13891 targets | 28 |
| Skill Similarity SkillMatch-1K | single_label | 900 queries x 2648 targets | 1 |
| Skill Normalization ESCO | multi_label | 72008 queries x 13939 targets | 28 |
| Query-Candidate Matching | multi_label | 200 queries x 4019 (x-lang) targets | 5 |
| Project-Candidate Matching | multi_label | 200 queries x 4019 (x-lang) targets | 5 |
| **Classification**
| Job-Skill Classification | multi_label | 3039 samples, 13939 classes | 28 |


### Models
Expand Down
6 changes: 6 additions & 0 deletions src/workrb/tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

# Task implementations
from .classification.job2skill import ESCOJob2SkillClassification
from .ranking.freelancer_project_matching import (
ProjectCandidateRanking,
SearchQueryCandidateRanking,
)
from .ranking.job2skill import ESCOJob2SkillRanking
from .ranking.job_similarity import JobTitleSimilarityRanking
from .ranking.jobnorm import JobBERTJobNormRanking
Expand Down Expand Up @@ -39,4 +43,6 @@
"TechSkillExtractRanking",
"SkillSkapeExtractRanking",
"SkillMatch1kSkillSimilarityRanking",
"ProjectCandidateRanking",
"SearchQueryCandidateRanking",
]
6 changes: 6 additions & 0 deletions src/workrb/tasks/ranking/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
- Minimize external dependencies
"""

from workrb.tasks.ranking.freelancer_project_matching import (
ProjectCandidateRanking,
SearchQueryCandidateRanking,
)
from workrb.tasks.ranking.job2skill import ESCOJob2SkillRanking
from workrb.tasks.ranking.job_similarity import JobTitleSimilarityRanking
from workrb.tasks.ranking.jobnorm import JobBERTJobNormRanking
Expand All @@ -26,6 +30,8 @@
"HouseSkillExtractRanking",
"JobBERTJobNormRanking",
"JobTitleSimilarityRanking",
"ProjectCandidateRanking",
"SearchQueryCandidateRanking",
"SkillMatch1kSkillSimilarityRanking",
"SkillSkapeExtractRanking",
"TechSkillExtractRanking",
Expand Down
301 changes: 301 additions & 0 deletions src/workrb/tasks/ranking/freelancer_project_matching.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
"""Project brief Freelancer candidate ranking task using a synthetic dataset provided by Malt."""

from abc import ABC, abstractmethod
from typing import Any

import pandas as pd
from datasets import load_dataset

from workrb.registry import register_task
from workrb.tasks.abstract.base import DatasetSplit, LabelType, Language
from workrb.tasks.abstract.ranking_base import RankingDataset, RankingTask, RankingTaskGroup
from workrb.types import ModelInputType


class _BaseCandidateRanking(RankingTask, ABC):
"""
Project brief Freelancer candidate ranking task based on Jouanneau et al. (2024) and Jouanneau et al. (2025).

This task evaluates a model's ability to rank freelancer candidates based on their profile
by semantic similarity (skill matching fit) to search query or project brief.
Given a query job title, the model must rank corpus job titles such that semantically similar ones
appear higher than non-similar ones.

Notes
-----
HuggingFace Dataset: https://huggingface.co/datasets/MaltCompany/Freelancer-Project-Matching

Languages: en, de, es, fr, nl.
+ cross_lingual to test language agnostic matching

The dataset contains:
- ``queries``: 200 search queries
- ``briefs``: 200 project briefs (title + description)
- ``profiles``: 4019 candidate profiles to rank each containing:
- a headline
- a language
- a description
- a list of skills and soft_skills
- a list of experience:
- with a title
- a description
- a list of skills
- Skill matching fit ``brief_scores`` 200 * 4019 and ``query_score`` 200 * 4019

Each query or brief and profile pair are scored. The score indicates the skill matching fit.
A threshold is applied on the score to binarize the interactions into relevant and non-relevant pairs.
"""

SUPPORTED_DATASET_LANGUAGES = [
Language.DE,
Language.EN,
Language.ES,
Language.FR,
Language.NL,
Language.CROSS,
]

RELEVANCE_SCORE_THRESHOLD = 0.8

@property
def default_metrics(self) -> list[str]:
return ["map", "rp@5", "rp@10", "mrr"]

@property
def task_group(self) -> RankingTaskGroup:
"""Job Title Similarity task group."""
return RankingTaskGroup.JOBSIM

@property
def supported_query_languages(self) -> list[Language]:
"""Supported query languages."""
return [Language.EN]

@property
def supported_target_languages(self) -> list[Language]:
"""Supported target languages."""
return self.SUPPORTED_DATASET_LANGUAGES

@property
def split_test_fraction(self) -> float:
"""Fraction of data to use for test split."""
return 1.0

@property
def label_type(self) -> LabelType:
"""Multi-label ranking for project-candidate skill job fit."""
return LabelType.MULTI_LABEL

@property
def target_input_type(self) -> ModelInputType:
"""Target input type for profiles."""
return ModelInputType.CANDIDATE_PROFILE_STRING

@staticmethod
def _candidate_profile_to_str(candidate: dict[str, Any]) -> str:
experiences = "\n\n".join(
f"{exp['title']}\n{exp['description']}\n{', '.join(exp['skills'])}"
for exp in candidate["experiences"]
)
return (
f"{candidate['headline']}\n\n"
f"Bio:\n{candidate['description']}\n\n"
f"Skills:\n{','.join(candidate['skills'] + candidate['soft_skills'])}\n\n"
f"Experiences:\n{experiences}"
)

@staticmethod
@abstractmethod
def _input_to_str(query: dict[str, str]) -> str:
pass

def _load_and_format_data(
self,
split: DatasetSplit,
language: Language,
query_key: str,
score_key: str,
query_id_column: str,
) -> RankingDataset:
if split != DatasetSplit.TEST:
raise ValueError(f"Split '{split}' not supported. Use TEST")

if language not in self.SUPPORTED_DATASET_LANGUAGES:
raise ValueError(f"Language '{language}' not supported.")

query_df = pd.DataFrame(
load_dataset("MaltCompany/Freelancer-Project-Matching", query_key)["test"]
)
candidate_df = pd.DataFrame(
load_dataset("MaltCompany/Freelancer-Project-Matching", "profiles")["test"]
)
score_df = pd.DataFrame(
load_dataset("MaltCompany/Freelancer-Project-Matching", score_key)["test"]
)

if language != Language.CROSS:
candidate_df = candidate_df[candidate_df["language"] == language.value]

# create labels
candidate_df = (
candidate_df.reset_index(drop=True).reset_index(names="idx").sort_values("idx")
)
# add labels to scores
score_df = candidate_df[["profile_id", "idx"]].merge(score_df, how="left", on="profile_id")

# threshold score and keep relevancy labels
score_df = (
score_df.sort_values("idx")
.groupby(by=query_id_column)
.apply(
lambda group: list(group[group["score"] >= self.RELEVANCE_SCORE_THRESHOLD]["idx"]),
include_groups=False,
)
.reset_index(name="relevancy_labels")
)

# make sure inputs coincide and process documents' strings
relevancy_labels = score_df.sort_values(query_id_column)["relevancy_labels"].tolist()
queries = [
self._input_to_str(q)
for q in query_df.sort_values(query_id_column).to_dict(orient="records")
]
corpus = [self._candidate_profile_to_str(p) for p in candidate_df.to_dict(orient="records")]

return RankingDataset(queries, relevancy_labels, corpus, language=language)

@property
def citation(self) -> str:
"""Job Title Similarity task citation."""
return """
@article{jouanneau2024skill,
title={Skill matching at scale: freelancer-project alignment for efficient multilingual candidate retrieval},
author={Jouanneau, Warren and Palyart, Marc and Jouffroy, Emma},
booktitle = {Proceedings of the 4th Workshop on Recommender Systems for Human Resources
(RecSys in {HR} 2024), in conjunction with the 18th {ACM} Conference on
Recommender Systems},
year={2024}
}
@article{jouanneau2025efficient,
title={An Efficient Long-Context Ranking Architecture With Calibrated LLM Distillation: Application to Person-Job Fit},
author={Jouanneau, Warren and Jouffroy, Emma and Palyart, Marc},
booktitle = {Proceedings of the 5th Workshop on Recommender Systems for Human Resources
(RecSys in {HR} 2025), in conjunction with the 19th {ACM} Conference on
Recommender Systems},
year={2025}
}
"""


@register_task()
class ProjectCandidateRanking(_BaseCandidateRanking):
"""
Project brief Freelancer candidate ranking task based on Jouanneau et al. (2024) and Jouanneau et al. (2025).

This task evaluates a model's ability to rank freelancer candidates (based on their profile) by semantic similarity
(skill matching fit) to projects briefs. Given a query job title, the model must rank corpus job titles such
that semantically similar ones appear higher than non-similar ones.

Notes
-----
HuggingFace Dataset: https://huggingface.co/datasets/MaltCompany/Freelancer-Project-Matching

Languages: en, de, es, fr, nl.
+ cross_lingual to test language agnostic matching

The dataset contains:
- ``briefs``: 200 project briefs (title + description)
- ``profiles``: 4019 candidate profiles to rank each containing:
- a headline
- a language
- a description
- a list of skills and soft_skills
- a list of experience:
- with a title
- a description
- a list of skills
- Skill matching fit ``brief_scores`` 200 * 4019

Each brief and profile pair are scored. The score indicates the skill matching fit.
A threshold is applied on the score to binarize the interactions into relevant and non-relevant pairs.
"""

@property
def name(self) -> str:
"""Project candidate matching task name."""
return "Project-Candidate Matching"

@property
def description(self) -> str:
"""Project candidate matching task description."""
return "Rank candidate's profile in a corpus based on their skill based job fit to query project briefs."

@property
def query_input_type(self) -> ModelInputType:
"""Query input type for briefs."""
return ModelInputType.PROJECT_BRIEF_STRING

@staticmethod
def _input_to_str(input_dict: dict[str, str]) -> str:
return f"{input_dict['title']}\n\n{input_dict['description']}"

def load_monolingual_data(self, split: DatasetSplit, language: Language) -> RankingDataset:
"""Load Job Title Similarity data from the HuggingFace dataset."""
return self._load_and_format_data(split, language, "briefs", "brief_scores", "brief_id")


@register_task()
class SearchQueryCandidateRanking(_BaseCandidateRanking):
"""
Search query Freelancer candidate ranking task based on Jouanneau et al. (2024) and Jouanneau et al. (2025).

This task evaluates a model's ability to rank freelancer candidates (based on their profile) by semantic similarity
(skill matching fit) to search query. Given a query job title, the model must rank corpus job titles such
that semantically similar ones appear higher than non-similar ones.

Notes
-----
HuggingFace Dataset: https://huggingface.co/datasets/MaltCompany/Freelancer-Project-Matching

Languages: en, de, es, fr, nl.
+ cross_lingual to test language agnostic matching

The dataset contains:
- ``queries``: 200 search queries
- ``profiles``: 4019 candidate profiles to rank each containing:
- a headline
- a language
- a description
- a list of skills and soft_skills
- a list of experience:
- with a title
- a description
- a list of skills
- Skill matching fit ``query_score`` 200 * 4019

Each query and profile pair are scored. The score indicates the skill matching fit.
A threshold is applied on the score to binarize the interactions into relevant and non-relevant pairs.
"""

@property
def name(self) -> str:
"""Project candidate matching task name."""
return "Query-Candidate Matching"

@property
def description(self) -> str:
"""Project candidate matching task description."""
return "Rank candidate's profile in a corpus based on their skill based job fit to a search query."

@property
def query_input_type(self) -> ModelInputType:
"""Query input type for briefs."""
return ModelInputType.SEARCH_QUERY_STRING

@staticmethod
def _input_to_str(input_dict: dict[str, str]) -> str:
return input_dict["value"]

def load_monolingual_data(self, split: DatasetSplit, language: Language) -> RankingDataset:
"""Load Job Title Similarity data from the HuggingFace dataset."""
return self._load_and_format_data(split, language, "queries", "query_scores", "query_id")
Loading