Skip to content

Commit 5f27df6

Browse files
authored
feat: add MELO and MELS datasets as ranking tasks (#37)
Introduce the MELO (Multilingual Entity Linking of Occupations) and MELS (Multilingual Entity Linking of Skills) benchmarks as new ranking tasks. MELO provides 42 evaluation datasets spanning 21 languages for job title normalization into ESCO, built from crosswalks between national occupation taxonomies and ESCO published by official EU labor organizations. MELS follows the same methodology but targets skill normalization, covering 5 languages with 8 datasets.

- Add MELORanking task class with 42 datasets across 21 languages
- Add MELSRanking task class with 8 datasets across 5 languages
- Implement get_dataset_languages() for both tasks, supporting monolingual and cross-lingual dataset variants
- Add Austria and Belgium datasets to MELO (6 additional dataset IDs)
- Add unit tests for dataset ID filtering and language mapping
- Update README and example scripts with new tasks
1 parent 5113982 commit 5f27df6

13 files changed

+758
-6
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,12 +207,14 @@ lang_result_ci = summary["mean_per_language/en/f1_macro/ci_margin"]
207207
| Job to Skills WorkBench | multi_label | 3039 queries x 13939 targets | 28 |
208208
| Job Title Similarity | multi_label | 105 queries x 2619 targets | 11 |
209209
| Job Normalization | single_label | 15463 queries x 2942 targets | 28 |
210+
| Job Normalization MELO | multi_label | 633 queries x 33813 targets | 21 |
210211
| Skill to Job WorkBench | multi_label | 13492 queries x 3039 targets | 28 |
211212
| Skill Extraction House | multi_label | 262 queries x 13891 targets | 28 |
212213
| Skill Extraction Tech | multi_label | 338 queries x 13891 targets | 28 |
213214
| Skill Extraction SkillSkape | multi_label | 1191 queries x 13891 targets | 28 |
214215
| Skill Similarity SkillMatch-1K | single_label | 900 queries x 2648 targets | 1 |
215216
| Skill Normalization ESCO | multi_label | 72008 queries x 13939 targets | 28 |
217+
| Skill Normalization MELS | multi_label | 1722 queries x 19466 targets | 5 |
216218
| Query-Candidate Matching | multi_label | 200 queries x 4019 (x-lang) targets | 5 |
217219
| Project-Candidate Matching | multi_label | 200 queries x 4019 (x-lang) targets | 5 |
218220
| **Classification**

examples/run_benchmark_flat_average.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@
5959
# Tasks with monolingual, cross-lingual, and multilingual datasets
6060
workrb.tasks.ProjectCandidateRanking(split=split, languages=langs),
6161
workrb.tasks.SearchQueryCandidateRanking(split=split, languages=langs),
62-
# TODO: add MELO and MELS tasks when PR #37 is merged
62+
workrb.tasks.MELORanking(split=split, languages=langs),
63+
workrb.tasks.MELSRanking(split=split, languages=langs),
6364
]
6465

6566
# Evaluate

examples/run_benchmark_flat_average_all_langs.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@
4545
# Tasks with monolingual, cross-lingual, and multilingual datasets
4646
workrb.tasks.ProjectCandidateRanking(split=split, languages=langs),
4747
workrb.tasks.SearchQueryCandidateRanking(split=split, languages=langs),
48-
# TODO: add MELO and MELS tasks when PR #37 is merged
48+
workrb.tasks.MELORanking(split=split, languages=langs),
49+
workrb.tasks.MELSRanking(split=split, languages=langs),
4950
]
5051

5152
# Evaluate

examples/run_benchmark_language_weighted.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@
5656
# Tasks with monolingual, cross-lingual, and multilingual datasets
5757
workrb.tasks.ProjectCandidateRanking(split=split, languages=langs),
5858
workrb.tasks.SearchQueryCandidateRanking(split=split, languages=langs),
59-
# TODO: add MELO and MELS tasks when PR #37 is merged
59+
workrb.tasks.MELORanking(split=split, languages=langs),
60+
workrb.tasks.MELSRanking(split=split, languages=langs),
6061
]
6162

6263
# Evaluate

examples/run_benchmark_language_weighted_all_langs.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@
4242
# Tasks with monolingual, cross-lingual, and multilingual datasets
4343
workrb.tasks.ProjectCandidateRanking(split=split, languages=langs),
4444
workrb.tasks.SearchQueryCandidateRanking(split=split, languages=langs),
45-
# TODO: add MELO and MELS tasks when PR #37 is merged
45+
workrb.tasks.MELORanking(split=split, languages=langs),
46+
workrb.tasks.MELSRanking(split=split, languages=langs),
4647
]
4748

4849
# Evaluate

src/workrb/tasks/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
from .ranking.job2skill import ESCOJob2SkillRanking
1616
from .ranking.job_similarity import JobTitleSimilarityRanking
1717
from .ranking.jobnorm import JobBERTJobNormRanking
18+
from .ranking.melo import MELORanking
19+
from .ranking.mels import MELSRanking
1820
from .ranking.skill2job import ESCOSkill2JobRanking
1921
from .ranking.skill_extraction import (
2022
HouseSkillExtractRanking,
@@ -39,6 +41,8 @@
3941
"ESCOSkillNormRanking",
4042
"JobBERTJobNormRanking",
4143
"JobTitleSimilarityRanking",
44+
"MELORanking",
45+
"MELSRanking",
4246
"HouseSkillExtractRanking",
4347
"TechSkillExtractRanking",
4448
"SkillSkapeExtractRanking",

src/workrb/tasks/abstract/ranking_base.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ def __init__(
3838
target_indices: list[list[int]],
3939
target_space: list[str],
4040
dataset_id: str,
41+
allow_duplicate_queries: bool = True,
42+
allow_duplicate_targets: bool = False,
4143
):
4244
"""Initialize ranking dataset with validation.
4345
@@ -56,7 +58,7 @@ def __init__(
5658
self.target_indices = self._postprocess_indices(target_indices)
5759
self.target_space = self._postprocess_texts(target_space)
5860
self.dataset_id = dataset_id
59-
self.validate_dataset()
61+
self.validate_dataset(allow_duplicate_queries, allow_duplicate_targets)
6062

6163
def validate_dataset(
6264
self,

src/workrb/tasks/ranking/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
from workrb.tasks.ranking.job2skill import ESCOJob2SkillRanking
1515
from workrb.tasks.ranking.job_similarity import JobTitleSimilarityRanking
1616
from workrb.tasks.ranking.jobnorm import JobBERTJobNormRanking
17+
from workrb.tasks.ranking.melo import MELORanking
18+
from workrb.tasks.ranking.mels import MELSRanking
1719
from workrb.tasks.ranking.skill2job import ESCOSkill2JobRanking
1820
from workrb.tasks.ranking.skill_extraction import (
1921
HouseSkillExtractRanking,
@@ -30,6 +32,8 @@
3032
"HouseSkillExtractRanking",
3133
"JobBERTJobNormRanking",
3234
"JobTitleSimilarityRanking",
35+
"MELORanking",
36+
"MELSRanking",
3337
"ProjectCandidateRanking",
3438
"SearchQueryCandidateRanking",
3539
"SkillMatch1kSkillSimilarityRanking",

0 commit comments

Comments (0)