Skip to content

Commit 0c0f87b

Browse files
authored
Merge pull request #1217 from DaanVanVugt/feature/parse_audience
parse audience based on presence in description
2 parents 18a4f6e + e96d350 commit 0c0f87b

19 files changed

+49
-0
lines changed

lib/ingestors/event_ingestion.rb

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,37 @@ def convert_location(input)
2222
input
2323
end
2424

25+
# Derives target-audience labels from the free text of an event description.
#
# Every keyword in the table below is looked up as a case-insensitive
# substring of +description+; each keyword that occurs contributes its
# audience label to the result.
#
# NOTE: matching is plain substring matching, so e.g. 'master' also matches
# "masterclass" and 'professor' also matches "professors".
#
# @param description [String, nil] event description text; nil (or any
#   value whose +to_s+ is empty) yields an empty result instead of raising
# @return [Array<String>] unique audience labels, ordered by the position of
#   the first matching keyword in the table; empty when nothing matches
def parse_audience(description)
  audience_mapping = {
    'post-docs' => 'researchers',
    "PhD's candidate" => 'researchers',
    'PhD student' => 'researchers',
    'principal investigator' => 'researchers',
    'professor' => 'researchers',
    'scientist' => 'researchers',
    'library staff' => 'research support staff',
    'research librarian' => 'research support staff',
    'information specialist' => 'research support staff',
    'archivist' => 'research support staff',
    'repository manager' => 'research support staff',
    'data steward' => 'research support staff',
    'data manager' => 'research support staff',
    'data professional' => 'research support staff',
    'data engineer' => 'research support staff',
    'software engineer' => 'research support staff',
    'data librarian' => 'research support staff',
    'bachelor' => 'students',
    'master' => 'students',
    'teacher' => 'trainers',
    'coaches' => 'trainers',
    'educator' => 'trainers'
  }

  # Normalise once up front: to_s turns a missing (nil) description into ''
  # so the caller's event is not lost to a NoMethodError, and downcasing here
  # avoids repeating the work for every keyword.
  text = description.to_s.downcase
  return [] if text.empty?

  audience_mapping
    .select { |keyword, _audience| text.include?(keyword.downcase) }
    .values
    .uniq
end
55+
2556
def parse_dates(input, timezone = nil)
2657
Time.use_zone(timezone) do
2758
# try to split on obvious interval markers

lib/ingestors/taxila/dans_ingestor.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ def process_dans(url)
6464

6565
event.source = 'DANS'
6666
event.timezone = 'Amsterdam'
67+
event.target_audience = parse_audience(event.description)
6768

6869
add_event(event)
6970
rescue Exception => e

lib/ingestors/taxila/dtls_ingestor.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ def process_dtls(url)
7272
event.set_default_times
7373
event.source = 'DTL'
7474
event.timezone = 'Amsterdam'
75+
event.target_audience = parse_audience(event.description)
7576
add_event(event)
7677
rescue Exception => e
7778
@messages << "Extract event fields failed with: #{e.message}"

lib/ingestors/taxila/han_ingestor.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def process_han(_url)
5050
event.venue = "#{venue_super_css.text} #{venue_sub_css.text}"
5151
event.source = "HAN"
5252
event.timezone = 'Amsterdam'
53+
event.target_audience = parse_audience(event.description)
5354

5455
add_event(event)
5556
rescue Exception => e

lib/ingestors/taxila/lcrdm_ingestor.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ def process_lcrdm(url)
5858
event.source = 'LCRDM'
5959
event.timezone = 'Amsterdam'
6060
event.set_default_times
61+
event.target_audience = parse_audience(event.description)
6162

6263
add_event(event)
6364
rescue Exception => e

lib/ingestors/taxila/leiden_ingestor.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def process_leiden(url)
8383
# does TeSS support that?
8484

8585
event.source = 'Universiteit Leiden'
86+
event.target_audience = parse_audience(event.description)
8687

8788
add_event(event)
8889
rescue Exception => e

lib/ingestors/taxila/maastricht_ingestor.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ def process_maastricht(url)
5858
event.timezone = 'Europe/Amsterdam' # how to get this from Icalendar Event object?
5959

6060
event.source = 'Maastricht University'
61+
event.target_audience = parse_audience(event.description)
6162

6263
add_event(event)
6364
rescue Exception => e

lib/ingestors/taxila/nwo_ingestor.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def process_nwo(url)
4747
event.url = "https://www.nwo.nl#{event_data.css('h3.card__title > a').attribute('href').value}"
4848

4949
event.source = 'NWO'
50+
event.target_audience = parse_audience(event.description)
5051

5152
add_event(event)
5253
rescue Exception => e

lib/ingestors/taxila/odissei_ingestor.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def process_odissei(_url)
5959
event.source = 'ODISSEI'
6060
event.timezone = 'Amsterdam'
6161
event.set_default_times
62+
event.target_audience = parse_audience(event.description)
6263
add_event(event)
6364
rescue Exception => e
6465
@messages << "Extract event fields failed with: #{e.message}"

lib/ingestors/taxila/oscd_ingestor.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ def process_oscd(url)
5757
end
5858
if div&.next_sibling&.next_sibling.nil? || (div&.next_sibling&.next_sibling&.name == 'h1')
5959
event.set_default_times
60+
event.target_audience = parse_audience(event.description)
6061
add_event(event)
6162
end
6263
end

0 commit comments

Comments
 (0)