Skip to content

Commit 4889ec1

Browse files
committed
improved doi replacement
1 parent 1eefbf4 commit 4889ec1

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

server/workers/orcid/src/orcid_service.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -434,10 +434,12 @@ def enrich_metadata_with_base(self, params: Dict[str, str], metadata: pd.DataFra
434 434
base_metadata = base_metadata.reindex(columns=required_fields)
435 435

436 436
#base_metadata = self._explode_merged_dois(base_metadata)
437-
base_metadata = base_metadata.explode('merged_dois', ignore_index=True) if 'merged_dois' in base_metadata.columns else base_metadata
437+
base_metadata['merged_dois'] = base_metadata['merged_dois'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else x)
438+
base_metadata['merged_dois'] = base_metadata['merged_dois'].apply(lambda x: x.split(';') if isinstance(x, str) else [])
439+
base_metadata['merged_dois'] = base_metadata['merged_dois'].apply(lambda x: [x.strip() for x in x] if isinstance(x, list) else x)
440+
base_metadata = base_metadata.explode('merged_dois', ignore_index=True)
438 441
# replace doi with merged_dois if merged_dois is not empty, otherwise keep doi
439-
if 'merged_dois' in base_metadata.columns:
440-
base_metadata.loc[base_metadata['merged_dois'].notna() & (base_metadata['merged_dois'] != ''), 'doi'] = base_metadata.loc[base_metadata['merged_dois'].notna() & (base_metadata['merged_dois'] != ''), 'merged_dois']
442+
base_metadata.loc[base_metadata['merged_dois'].notna() & (base_metadata['merged_dois'] != ''), 'doi'] = base_metadata.loc[base_metadata['merged_dois'].notna() & (base_metadata['merged_dois'] != ''), 'merged_dois']
441 443
base_metadata.loc[:, 'doi'] = base_metadata['doi'].apply(remove_doi_prefix)
442 444

443 445
# Remove rows where 'doi' is pd.NaN

0 commit comments

Comments
 (0)