Skip to content

Commit

Permalink
Throw warnings when GROBID fails to extract paper titles #15
Browse files (browse the repository at this point in the history)
  • Loading branch information
stevenlujpl committed Jun 23, 2021
1 parent c13f57a commit 3f8abf5
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions src/parserindexer/ads_parser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -73,12 +73,13 @@ def query_ads_database(self, title):
ads_dict = dict()

if len(data_docs) == 0:
warnings.warn('0 document found in the ADS database')
warnings.warn('[Warning] 0 document found in the ADS database')
return ads_dict

if len(data_docs) > 1:
warnings.warn('There are multiple documents returned from the ADS '
'database, and we are using the first document.')
warnings.warn('[Warning] There are multiple documents returned '
'from the ADS database, and we are using the first '
'document.')

data_docs = data_docs[0]

Expand All @@ -95,7 +96,11 @@ def parse(self, file_path):
tika_dict = super(AdsParser, self).parse(file_path)

# Get the title of the paper from grobid
title = tika_dict['metadata']['grobid:header_Title']
if 'grobid:header_Title' in tika_dict['metadata'].keys():
title = tika_dict['metadata']['grobid:header_Title']
else:
warnings.warn('[Warning] grobid:header_Title field not found')
return tika_dict

# Query the ADS database
ads_dict = self.query_ads_database(title)
Expand Down

0 comments on commit 3f8abf5

Please sign in to comment.