From 3f8abf564a2bb16f3af71fa8675111e048922faa Mon Sep 17 00:00:00 2001 From: Steven Y Lu Date: Tue, 22 Jun 2021 18:01:13 -0700 Subject: [PATCH] throw warnings when grobid failed extracting paper titles #15 --- src/parserindexer/ads_parser.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/parserindexer/ads_parser.py b/src/parserindexer/ads_parser.py index 4997248..d085947 100644 --- a/src/parserindexer/ads_parser.py +++ b/src/parserindexer/ads_parser.py @@ -73,12 +73,13 @@ def query_ads_database(self, title): ads_dict = dict() if len(data_docs) == 0: - warnings.warn('0 document found in the ADS database') + warnings.warn('[Warning] 0 document found in the ADS database') return ads_dict if len(data_docs) > 1: - warnings.warn('There are multiple documents returned from the ADS ' - 'database, and we are using the first document.') + warnings.warn('[Warning] There are multiple documents returned ' + 'from the ADS database, and we are using the first ' + 'document.') data_docs = data_docs[0] @@ -95,7 +96,11 @@ def parse(self, file_path): tika_dict = super(AdsParser, self).parse(file_path) # Get the title of the paper from grobid - title = tika_dict['metadata']['grobid:header_Title'] + if 'grobid:header_Title' in tika_dict['metadata'].keys(): + title = tika_dict['metadata']['grobid:header_Title'] + else: + warnings.warn('[Warning] grobid:header_Title field not found') + return tika_dict # Query the ADS database ads_dict = self.query_ads_database(title)