diff --git a/DuBibtex.py b/DuBibtex.py index adaea3d..83d307e 100644 --- a/DuBibtex.py +++ b/DuBibtex.py @@ -66,7 +66,7 @@ class Re: acm = re.compile('citation\.cfm\?id\=([\d\.]+)', flags=re.MULTILINE) acmBib = re.compile('
(.+)<\/pre>', flags=re.MULTILINE | re.IGNORECASE | re.S) - ieee = re.compile('ieee\.org\/document\/(\d+)', flags=re.MULTILINE) + ieee = re.compile('ieee\.org(?:\/abstract)?\/document\/(\d+)', flags=re.MULTILINE) year = re.compile('\w+(\d+)') @@ -191,17 +191,21 @@ def write_current_item(self): # Searches for DOI. self.debug_bib('Missing DOI, search "%s"...' % self.cur['title']) + title_without_brackets = re.sub(r'\{|\}', '', self.cur['title']) if 'journal' in self.cur and self.cur['journal'][:5].lower() == 'arxiv': content = request_url('https://www.google.com/search?q=%s' % - self.cur['title']) + title_without_brackets) m = Re.urlArxiv.search(content) if m and len(m.groups()) > 0: self.cur['url'] = "https://arxiv.org/pdf/%s" % m.groups()[0] - self.debug_bib('Missing DOI, search "%s"...' % self.cur['title']) + self.debug_bib('Missing DOI, search "%s"...' % title_without_brackets) else: - d = google_lookup(self.cur['title'], self) + d = google_lookup(title_without_brackets, self) if not d: - d = crossref_lookup(self.cur['title']) + # Try again with google scholar. + d = google_lookup(title_without_brackets, self, use_scholar=True) + if not d: + d = crossref_lookup(title_without_brackets) if d: self.fix_doi(d) else: @@ -338,8 +342,11 @@ def levenshtein(s1, s2): return previous_row[-1] -def google_lookup(s, parser): - html = request_url('https://www.google.com/search?q=%s' % s) +def google_lookup(s, parser, use_scholar=False): + if use_scholar: + html = request_url('https://scholar.google.com/scholar?q=%s' % s) + else: + html = request_url('https://www.google.com/search?q=%s' % s) with open('debug.txt', 'w', encoding='utf8') as f: f.write(html) diff --git a/tests/inputs/holocamera.bib b/tests/inputs/holocamera.bib new file mode 100644 index 0000000..96a8cf0 --- /dev/null +++ b/tests/inputs/holocamera.bib @@ -0,0 +1,10 @@ +@article{holocamera, + author={Heagerty, Jonathan and Li, Sida and Lee, Eric and Bhattacharyya, Shuvra and Bista, Sujal and Brawn, Barbara and Feng, Brandon and Jabbireddy, Susmija and JaJa, Joseph and Kacorri, Hernisa and Li, David and Yarnell, Derek and Zwicker, Matthias and Varshney, Amitabh}, + journal={IEEE Transactions on Visualization and Computer Graphics}, + title={{HoloCamera}: Advanced Volumetric Capture for Cinematic-Quality VR Applications}, + year={2024}, + volume={}, + number={}, + pages={}, + keywords={Volumetric Capture, Light Fields, Holoportation, Multi-camera Array} + } \ No newline at end of file diff --git a/tests/test_find_dois.py b/tests/test_find_dois.py index c0d4d0c..bee775b 100644 --- a/tests/test_find_dois.py +++ b/tests/test_find_dois.py @@ -31,3 +31,27 @@ def test_iccv_doi(filename, correct_doi): for entry in generated_library.entries: assert "doi" in entry assert entry.fields_dict["doi"].value == correct_doi + +@pytest.mark.parametrize("filename,correct_doi", [ + ("holocamera.bib", "10.1109/TVCG.2024.3372123"), +]) +def test_tvcg_doi(filename, correct_doi): + input_file = os.path.join("tests", "inputs", filename) + + assert os.path.isfile(input_file), f"File {input_file} does not exist" + library = bibtexparser.parse_file(input_file) + + with tempfile.NamedTemporaryFile() as fp: + p = Parser(output_file=fp.name) + for entry in library.entries: + p.copy_from_parsed_entry(entry) + p.write_current_item() + p.shut_down() + + # Check the doi + generated_library = bibtexparser.parse_file(fp.name) + assert len(generated_library.entries) == len( + library.entries), "Number of entries should be the same" + for entry in generated_library.entries: + assert "doi" in entry + assert entry.fields_dict["doi"].value == correct_doi \ No newline at end of file