Skip to content

Commit

Permalink
Do not find DOI for neurips papers. Add test and fixes #6
Browse files Browse the repository at this point in the history
  • Loading branch information
dli7319 committed Apr 11, 2024
1 parent adff2a3 commit e2724c6
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 4 deletions.
9 changes: 7 additions & 2 deletions DuBibtex.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class Re:
acmBib = re.compile('<PRE id="[\d\.]+">(.+)<\/pre>',
flags=re.MULTILINE | re.IGNORECASE | re.S)
ieee = re.compile('ieee\.org(?:\/abstract)?\/document\/(\d+)', flags=re.MULTILINE)
neurips = re.compile(r'proceedings.neurips.cc\/paper', flags=re.MULTILINE)
year = re.compile('\w+(\d+)')


Expand Down Expand Up @@ -351,18 +352,22 @@ def google_lookup(s, parser, use_scholar=False):
f.write(html)


url_regexes = ['doiAcmUrl', 'acm', 'doiSpringer', 'doiWiley', 'doiUrl', 'ieee', 'doiCaltech', 'doiPubmed']
url_regexes = ['doiAcmUrl', 'acm', 'doiSpringer', 'doiWiley', 'doiUrl', 'ieee', 'doiCaltech', 'doiPubmed', 'neurips']

found_urls = []
for url_regex in url_regexes:
m = getattr(Re, url_regex).search(html)
if m and len(m.groups()) > 0:
if m:
found_urls.append((url_regex, m, m.start()))
# Sort by start position
found_urls.sort(key=lambda x: x[2])

for url_regex, m, _ in found_urls:

if url_regex == 'neurips':
# NeurIPS does not have a DOI.
return None

if url_regex == 'doiAcmUrl' and m and len(m.groups()) > 0:
res = m.groups()[0].replace('\\', '')
print("DOI from Google and ACM DOI: %s\n" % res)
Expand Down
11 changes: 11 additions & 0 deletions tests/inputs/ddpm.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
@inproceedings{NEURIPS2020_4c5bcfec,
author = {Ho, Jonathan and Jain, Ajay and Abbeel, Pieter},
booktitle = {Advances in Neural Information Processing Systems},
editor = {H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin},
pages = {6840--6851},
publisher = {Curran Associates, Inc.},
title = {Denoising Diffusion Probabilistic Models},
url = {https://proceedings.neurips.cc/paper_files/paper/2020/file/4c5bcfec8584af0d967f1ab10179ca4b-Paper.pdf},
volume = {33},
year = {2020}
}
14 changes: 12 additions & 2 deletions tests/test_find_dois.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,11 @@ def search_and_check_doi(filename, correct_doi):
assert len(generated_library.entries) == len(
library.entries), "Number of entries should be the same"
for entry in generated_library.entries:
assert "doi" in entry
assert entry.fields_dict["doi"].value == correct_doi
if correct_doi:
assert "doi" in entry
assert entry.fields_dict["doi"].value == correct_doi
else:
assert "doi" not in entry


@pytest.mark.parametrize("filename,correct_doi", [
Expand All @@ -42,3 +45,10 @@ def test_iccv_doi(filename, correct_doi):
])
def test_tvcg_doi(filename, correct_doi):
search_and_check_doi(filename, correct_doi)

@pytest.mark.parametrize("filename", [
("ddpm.bib"),
])
def test_neurips_doi(filename):
# NeurIPS does not have a DOI
search_and_check_doi(filename, "")

0 comments on commit e2724c6

Please sign in to comment.