Skip to content

Commit

Permalink
Added TalkDirector.
Browse files Browse the repository at this point in the history
  • Loading branch information
ruofeidu committed Feb 16, 2025
1 parent c25cffe commit 0a7ff61
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 30 deletions.
68 changes: 41 additions & 27 deletions DuBibtex.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ class Re:
doiAcmUrl = re.compile(
r'https:\/\/dl\.acm\.org\/doi\/(?:\w+\/)?([\w\d\.\-\\\/]+)',
flags=re.MULTILINE)
doiJavascript = re.compile(r'doi\"\:\"([\w\d\.\-\\\/]+)\"', flags=re.MULTILINE)
doiJavascript = re.compile(r'doi\"\:\"([\w\d\.\-\\\/]+)\"',
flags=re.MULTILINE)
doiText = re.compile(r'"DOI":"([\w\.\\\/]*)"', flags=re.MULTILINE)
doiSpringer = re.compile(r'chapter\/([\w\.\\\/\_\-]+)', flags=re.MULTILINE)
doiWiley = re.compile(r'doi\/abs\/([\w\.\\\/\_\-]+)', flags=re.MULTILINE)
Expand All @@ -66,7 +67,8 @@ class Re:
acm = re.compile(r'citation\.cfm\?id\=([\d\.]+)', flags=re.MULTILINE)
acmBib = re.compile(r'<PRE id="[\d\.]+">(.+)<\/pre>',
flags=re.MULTILINE | re.IGNORECASE | re.S)
ieee = re.compile(r'ieee\.org(?:\/abstract)?\/document\/(\d+)', flags=re.MULTILINE)
ieee = re.compile(r'ieee\.org(?:\/abstract)?\/document\/(\d+)',
flags=re.MULTILINE)
neurips = re.compile(r'proceedings.neurips.cc', flags=re.MULTILINE)
year = re.compile(r'\w+(\d+)')

Expand All @@ -87,7 +89,8 @@ def __init__(self, output_file=None, use_offline_doi=None):
config.read("config.ini")
Paras.header['User-Agent'] = config.get(Paras.section, "header").strip()
Paras.searchDOI = config.getboolean(Paras.section, "searchDOI")
Paras.useOfflineDOI = use_offline_doi if use_offline_doi is not None else config.getboolean(Paras.section, "useOfflineDOI")
Paras.useOfflineDOI = use_offline_doi if use_offline_doi is not None else config.getboolean(
Paras.section, "useOfflineDOI")
Paras.printSelfInfo = config.getboolean(Paras.section, "printSelfInfo")
Paras.keepComments = config.getboolean(Paras.section, "keepComments")
Paras.debugBibCrawler = config.getboolean(Paras.section, "debugBibCrawler")
Expand All @@ -96,7 +99,8 @@ def __init__(self, output_file=None, use_offline_doi=None):
Paras.inputFileList = config.get(Paras.section,
"inputFileList").strip().split(",")
Paras.doiJsonFile = config.get(Paras.section, "doiJsonFile").strip()
Paras.outputFile = output_file if output_file else config.get(Paras.section, "outputFile").strip()
Paras.outputFile = output_file if output_file else config.get(
Paras.section, "outputFile").strip()
Paras.fieldRemovalList = config.get(Paras.section,
"fieldRemovalList").strip().split(",")
Paras.minYear = config.getint(Paras.section, "minYear")
Expand Down Expand Up @@ -192,10 +196,11 @@ def write_current_item(self):
# Searches for DOI.
self.debug_bib('Missing DOI, search "%s"...' % self.cur['title'])


title_without_brackets = re.sub(r'\{|\}', '', self.cur['title'])
if ('journal' in self.cur and any([x in self.cur['journal'].lower() for x in ["ieee"]]) or
'booktitle' in self.cur and any([x in self.cur['booktitle'].lower() for x in ["ieee", "iccv"]])):
if ('journal' in self.cur and
any([x in self.cur['journal'].lower() for x in ["ieee"]]) or
'booktitle' in self.cur and
any([x in self.cur['booktitle'].lower() for x in ["ieee", "iccv"]])):
d = ieee_xplore_lookup(title_without_brackets, self)
if d:
self.fix_doi(d)
Expand Down Expand Up @@ -294,7 +299,6 @@ def parse_line(self, line):
if m and len(m.groups()) > 0:
self.cur[m.groups()[0].lower()] = m.groups()[1]


def copy_from_parsed_entry(self, entry):
self.add_new_bib(entry.key, entry.entry_type)
for field in entry.fields:
Expand Down Expand Up @@ -348,24 +352,29 @@ def levenshtein(s1, s2):

return previous_row[-1]


def ieee_xplore_lookup(s, parser):
# Search IEEE Xplore
xplore_search_url='https://ieeexplore.ieee.org/rest/search'
payload={
"newsearch": "true",
"queryText": s,
"highlight": "true",
"returnFacets": [
"ALL"
],
"returnType": "SEARCH",
"matchPubs": "true"
xplore_search_url = 'https://ieeexplore.ieee.org/rest/search'
payload = {
"newsearch": "true",
"queryText": s,
"highlight": "true",
"returnFacets": ["ALL"],
"returnType": "SEARCH",
"matchPubs": "true"
}
response = requests.post(xplore_search_url, json=payload, headers={
"User-Agent": Paras.header["User-Agent"],
"Origin": "https://ieeexplore.ieee.org",
"Referer": "https://ieeexplore.ieee.org/search/searchresult.jsp?newsearch=true&queryText="
})
response = requests.post(
xplore_search_url,
json=payload,
headers={
"User-Agent":
Paras.header["User-Agent"],
"Origin":
"https://ieeexplore.ieee.org",
"Referer":
"https://ieeexplore.ieee.org/search/searchresult.jsp?newsearch=true&queryText="
})
try:
result = response.json()
if result["records"]:
Expand All @@ -374,6 +383,7 @@ def ieee_xplore_lookup(s, parser):
pass
return None


def google_lookup_ieee_only(s, parser):
# Search Google with IEEE keyword
html = request_url('https://www.google.com/search?q=ieee+%s' % s)
Expand All @@ -388,12 +398,16 @@ def google_lookup_ieee_only(s, parser):
return res
return None


def google_lookup(s, parser):
html = request_url('https://www.google.com/search?q=%s' % s)
with open('debug.txt', 'w', encoding='utf8') as f:
f.write(html)

url_regexes = ['doiAcmUrl', 'acm', 'doiSpringer', 'doiWiley', 'doiUrl', 'ieee', 'doiCaltech', 'doiPubmed', 'neurips']
url_regexes = [
'doiAcmUrl', 'acm', 'doiSpringer', 'doiWiley', 'doiUrl', 'ieee',
'doiCaltech', 'doiPubmed', 'neurips'
]

found_urls = []
for url_regex in url_regexes:
Expand Down Expand Up @@ -488,15 +502,15 @@ def google_lookup(s, parser):

if url_regex == 'doiCaltech' and m and len(m.groups()) > 0:
html_cal = request_url('https://authors.library.caltech.edu/%s' %
m.groups()[0])
m.groups()[0])
m = Re.doiUrl.search(html_cal, re.M)
if m and len(m.groups()) > 0:
res = m.groups()[0]
res = res.replace('\\', '')
print("DOI from Google and Caltech: %s\n" % res)
return res

if url_regex == 'doiPubmed' and m and len(m.groups()) > 0:
if url_regex == 'doiPubmed' and m and len(m.groups()) > 0:
html_pubmed = request_url('https://www.ncbi.nlm.nih.gov/pubmed/%s' %
m.groups()[0])
m = Re.doiUrl.search(html_pubmed, re.M)
Expand All @@ -505,7 +519,7 @@ def google_lookup(s, parser):
res = res.replace('\\', '')
print("DOI from Google and PubMed: %s\n" % res)
return res

# Nowadays, DOIs for CVPR papers are hard to fetch without the "ieee" keyword.
html = request_url('https://www.google.com/search?q=ieee+%s' % s)
m = Re.ieee.search(html)
Expand Down
4 changes: 2 additions & 2 deletions config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ searchDOI = True
keepComments = False
useOfflineDOI = True
printSelfInfo = True
inputFileList = thing3d.in.bib
outputFile = thing3d.out.bib
inputFileList = talkdirector.in.bib
outputFile = talkdirector.out.bib
doiJsonFile = doi_dict.json
debugBibCrawler = True
debugStatistics = True
Expand Down
2 changes: 1 addition & 1 deletion doi_dict.json

Large diffs are not rendered by default.

0 comments on commit 0a7ff61

Please sign in to comment.