-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpaper.py
executable file
·227 lines (213 loc) · 9.88 KB
/
paper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
from sanic import Blueprint
from sanic.response import text
import sanic
from sanic import Sanic, response
from sanic_session import Session
from sanic.response import text, redirect, json, ResponseStream, file_stream, file, html
import motor
import motor.motor_asyncio
from bson.objectid import ObjectId
from sanic_cors import CORS
from auth import protected, add_user_info_cookie
from query import PaperSearch
from mycrypt import encrypt, decrypt, DATABASE_KEY
from os.path import isfile, join
import requests
from utils import get_collection
from config import *
# Blueprint that groups every route declared below with ``@paper.get(...)``;
# all of them are served under the ``/api/v1/paper`` URL prefix.
paper = Blueprint('paper', url_prefix='/api/v1/paper')
@paper.get("/test")
async def paper_test(request):
    """Smoke-test endpoint: run one fixed lookup against a collection.

    Query parameters:
        source: name of the collection to probe (defaults to "arxiv").

    Returns:
        JSON ``{"status": "ok"}`` once the lookup has completed (the fetched
        document is discarded), or a 416 text response when the collection
        cannot be resolved.
    """
    cors_headers = {"Access-Control-Allow-Origin": "*"}
    source = request.args.get("source", "arxiv")
    collection = get_collection(source)
    # get_collection() can yield None for an unknown source (the /info handler
    # guards against it too); fail cleanly instead of raising AttributeError.
    if collection is None:
        return text("The collection doesn't exists!", status=416, headers=cors_headers)
    # Hard-coded document id used purely as a connectivity probe.
    probe_id = ObjectId("6569d43b2c9d068894c84b8f")
    # No dedicated Motor client is built here any more: the previous one was
    # never used for the query and was never closed.
    await collection.find_one({"_id": probe_id})
    return json({"status": "ok"}, headers=cors_headers)
@paper.get("/info")
async def get_paper_info(request):
    """Return the detailed metadata of a single paper as JSON.

    Query parameters:
        id: string form of the paper's MongoDB ``_id`` (required).
        source: collection to read from; defaults to "100pdfs".

    Returns:
        200 with a JSON object (title, abstract, authors, tags, ...) on
        success. 416 with a plain-text message when ``id`` is missing, the
        collection is unknown, or no paper matches the id.
    """
    cors_headers = {"Access-Control-Allow-Origin": "*"}

    paper_id = request.args.get("id")
    if paper_id is None:
        return text("must query with an id", status=416, headers=cors_headers)

    source = request.args.get("source", "100pdfs")
    collection = get_collection(source)
    if collection is None:
        return text("The collection doesn't exists!", status=416, headers=cors_headers)

    # A malformed id can never match a document; treat it as "not found"
    # instead of letting ObjectId() raise and surface as a 500.
    document = None
    if ObjectId.is_valid(paper_id):
        document = await collection.find_one({"_id": ObjectId(paper_id)})
    if document is None:
        return text(
            "The paper doesn't exists!, the paper id you asked is {}".format(paper_id),
            status=416,
            headers=cors_headers,
        )

    if source == "arxiv":
        # arXiv records store the authors under either "author" or "authors".
        author = document["author"] if "author" in document else document["authors"]
        affiliation = None
        author_all = None
    else:
        author = document["author"]["name"]
        affiliation = document["author"]["affiliation"]
        # Pair each author name with the affiliation at the same position.
        author_all = dict(zip(author, affiliation))

    is_100pdfs = source == "100pdfs"
    ans = {
        "id": paper_id,
        "title": document["title"],
        "abstract": document["abstract"],
        "doc_type": None,
        "year": document["year"] if is_100pdfs else None,
        "volume": document["volume"] if is_100pdfs else None,
        "cite": None,
        "kqi": None,
        "tag": document["tag"] if is_100pdfs else document["categories"],
        "doi": document["doi"] if "doi" in document else None,
        "author": author,
        "affiliation": affiliation,
        "author_all": author_all,
        "url": "https://arxiv.org/abs/" + document["id"] if source == "arxiv" else None,
        "keywords": document["keywords"] if is_100pdfs else None,
    }
    return json(ans, headers=cors_headers)
@paper.get("/download")
@protected
async def download(request):
    """Serve the PDF of a paper, either from disk or via arXiv.

    Query parameters:
        id: string form of the paper's MongoDB ``_id`` (required).
        source: "100pdfs" (default) streams the file stored at the document's
            ``pdf_address``; "arxiv" redirects to the PDF on arxiv.org.

    Returns:
        A streamed file or a redirect on success. 416 with a plain-text
        message when ``id`` is missing, the source is unsupported, the
        collection cannot be resolved, or no paper matches the id.
    """
    cors_headers = {"Access-Control-Allow-Origin": "*"}

    paper_id = request.args.get("id")
    if paper_id is None:
        return text("must query with an id", status=416, headers=cors_headers)

    source = request.args.get("source", "100pdfs")
    # Reject unsupported sources before touching the database, so an unknown
    # source cannot crash on a missing collection.
    if source not in ("100pdfs", "arxiv"):
        return text("not imply", status=416, headers=cors_headers)

    collection = get_collection(source)
    if collection is None:
        return text("The collection doesn't exists!", status=416, headers=cors_headers)

    # A malformed id can never match a document; treat it as "not found"
    # instead of letting ObjectId() raise and surface as a 500.
    document = None
    if ObjectId.is_valid(paper_id):
        document = await collection.find_one({"_id": ObjectId(paper_id)})
    if document is None:
        return text(
            "The paper doesn't exists!, the paper id you asked is {}".format(paper_id),
            status=416,
            headers=cors_headers,
        )

    if source == "arxiv":
        arxiv_pdf = "https://arxiv.org/pdf/" + document["id"] + ".pdf"
        return redirect(arxiv_pdf, headers=cors_headers)

    # source == "100pdfs": stream the locally stored PDF, named after the title.
    return await file_stream(
        document["pdf_address"],
        filename=document["title"] + ".pdf",
        headers=cors_headers,
    )
@paper.get("/tables/img")
async def tables_img(requests):
    """List the URLs of all rendered table images of a paper.

    Query parameters:
        id: string form of the paper's MongoDB ``_id`` (required).
        source: collection to read from; defaults to "100pdfs".

    Returns:
        200 with a JSON object mapping each table index to the
        ``/api/v1/paper/table/img`` URL that serves it. 416 with a plain-text
        message when ``id`` is missing, the collection is unknown, or no
        paper matches the id.
    """
    cors_headers = {"Access-Control-Allow-Origin": "*"}

    paper_id = requests.args.get("id")
    if paper_id is None:
        return text("must query with an id", status=416, headers=cors_headers)

    source = requests.args.get("source", "100pdfs")
    collection = get_collection(source)
    if collection is None:
        return text("The collection doesn't exists!", status=416, headers=cors_headers)

    # A malformed id can never match a document; treat it as "not found"
    # instead of letting ObjectId() raise and surface as a 500.
    document = None
    if ObjectId.is_valid(paper_id):
        document = await collection.find_one({"_id": ObjectId(paper_id)})
    if document is None:
        return text(
            "The paper doesn't exists!, the paper id you asked is {}".format(paper_id),
            status=416,
            headers=cors_headers,
        )

    url = "/api/v1/paper/table/img"
    image_count = len(document["table_rendition_address"])
    res = {i: url + f"?id={paper_id}&index={i}" for i in range(image_count)}
    return json(res, headers=cors_headers)
@paper.get("/tables")
async def tables(requests):
    """List the URLs of all CSV tables extracted from a paper.

    Query parameters:
        id: string form of the paper's MongoDB ``_id`` (required).
        source: collection to read from; defaults to "100pdfs".

    Returns:
        200 with a JSON object mapping each table index to the
        ``/api/v1/paper/table`` URL that serves it. 416 with a plain-text
        message when ``id`` is missing, the collection is unknown, or no
        paper matches the id.
    """
    cors_headers = {"Access-Control-Allow-Origin": "*"}

    paper_id = requests.args.get("id")
    if paper_id is None:
        return text("must query with an id", status=416, headers=cors_headers)

    source = requests.args.get("source", "100pdfs")
    collection = get_collection(source)
    if collection is None:
        return text("The collection doesn't exists!", status=416, headers=cors_headers)

    # A malformed id can never match a document; treat it as "not found"
    # instead of letting ObjectId() raise and surface as a 500.
    document = None
    if ObjectId.is_valid(paper_id):
        document = await collection.find_one({"_id": ObjectId(paper_id)})
    if document is None:
        return text(
            "The paper doesn't exists!, the paper id you asked is {}".format(paper_id),
            status=416,
            headers=cors_headers,
        )

    url = "/api/v1/paper/table"
    csv_count = len(document["csv_address"])
    res = {i: url + f"?id={paper_id}&index={i}" for i in range(csv_count)}
    return json(res, headers=cors_headers)
@paper.get("/table/img")
async def table_img(requests):
    """Serve one rendered table image of a paper as PNG.

    Query parameters:
        id: string form of the paper's MongoDB ``_id`` (required).
        index: zero-based position in the document's
            ``table_rendition_address`` list; defaults to 0 when omitted.
        source: collection to read from; defaults to "100pdfs".

    Returns:
        The image file on success. 416 with a plain-text message when ``id``
        is missing, ``index`` is not a non-negative integer or is out of
        range, the collection is unknown, or no paper matches the id.
    """
    cors_headers = {"Access-Control-Allow-Origin": "*"}

    paper_id = requests.args.get("id")
    if paper_id is None:
        return text("must query with an id", status=416, headers=cors_headers)

    # Apply the default *before* converting: int(None) raises, which made the
    # intended "index defaults to 0" behavior unreachable.
    raw_index = requests.args.get("index")
    try:
        index = 0 if raw_index is None else int(raw_index)
    except ValueError:
        index = -1  # funnel non-numeric input into the rejection below
    # Negative values would silently index from the end of the list.
    if index < 0:
        return text("The index must be a non-negative integer!", status=416, headers=cors_headers)

    source = requests.args.get("source", "100pdfs")
    collection = get_collection(source)
    if collection is None:
        return text("The collection doesn't exists!", status=416, headers=cors_headers)

    # A malformed id can never match a document; treat it as "not found"
    # instead of letting ObjectId() raise and surface as a 500.
    document = None
    if ObjectId.is_valid(paper_id):
        document = await collection.find_one({"_id": ObjectId(paper_id)})
    if document is None:
        return text(
            "The paper doesn't exists!, the paper id you asked is {}".format(paper_id),
            status=416,
            headers=cors_headers,
        )

    image_paths = document["table_rendition_address"]
    if index >= len(image_paths):
        return text("The index is too large!", status=416, headers=cors_headers)
    return await file(image_paths[index], mime_type="image/png", headers=cors_headers)
@paper.get("/table")
async def table(requests):
    """Stream one CSV table extracted from a paper.

    Query parameters:
        id: string form of the paper's MongoDB ``_id`` (required).
        index: zero-based position in the document's ``csv_address`` list;
            defaults to 0 when omitted.
        source: collection to read from; defaults to "100pdfs".

    Returns:
        The CSV file as a stream on success. 416 with a plain-text message
        when ``id`` is missing, ``index`` is not a non-negative integer or is
        out of range, the collection is unknown, or no paper matches the id.
    """
    cors_headers = {"Access-Control-Allow-Origin": "*"}

    paper_id = requests.args.get("id")
    if paper_id is None:
        return text("must query with an id", status=416, headers=cors_headers)

    # Apply the default *before* converting: int(None) raises, which made the
    # intended "index defaults to 0" behavior unreachable.
    raw_index = requests.args.get("index")
    try:
        index = 0 if raw_index is None else int(raw_index)
    except ValueError:
        index = -1  # funnel non-numeric input into the rejection below
    # Negative values would silently index from the end of the list.
    if index < 0:
        return text("The index must be a non-negative integer!", status=416, headers=cors_headers)

    source = requests.args.get("source", "100pdfs")
    collection = get_collection(source)
    if collection is None:
        return text("The collection doesn't exists!", status=416, headers=cors_headers)

    # A malformed id can never match a document; treat it as "not found"
    # instead of letting ObjectId() raise and surface as a 500.
    document = None
    if ObjectId.is_valid(paper_id):
        document = await collection.find_one({"_id": ObjectId(paper_id)})
    if document is None:
        return text(
            "The paper doesn't exists!, the paper id you asked is {}".format(paper_id),
            status=416,
            headers=cors_headers,
        )

    csv_paths = document["csv_address"]
    if index >= len(csv_paths):
        return text("The index is too large!", status=416, headers=cors_headers)
    # The path comes straight from the document; the former hand-built
    # "/home/share/files/100_PDF_csv/..." path was immediately overwritten
    # and only added a possible KeyError on "paper_id".
    return await file_stream(csv_paths[index], mime_type="text/csv", headers=cors_headers)
@paper.get("/pics")
async def pics(requests):
    """List the URLs of all pictures extracted from a paper.

    Query parameters:
        id: string form of the paper's MongoDB ``_id`` (required).
        source: collection to read from; defaults to "100pdfs".

    Returns:
        200 with a JSON object mapping each picture index to the
        ``/api/v1/paper/pic`` URL that serves it. 416 with a plain-text
        message when ``id`` is missing, the collection is unknown, or no
        paper matches the id.
    """
    cors_headers = {"Access-Control-Allow-Origin": "*"}

    paper_id = requests.args.get("id")
    if paper_id is None:
        return text("must query with an id", status=416, headers=cors_headers)

    source = requests.args.get("source", "100pdfs")
    collection = get_collection(source)
    if collection is None:
        return text("The collection doesn't exists!", status=416, headers=cors_headers)

    # A malformed id can never match a document; treat it as "not found"
    # instead of letting ObjectId() raise and surface as a 500.
    document = None
    if ObjectId.is_valid(paper_id):
        document = await collection.find_one({"_id": ObjectId(paper_id)})
    if document is None:
        return text(
            "The paper doesn't exists!, the paper id you asked is {}".format(paper_id),
            status=416,
            headers=cors_headers,
        )

    url = "/api/v1/paper/pic"
    picture_count = len(document["pics_address"])
    res = {i: url + f"?id={paper_id}&index={i}" for i in range(picture_count)}
    return json(res, headers=cors_headers)
@paper.get("/pic")
async def pic(requests):
    """Serve one picture extracted from a paper as PNG.

    Query parameters:
        id: string form of the paper's MongoDB ``_id`` (required).
        index: zero-based position in the document's ``pics_address`` list;
            defaults to 0 when omitted.
        source: collection to read from; defaults to "100pdfs".

    Returns:
        The image file on success. 416 with a plain-text message when ``id``
        is missing, ``index`` is not a non-negative integer or is out of
        range, the collection is unknown, or no paper matches the id.
    """
    cors_headers = {"Access-Control-Allow-Origin": "*"}

    paper_id = requests.args.get("id")
    if paper_id is None:
        return text("must query with an id", status=416, headers=cors_headers)

    # Apply the default *before* converting: int(None) raises, which made the
    # intended "index defaults to 0" behavior unreachable.
    raw_index = requests.args.get("index")
    try:
        index = 0 if raw_index is None else int(raw_index)
    except ValueError:
        index = -1  # funnel non-numeric input into the rejection below
    # Negative values would silently index from the end of the list.
    if index < 0:
        return text("The index must be a non-negative integer!", status=416, headers=cors_headers)

    source = requests.args.get("source", "100pdfs")
    collection = get_collection(source)
    if collection is None:
        return text("The collection doesn't exists!", status=416, headers=cors_headers)

    # A malformed id can never match a document; treat it as "not found"
    # instead of letting ObjectId() raise and surface as a 500.
    document = None
    if ObjectId.is_valid(paper_id):
        document = await collection.find_one({"_id": ObjectId(paper_id)})
    if document is None:
        return text(
            "The paper doesn't exists!, the paper id you asked is {}".format(paper_id),
            status=416,
            headers=cors_headers,
        )

    picture_paths = document["pics_address"]
    if index >= len(picture_paths):
        return text("The index is too large!", status=416, headers=cors_headers)
    return await file(picture_paths[index], mime_type="image/png", headers=cors_headers)