This repository has been archived by the owner on Dec 31, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlw.py
executable file
·280 lines (234 loc) · 11 KB
/
lw.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
## -*- coding:gbk -*-
# Mar 23, 2018
Version = '1.4'
# Author: ZhuangTao, [email protected]
# Platform: Windows - ZH_CN only, must have access to 'learn.tsinghua.edu.cn'
# Dependencies: requests_2.18.4+ - https://pypi.python.org/pypi/requests#downloads, requests-x.xx.x.tar.gz,
# and run "setup.py install"
# A simple script to help sort LearnWeb files in my PC, developed with free time
import re, json, os, sys, getpass, datetime, sysass #, requests
## -*- Settings -*-
# Default destination path in Windows will be 'C:/Users/Username/Desktop'
# *** DO NOT change this item. ***
DesktopPath = sysass.get_desktop()
# You can use desktopPath or whatever you like, default "DesktopPath+'/LearnWeb'".
home_path = DesktopPath + '/LearnWeb'
# To disable HTTPS link, change 'https' in host to 'http', default enabled.
host = 'https://learn.tsinghua.edu.cn'
# Change the max filesize(MB) limit to deny the download when syn, default 20.
maxsize = 20
# Move files in '电子教案' to root folder in each course, default enabled.
vroot = True
# Detailed output, default disabled.
detailed_output = False
## -*- Program Starts -*-
print "LearnWeb Sync "+ Version +"\n# Sep 13, 2017\n# Author: ZhuangTao, [email protected]"
print "# Platform: Windows - ZH_CN only, must have access to 'learn.tsinghua.edu.cn'"
print '# Dependencies: requests_2.18.4+ - https://pypi.python.org/pypi/requests#downloads'
print '#download "requests-x.xx.x.tar.gz",and run "setup.py install", you can also use "Auto download".\n'
try:
import requests
except ImportError:
print '* 当前运行环境缺少requests模块,是否自动下载(y=是, 其他键=否)? (大约消耗几MB网络流量):',
if raw_input() == 'y':
t = sys.path[0]
os.chdir(t+"/lib/requests-2.18.4/")
os.system(r"python setup.py install")
print '依赖模块安装完成,请尝试重新运行脚本。如果多次安装失败,请联系我们。'
os.system("cls")
else:
print '可以使用 "pip install requests"命令下载\n或访问 "https://pypi.python.org/pypi/requests#downloads",下载tar.gz文件解压后,运行命令"setup.py install"即可安装。\n'
os._exit(-1)
def get_course_list():
global g_cookie, host, detailed_output
begin = datetime.datetime.now()
# 获取课程列表网页
get_url = host + r'/MultiLanguage/lesson/student/MyCourse.jsp?language=cn'
headers = {'Cookie': g_cookie}
resp = requests.get(get_url, headers=headers, verify=True, allow_redirects=False)
# 解析列表
patt = [r'<!--td height="25" class="tableNO">(.*?)</td-->',
r'<a href="(.*?)" target="_blank">',
r'(.*)</a><span style="color:red">.*</span></td>',
r'\(.*\)</a><span style="color:red">(.*)</span></td>',
r'<td width="15%" ><span class="red_text">(.*?)</span>个未交作业</td>'.decode('gbk'),
r'<td width="15%" ><span class="red_text">(.*?)</span>个未读公告</td>'.decode('gbk'),
r'<td width="15%" ><span class="red_text">(.*?)</span>个新文件</td>'.decode('gbk')]
res = []
courses = []
# print resp.text #debug
for columnid in range(patt.__len__()):
res.append(re.findall(patt[columnid], resp.text))
for courseid in range(res[0].__len__()):
tmp_course = (res[0][courseid], res[1][courseid], re.findall("(.*)\(\d*\)\(.*\)",res[2][courseid].strip())[0],
res[3][courseid], res[4][courseid], res[5][courseid], res[6][courseid])
courses.append(tmp_course)
if detailed_output:
for t in courses:
print t[2]
end = datetime.datetime.now()
# print (end-begin)
if (end - begin) > datetime.timedelta(seconds=1.0):
print '\n 【当前网络环境较差】'
bad_network = True
return courses
def login_web(account_dict):
global g_cookie, username, password, host
url = host + '/MultiLanguage/lesson/teacher/loginteacher.jsp'
data = account_dict
# 提交用户名与密码进行登陆, 初始化g_cookie
resp = requests.post(url, data=data, allow_redirects=False)
# print resp.headers['set-cookie']
g_cookie = resp.headers['set-cookie'].replace(r' path=/,', '').replace(r' path=/', '')
def get_file_info(url_raw, t_cookies):
r = requests.head(url_raw, headers={'Cookie': t_cookies})
# print r.headers
# print '.',
if 'Content-Disposition' in r.headers:
#print r.headers
#print '.',
t = re.findall(r'attachment;filename="(.*)"', r.headers['Content-Disposition'])[0]
s = r.headers['Content-Length']
return (t.decode('gbk'), s) # change:.encode('utf8')
else:
return 'ERR_FILE_ERROR'
def get_course_detail(course):
global host
course_url = host + course[1]
resp = requests.get(course_url, headers={'Cookie': g_cookie}, verify=True, allow_redirects=False)
detail_page = resp.text
course_cookie = resp.headers['set-cookie'].replace(r' path=/,', '').replace(r' path=/', '')
announce_url = re.findall(r'<a href="(.*)" target="content_frame" >课程公告</a>'.decode('gbk'), detail_page)[0]
file_url = re.findall(r'<a href="(.*?)" target="content_frame" >课程文件</a>'.decode('gbk'), detail_page)[0]
assignment_url = re.findall(r'<a href="(.*)" target="content_frame" >课程作业</a>'.decode('gbk'), detail_page)[0]
# print 'resp.text:',resp.text
# print 'file_url:',file_url
resp = requests.get(host + file_url, headers={'Cookie': course_cookie}, verify=True, allow_redirects=False)
filelist_cookie = resp.headers['set-cookie'].replace(r' path=/,', '').replace(r' path=/', '')
filelist_page = resp.text
tables = re.findall(r'<table>([.\s]*)</table>', filelist_page)
# print tables.__len__()
folder_names = re.findall(r';NN_showImage\(\d,\d\)">(.*?)</td>',
filelist_page) # print 'filelist_page',filelist_page
# print 'folder_names:',folder_names
patt = [r'<td width="80">(\d*?)</td>' # 序号
, r'left=100,top=100"\);\'>(.*?)</a></td>-->' # 下载链接
, r'<a target="_top" href="(.*)" >' # 标题
# ,r'<td width="300" align="center">(.*)</td>' #简介
, r'<td width="80" align="center">(.*)</td>' # 大小
, r'<td width="100" align="center">(.*)</td>' # 上传日期
# ,r"<td width='100' align='center'>(.*)\s*</td>", #新文件
]
folder = []
tables = re.findall('<table \s*?id="table_box" cellspacing="1" cellpadding="0">([\s\S]*?)</table>', filelist_page)
for folder_i in range(tables.__len__()):
res = []
for i in range(patt.__len__()):
res.append(re.findall(patt[i], tables[folder_i]))
tListInOneFolder = []
for j in range(res[folder_i].__len__()):
tTouple = ()
for k in range(patt.__len__()):
tTouple += (res[k][j],)
file_info = get_file_info(host + tTouple[2], course_cookie)
if file_info == 'ERR_FILE_ERROR':
print tTouple[1], " :状态异常跳过"
continue
tTouple += file_info
tListInOneFolder.append(tTouple)
folder.append(tListInOneFolder)
# print 'Retrived', tListInOneFolder.__len__(), 'file in folder', folder_i
# print
# print folder.__len__(), 'folders got:',folder
return (folder, folder_names, course_cookie)
def syn_file_in_course(course, folder_names, folders, t_cookie):
global home_path, host, new_files, maxsize, vroot, detailed_output
passed = 0
new = 0
for i in range(folders.__len__()): # got 3 folder
#print folder_names[i] # debug
if folder_names[i].encode('gbk') == '电子教案' and vroot:
folder_path = home_path + '/' + course[2]
else:
folder_path = home_path + '/' + course[2] + '/' + folder_names[i]
# print 'folder_path',folder_path
sysass.mkdir(folder_path)
for file in folders[i]: # got 14 files in folder[0]
file_path = folder_path + '/' + file[5]
if os.path.exists(file_path) and os.path.getsize(file_path) == int(file[6]):
passed += 1
if detailed_output:
print file[5], "文件完整,跳过"
else:
if int(file[6]) > (maxsize * 1024 * 1024):
print '跳过同步:',file[5], '超过' + str(maxsize) + 'M:', int(file[6]) / 1024.00 / 1024.00, 'MB'
else:
new += 1
new_list.append('[' + course[2] + ']: ' + file[5])
print "正在同步:", file[5], '文件大小:', int(file[6]) / 1024.00 / 1024.00, 'MB'
r = requests.get(host + file[2], headers={'Cookie': t_cookie})
with open(file_path, "wb") as code:
code.write(r.content)
return new, passed
def _init_cfg(t):
# 获取当前路径
curr_dir = os.path.dirname(os.path.realpath(__file__))
config_file = curr_dir + os.sep + "acc.cfg"
if (os.path.exists(config_file)) and t == 1:
with open(config_file, 'r') as json_file:
account = json.load(json_file)
else:
username = raw_input('校园网账号: ')
print '校园网密码(请勿在公共电脑使用,密码保存在本地,请确认电脑私人使用,输入完毕回车): ',
password = getpass.getpass('')
account = {"userid": username, "userpass": password, "submit1": "%E7%99%BB%E5%BD%95"} # 数据
with open(config_file, 'w') as json_file:
json.dump(account, json_file, ensure_ascii=False)
# print account
return account
if __name__ == '__main__':
global g_cookie, new_list, bad_network
g_cookie = ''
new_list = []
bad_network = False
try:
# *-- 登录学堂 --*
account = _init_cfg(1)
res = login_web(account)
# 记录运行开始时刻
begin = datetime.datetime.now()
# *-- 获取课程列表 --*
courses = get_course_list()
# *-- 账户登录失败允许重新输入凭据 --*
while courses.__len__() == 0:
print '账户验证失败'
begin = datetime.datetime.now() # 更新运行时刻
res = login_web(_init_cfg(0))
courses = get_course_list()
os.system('cls')
print 'LearnWeb Sync '+Version+'\n'
print '欢迎使用,',str(account['userid'])+", 您有", courses.__len__(), "门课程。"
# *-- 同步课程文件 --*
new = 0
for course in courses:
print "\n>>", course[2],course[3]
if course[3].__len__():
print '新版学堂暂不支持'
continue
(folders, folder_names, course_cookie) = get_course_detail(course)
(new_files, passed) = syn_file_in_course(course, folder_names, folders, course_cookie)
new += new_files
print '获取到', new_files, '个新文件, 跳过', passed, '个完整文件'
except KeyboardInterrupt:
print "\n>> *User aborted operation\n"
else:
end = datetime.datetime.now()
print '\n-*- ', (courses.__len__()), '门课程同步完毕, 下载了', new, '个新文件, 用时', end - begin, ' -*-'
if bad_network:
print '网络环境较差'
if new_list.__len__():
for str in new_list:
print str
print
# *-- 读取课程作业 --*
# *-- 读取课程公告 --*