使用Python获取番剧信息(二)
A python script for hexo-bilibili-bangumi
github/hexo-bilibili-bangumi-addon
需要电脑有python环境
所有信息均来自bangumi.tv。仅用于补全插件所获得的信息,如有侵权,请联系删除。
使用方法
- 先安装插件,具体方法详见插件主页;
- 获取番剧数据,得到 `\source\_data\bangumis.json`;
- pip安装 `httpx`;
1 | pip install httpx |
- 根据已经获得的数据爬取每个番剧的信息,将python代码保存成文件放在项目根目录后运行。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
import httpx
import json
import os
def local_file(id, bangumis):
    """Fill ``bangumis`` from the local cache file when the subject is cached.

    The cache lives at ``./source/_data/bangumis-save.json``. If the file does
    not exist yet, an empty cache is created and the lookup counts as a miss.

    Args:
        id: Subject id to look up; compared with each cached entry's ``"id"``.
        bangumis: Object whose ``score``, ``des``, ``wish``, ``doing``,
            ``collect`` and ``totalCount`` attributes are overwritten on a hit.

    Returns:
        ``bangumis`` when a cached entry with the same id was found,
        otherwise ``0``.
    """
    cache_path = './source/_data/bangumis-save.json'
    if not os.path.exists(cache_path):
        with open(cache_path, 'x', encoding="utf-8") as new:
            new.write('{"wantWatch":[],"watching":[],"watched":[]}')
        return 0

    with open(cache_path, "r", encoding="utf-8") as js_file:
        try:
            py_data = json.load(js_file)
        except json.JSONDecodeError:
            # An unreadable cache is treated as a miss instead of aborting.
            return 0
    if not isinstance(py_data, dict):
        return 0

    # The three categories share one schema, so a single loop covers them all.
    for category in ("wantWatch", "watching", "watched"):
        for entry in py_data.get(category) or []:
            if entry.get("id") != id:
                continue
            for field in ("score", "des", "wish", "doing", "collect"):
                setattr(bangumis, field, entry.get(field, "-"))
            try:
                # The cache stores the wrapped form "全<N>话"; strip the first
                # and last character to get the bare count back.
                bangumis.totalCount = entry['totalCount'][1:-1]
            except (KeyError, TypeError):
                # Must be a bare count: a wrapped fallback such as "全12话"
                # would be wrapped a second time by the caller.
                bangumis.totalCount = "12"
            return bangumis
    return 0
def get_data(id):
    """Return the details of one subject, from the local cache or the API.

    The cache is consulted first through ``local_file``; only on a miss is
    ``https://api.bgm.tv/v0/subjects/<id>`` requested.

    Args:
        id: Subject id, appended to the API URL.

    Returns:
        A class object named ``bangumis`` whose attributes ``score``, ``des``,
        ``wish``, ``doing``, ``collect`` and ``totalCount`` hold the details.
        Values missing from the response stay ``"-"``; ``totalCount`` falls
        back to ``"12"`` when the infobox has no "话数" entry.
    """
    class bangumis:
        score = "-"
        des = "-"
        wish = "-"
        doing = "-"
        collect = "-"
        totalCount = "-"

    local_data = local_file(id, bangumis)
    # local_file signals a miss with 0; any other non-None value is a hit.
    if local_data is not None and local_data != 0:
        return local_data

    url = "https://api.bgm.tv/v0/subjects/" + str(id)
    headers = {
        'user-agent': 'Trrrrw/hexo-bilibili-bangumi-addon(https://github.com/Trrrrw/hexo-bilibili-bangumi-addon)',
        # Media types contain no spaces; "application / json" is malformed.
        'accept': 'application/json'
    }
    response = httpx.get(url=url, headers=headers)
    subject = json.loads(response.text)
    if not isinstance(subject, dict):
        subject = {}

    rating = subject.get('rating') or {}
    collection = subject.get('collection') or {}
    bangumis.score = rating.get('score', "-")
    bangumis.des = subject.get('summary', "-")
    bangumis.wish = collection.get('wish', "-")
    bangumis.doing = collection.get('doing', "-")
    bangumis.collect = collection.get('collect', "-")

    # Search the whole infobox rather than assuming a fixed position: a fixed
    # index fails when the list is shorter or the entry sits elsewhere.
    bangumis.totalCount = "12"
    for info in subject.get('infobox') or []:
        if isinstance(info, dict) and info.get('key') == "话数":
            bangumis.totalCount = info.get('value', "12")
            break
    return bangumis
def read():
    """Load ``bangumis.json`` and enrich every entry with subject details.

    Each entry of the ``wantWatch``, ``watching`` and ``watched`` lists is
    tagged with ``type = "番剧"`` and gets ``score``, ``des``, ``wish``,
    ``doing``, ``collect`` and ``totalCount`` from ``get_data``. The episode
    count is stored wrapped as ``"全<N>话"``.

    Returns:
        The parsed and enriched dictionary.
    """
    print("\n读取json信息")
    with open("./source/_data/bangumis.json", "r", encoding="utf-8") as js_file:
        py_data = json.load(js_file)

    # All three lists are handled the same way; a missing list is skipped
    # instead of raising TypeError when iterated.
    for category in ("wantWatch", "watching", "watched"):
        for entry in py_data.get(category) or []:
            entry["type"] = "番剧"
            print("\n正在获取 " + entry["title"] + " 的信息")
            details = get_data(entry["id"])
            for field in ("score", "des", "wish", "doing", "collect"):
                entry[field] = getattr(details, field)
            entry["totalCount"] = "全" + str(details.totalCount) + "话"
    return py_data
def write(py_data):
    """Write ``py_data`` as JSON to ``bangumis.json`` and ``bangumis-save.json``.

    Both files live in ``./source/_data/`` and receive identical content.

    Args:
        py_data: JSON-serializable data to store.

    Raises:
        TypeError: If ``py_data`` is not JSON-serializable. Neither file is
            modified in that case.
    """
    print("\n写入新的json")
    # Serialize before opening: "w" mode truncates at once, so a failing
    # dumps() after open() would leave an empty file behind.
    js_output_txt = json.dumps(py_data, ensure_ascii=False)
    for path in ("./source/_data/bangumis.json",
                 "./source/_data/bangumis-save.json"):
        with open(path, "w", encoding="utf-8") as js_output:
            js_output.write(js_output_txt)
if __name__ == "__main__":
    # Run the hexo command first, then enrich the JSON data and write it back.
    os.system("hexo bangumi -u")
    write(read())
    print("\n完成")
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import threading
import json
import httpx
import os
import time
def file_operate(option, data=None):
    """Read or write ``./source/_data/bangumis.json``.

    Args:
        option: ``'r'`` to read the file, ``'w'`` to write it. Any other
            value does nothing.
        data: Data to write in ``'w'`` mode. When omitted, the module-level
            ``data_current`` is written.

    Returns:
        In ``'r'`` mode the parsed JSON, or a dictionary with empty
        ``wantWatch``, ``watching`` and ``watched`` lists when the file is
        missing or not valid JSON. ``None`` otherwise.
    """
    path = './source/_data/bangumis.json'
    if option == 'r':
        try:
            with open(path, 'r', encoding='utf-8') as file_previous:
                return json.loads(file_previous.read())
        except (FileNotFoundError, json.decoder.JSONDecodeError):
            return {"wantWatch": [], "watching": [], "watched": []}
    elif option == 'w':
        # Serialize before opening: "w" mode truncates immediately, so a
        # failing dumps() after open() would wipe the existing file.
        payload = json.dumps(data_current if data is None else data,
                             ensure_ascii=False)
        with open(path, 'w', encoding='utf-8') as file_current:
            file_current.write(payload)
# Detail keys that are copied from a previously enriched entry.
_CACHED_FIELDS = ('score', 'des', 'wish', 'doing', 'collect', 'totalCount')
# Top-level lists of bangumis.json.
_CATEGORIES = ('wantWatch', 'watching', 'watched')
# Guards the shared ``matched`` counter, which worker threads increment.
_MATCHED_LOCK = threading.Lock()


def match_previous(bgm_previous, bgm=None):
    """Copy cached details from a previous entry onto a current entry.

    The previous entry is used only when its id equals the current one, its
    ``type`` is ``'番剧'`` (the marker ``match`` sets after filling an entry)
    and it carries every detail field.

    Args:
        bgm_previous: Entry from the previously stored file.
        bgm: Current entry to fill. When omitted, the module-level
            ``bgm_current`` is used.

    Returns:
        ``True`` if the details were copied, ``False`` otherwise.
    """
    target = bgm_current if bgm is None else bgm
    if target['id'] != bgm_previous.get('id'):
        return False
    if bgm_previous.get('type') != '番剧':
        return False
    if any(key not in bgm_previous for key in _CACHED_FIELDS):
        # Incomplete cache entry: report a miss so it is fetched again.
        return False
    for key in _CACHED_FIELDS:
        target[key] = bgm_previous[key]
    return True


def _fetch_subject(subject_id):
    """Request one subject from the bgm.tv API and return the decoded JSON."""
    url = "https://api.bgm.tv/v0/subjects/" + str(subject_id)
    headers = {
        'user-agent': 'Trrrrw/hexo-bilibili-bangumi-addon(https://github.com/Trrrrw/hexo-bilibili-bangumi-addon)',
        # Media types contain no spaces; "application / json" is malformed.
        'accept': 'application/json'
    }
    try:
        r = httpx.get(url=url, headers=headers)
    except httpx.ConnectTimeout:
        print('httpx.ConnectTimeout: The handshake operation timed out')
        # Retry once after a short pause; a second failure propagates.
        time.sleep(1)
        r = httpx.get(url=url, headers=headers)
    return json.loads(r.text)


def _apply_subject(target, dirt_data):
    """Fill the detail fields of ``target`` from an API response."""
    if not isinstance(dirt_data, dict):
        dirt_data = {}
    rating = dirt_data.get('rating') or {}
    collection = dirt_data.get('collection') or {}
    target['score'] = rating.get('score', "-")
    target['des'] = dirt_data.get('summary', "-")
    target['wish'] = collection.get('wish', "-")
    target['doing'] = collection.get('doing', "-")
    target['collect'] = collection.get('collect', "-")
    target['totalCount'] = '全*话'
    for info in dirt_data.get('infobox') or []:
        if isinstance(info, dict) and info.get('key') == '话数':
            target['totalCount'] = '全{}话'.format(info.get('value', '*'))
            break


def match(bgm=None):
    """Fill one entry from the previous file or, failing that, from the API.

    The entry is looked up in every list of the module-level
    ``data_previous``. On a miss the details are requested from the API.
    The entry is then tagged with ``type = '番剧'`` and the module-level
    ``matched`` counter is incremented.

    Args:
        bgm: Entry to fill. When omitted, the module-level ``bgm_current``
            is used; concurrent callers should pass the entry explicitly so
            each call works on its own entry.
    """
    global matched
    # Bind the entry once; the global may be rebound while this call runs.
    target = bgm_current if bgm is None else bgm
    found = any(
        match_previous(previous, target)
        for category in _CATEGORIES
        for previous in data_previous.get(category) or []
    )
    if not found:
        print('\r\033[K本地无信息,将通过API获取:{}'.format(
            target.get('title', target['id'])), end='')
        _apply_subject(target, _fetch_subject(target['id']))
    target['type'] = '番剧'
    with _MATCHED_LOCK:
        matched = matched + 1


if __name__ == "__main__":
    from concurrent.futures import ThreadPoolExecutor, as_completed

    # Load the stored bangumis.json as data_previous before it is replaced.
    data_previous = file_operate('r')
    # Produce a new bangumis.json.
    os.system("hexo bangumi -u")
    # Load the new bangumis.json as data_current.
    data_current = file_operate('r')
    matched = 0
    entries = [
        bgm
        for category in _CATEGORIES
        for bgm in data_current.get(category) or []
    ]
    # Each task receives its own entry, and the pool bounds the number of
    # simultaneous API requests.
    with ThreadPoolExecutor(max_workers=8) as pool:
        futures = {pool.submit(match, bgm): bgm for bgm in entries}
        for future in as_completed(futures):
            error = future.exception()
            if error is not None:
                # Best effort: report the failure and keep the other entries.
                print('\n{}: {!r}'.format(futures[future].get('title'), error))
    print('\r\033[K成功获取{}个番剧信息'.format(matched))
    file_operate('w')
补充
2022/10/13
在代码最前面添加了
1 | os.system("hexo bangumi -u") |
因为之前用bat的方法没有用。
2022/10/27
- 修复“想看”列表无法获取信息的bug(之前没用过想看,写的时候就没想到◑﹏◐);
- 可以把之前获取的数据存下来,每次只要获取新增的番剧信息就行,这样速度会快很多;
- 修复从本地获取数据时,“全”与“话”会重复的bug。
2023/01/26
- 使用threading多线程,虽然好像并没有加快,也算是整理了一遍代码;
- 修复了部分番剧话数错误;
- 不用多存一个文件了;
- 使用方法和之前一样。
本博客所有文章除特别声明外,均采用 CC BY-NC-SA 4.0 许可协议。转载请注明来源 Trrrrw!
评论