biliarchiver/biliarchiver_archive_bvids.py

122 lines
4.1 KiB
Python
Raw Normal View History

2023-06-01 07:00:37 -07:00
import asyncio
2023-06-01 08:36:21 -07:00
import os
2023-06-01 07:00:37 -07:00
import argparse
2023-06-02 01:18:28 -07:00
2023-06-02 13:32:11 -07:00
from _biliarchiver_archive_bvid import archive_bvid
2023-06-02 01:18:28 -07:00
from bilix.sites.bilibili.downloader import DownloaderBilibili
from rich.console import Console
2023-06-03 03:26:11 -07:00
from httpx import Client
2023-06-02 01:18:28 -07:00
from rich.traceback import install
install()
from _biliarchiver_archive_bvid import BILIBILI_IDENTIFIER_PERFIX
2023-06-01 07:00:37 -07:00
def parse_args(argv=None):
    """Parse the command-line options of the batch archiver.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            exactly what a plain ``parse_args()`` call has always done.

    Returns:
        argparse.Namespace with three attributes:
        ``cookies`` (str, defaults to ``'~/.cookies.txt'``),
        ``bvids`` (str, required) and
        ``skip_exist`` (bool, ``False`` unless ``--skip-exist`` is passed).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--cookies', type=str, default='~/.cookies.txt')
    # Help text: "path of the file that lists the bvids".
    parser.add_argument('--bvids', type=str, help='bvids 列表的文件路径', required=True)
    # Help text: "skip items that already exist on IA (only p1 is checked)".
    parser.add_argument('--skip-exist', action='store_true',
                        help='跳过 IA 上已存在的 item (只检查 p1 是否存在)')
    return parser.parse_args(argv)
def check_ia_item_exist(client: Client, identifier: str) -> bool:
    """Return whether *identifier* is already taken on archive.org.

    Sends a GET request to archive.org's ``check_identifier.php`` service
    and interprets the ``type`` / ``code`` fields of its JSON reply.

    Args:
        client: HTTP client used to send the request.
        identifier: Item identifier to look up.

    Returns:
        ``False`` when the service answers ``available`` (the identifier is
        free), ``True`` when it answers ``not_available``.

    Raises:
        ValueError: If the reply's ``type`` is not ``'success'`` or its
            ``code`` is neither ``'available'`` nor ``'not_available'``.
        Exception: Whatever ``raise_for_status()`` raises for an error
            HTTP status.
    """
    params = {
        'identifier': identifier,
        'output': 'json',
    }
    r = client.get('https://archive.org/services/check_identifier.php', params=params)
    r.raise_for_status()
    r_json = r.json()

    # Explicit raise instead of `assert`: the check validates remote data and
    # must keep working when Python runs with -O (which strips asserts).
    if r_json['type'] != 'success':
        raise ValueError(f'Unexpected type: {r_json["type"]}')

    code = r_json['code']
    if code == 'available':
        return False
    if code == 'not_available':
        return True
    raise ValueError(f'Unexpected code: {r_json["code"]}')
2023-06-01 07:00:37 -07:00
def _read_bvids(path):
    """Return the stripped, non-blank lines of the bvid list file at *path*."""
    with open(path, 'r', encoding='utf-8') as f:
        stripped = (line.strip() for line in f)
        # Blank lines (e.g. a trailing newline at EOF) are not bvids.
        return [bvid for bvid in stripped if bvid]


def main():
    """Archive every bvid listed in the file passed via ``--bvids``.

    Steps: parse the CLI options, read the bvid list, create a dedicated
    event loop, build the bilibili downloader from the values in ``config``,
    load cookies from the ``--cookies`` file, verify the login, then schedule
    one ``archive_bvid`` task per bvid and wait for all of them to finish.

    Returns early (without archiving anything) when the login check fails.
    With ``--skip-exist`` a bvid is skipped when its ``..._p1`` identifier
    already exists on archive.org.
    """
    # `--bvids` is declared required by the argument parser, so no extra
    # None-check is needed here.
    args = parse_args()
    bvids = _read_bvids(args.bvids)

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    from config import video_concurrency, part_concurrency, stream_retry

    # sess_data is left as None; the session cookies are loaded from the
    # cookies file right below instead.
    d = DownloaderBilibili(hierarchy=True, sess_data=None,
        video_concurrency=video_concurrency,
        part_concurrency=part_concurrency,
        stream_retry=stream_retry,
    )
    update_cookies_from_file(d.client, args.cookies)

    # Synchronous client sharing the downloader's cookies and headers; used
    # for the login check and the archive.org existence checks.
    client = Client(cookies=d.client.cookies, headers=d.client.headers)
    try:
        logined = is_login(client)
        if not logined:
            return

        d.progress.start()

        # The event loop only keeps weak references to tasks, so hold strong
        # references here until each task completes.
        pending = set()
        for index, bvid in enumerate(bvids):
            if args.skip_exist:
                identifier = f'{BILIBILI_IDENTIFIER_PERFIX}-{bvid}_p1'
                if check_ia_item_exist(client, identifier):
                    print(f'IA 上已存在 {identifier} ,跳过')
                    continue

            # Throttle: let the loop run in short slices until the number of
            # unfinished tasks drops to video_concurrency or below.
            # NOTE(review): `>` admits a new task when the count equals
            # video_concurrency, and all_tasks() counts every unfinished task
            # on the loop, not only the ones created here -- confirm this is
            # the intended limit.
            while len(asyncio.all_tasks(loop)) > video_concurrency:
                loop.run_until_complete(asyncio.sleep(0.01))

            print(f'=== {bvid} ({index+1}/{len(bvids)}) ===')
            task = loop.create_task(archive_bvid(d, bvid, logined=logined))
            pending.add(task)
            task.add_done_callback(pending.discard)

        # Drain: keep running the loop until no unfinished task is left.
        while len(asyncio.all_tasks(loop)) > 0:
            loop.run_until_complete(asyncio.sleep(1))
    finally:
        # Release the synchronous client's connections on every exit path.
        client.close()
2023-06-02 01:18:28 -07:00
2023-06-01 07:00:37 -07:00
2023-06-04 01:13:14 -07:00
def update_cookies_from_file(client: Client, cookies_path: str):
    """Load the bilibili cookies of a Mozilla-format cookie file into *client*.

    Only cookies whose domain contains ``'bilibili'`` are copied; each one is
    stored with ``client.cookies.set`` together with its domain and path.
    Prints how many cookies were loaded and a warning when more than 100
    were copied.

    Args:
        client: Object whose ``cookies`` jar receives the cookies.
        cookies_path: Path of the cookie file; a leading ``~`` is expanded.

    Raises:
        FileNotFoundError: If the cookie file does not exist.
        ValueError: If no ``SESSDATA`` cookie is present in the client's jar
            after loading.
    """
    from http.cookiejar import MozillaCookieJar

    cookies_path = os.path.expanduser(cookies_path)
    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    if not os.path.exists(cookies_path):
        raise FileNotFoundError(f'cookies 文件不存在: {cookies_path}')

    cj = MozillaCookieJar()
    # Keep session-only and expired entries too; nothing is filtered by the
    # loader itself.
    cj.load(cookies_path, ignore_discard=True, ignore_expires=True)

    loaded_cookies = 0
    for cookie in cj:
        # Only load bilibili cookies.
        if 'bilibili' not in cookie.domain:
            continue
        client.cookies.set(
            cookie.name, cookie.value, domain=cookie.domain, path=cookie.path
        )
        loaded_cookies += 1

    print(f'{cookies_path} 加载了 {loaded_cookies} 个 cookies')
    if loaded_cookies > 100:
        # Message: too many cookies may make httpx.Client respond very slowly.
        print('可能加载了过多的 cookies可能导致 httpx.Client 响应非常慢')

    # Without SESSDATA the later login check cannot succeed; fail loudly here
    # even when asserts are disabled.
    if client.cookies.get('SESSDATA') is None:
        raise ValueError('SESSDATA 不存在')
2023-06-01 07:00:37 -07:00
2023-06-03 03:26:11 -07:00
def is_login(cilent: Client) -> bool:
    """Tell whether the session carried by *cilent* is logged in to bilibili.

    Requests the bilibili web account endpoint and inspects the ``code``
    field of the JSON reply: ``0`` counts as logged in, anything else as
    not logged in. A status line is printed in both cases.

    Args:
        cilent: HTTP client used for the request.

    Returns:
        ``True`` when the reply's ``code`` is ``0``, otherwise ``False``.
    """
    response = cilent.get('https://api.bilibili.com/x/member/web/account')
    response.raise_for_status()
    payload = response.json()

    if payload['code'] != 0:
        # Message: not logged in / SESSDATA invalid / expired.
        print('未登录/SESSDATA无效/过期')
        return False

    # Message: user logged in successfully.
    print('用户登录成功')
    return True
2023-06-01 07:00:37 -07:00
if __name__ == '__main__':
    # Script entry point: run the batch, report Ctrl-C with a short message,
    # and restore the terminal cursor no matter how main() ends.
    try:
        main()
    except KeyboardInterrupt:
        print('KeyboardInterrupt')
    finally:
        # Make the terminal cursor visible again.
        Console().show_cursor()