2023-06-01 07:00:37 -07:00
|
|
|
import asyncio
|
2023-06-01 08:36:21 -07:00
|
|
|
import os
|
2023-06-01 07:00:37 -07:00
|
|
|
import argparse
|
2023-06-02 01:18:28 -07:00
|
|
|
|
2023-06-02 13:32:11 -07:00
|
|
|
from _biliarchiver_archive_bvid import archive_bvid
|
2023-06-02 01:18:28 -07:00
|
|
|
|
|
|
|
from bilix.sites.bilibili.downloader import DownloaderBilibili
|
|
|
|
from rich.console import Console
|
2023-06-03 03:26:11 -07:00
|
|
|
from httpx import Client
|
2023-06-02 01:18:28 -07:00
|
|
|
from rich.traceback import install
|
|
|
|
install()
|
|
|
|
|
2023-06-03 06:10:14 -07:00
|
|
|
from _biliarchiver_archive_bvid import BILIBILI_IDENTIFIER_PERFIX
|
2023-06-01 07:00:37 -07:00
|
|
|
|
|
|
|
def parse_args():
    """Parse the command-line arguments of the archiving script.

    Options:
        --sess-data: SESSDATA cookie value. When omitted, the value is read
            from ``~/.sess_data.txt`` through ``get_sess_data()``.
        --bvids: path of the file containing the bvid list (required).
        --skip-exist: flag; skip items that already exist on IA (only p1 of
            each item is checked).

    Returns:
        argparse.Namespace: namespace with ``sess_data``, ``bvids`` and
        ``skip_exist`` attributes.
    """
    parser = argparse.ArgumentParser()
    # The default is resolved lazily below: calling get_sess_data() here would
    # read ~/.sess_data.txt (and fail if it is missing) even when the user
    # passes --sess-data explicitly.
    parser.add_argument('--sess-data', type=str, default=None,
                        help='cookie SESSDATA。不指定则会从 ~/.sess_data.txt 读取,指定则直接使用提供的字符串')
    parser.add_argument('--bvids', type=str, help='bvids 列表的文件路径', required=True)
    parser.add_argument('--skip-exist', action='store_true',
                        help='跳过 IA 上已存在的 item (只检查 p1 是否存在)')
    args = parser.parse_args()
    if args.sess_data is None:
        # Option not given: fall back to the cookie stored in the home directory.
        args.sess_data = get_sess_data()
    return args
|
|
|
|
|
2023-06-03 06:10:14 -07:00
|
|
|
def check_ia_item_exist(client: Client, identifier: str) -> bool:
    """Tell whether an item with the given identifier already exists on IA.

    Sends a GET request to archive.org's ``check_identifier.php`` service
    with ``output=json`` and interprets the ``code`` field of the reply.

    Args:
        client: HTTP client used to perform the request.
        identifier: the item identifier to look up.

    Returns:
        ``False`` when the service reports the identifier as ``available``
        (no such item yet), ``True`` when it reports ``not_available``.

    Raises:
        ValueError: if the reply's ``type`` is not ``success`` or its
            ``code`` is neither ``available`` nor ``not_available``.
        Exception: whatever ``raise_for_status()`` raises for an
            unsuccessful HTTP status is propagated unchanged.
    """
    params = {
        'identifier': identifier,
        'output': 'json',
    }
    r = client.get('https://archive.org/services/check_identifier.php', params=params)
    r.raise_for_status()
    r_json = r.json()

    # Explicit check instead of `assert`: assertions are removed when Python
    # runs with -O, which would let a failed lookup pass silently.
    if r_json['type'] != 'success':
        raise ValueError(f'Unexpected type: {r_json["type"]}')

    code = r_json['code']
    if code == 'available':
        return False
    if code == 'not_available':
        return True
    raise ValueError(f'Unexpected code: {code}')
|
|
|
|
|
2023-06-01 07:00:37 -07:00
|
|
|
def main():
    """Archive every bvid listed in the file given by ``--bvids``.

    Steps:
      1. Parse the arguments and read the bvid list (one bvid per line).
      2. Create a fresh event loop and a ``DownloaderBilibili`` configured
         from the ``config`` module.
      3. Build a synchronous HTTP client from the downloader's cookies and
         headers and verify the login with ``is_login``; return early if the
         session is not logged in.
      4. For each bvid, optionally skip it when its ``_p1`` item already
         exists on IA, wait while the loop holds more than
         ``video_concurrency`` unfinished tasks, then schedule
         ``archive_bvid``.
      5. Keep driving the loop until no unfinished task remains.
    """
    args = parse_args()
    # No extra check on args.bvids: parse_args() declares --bvids as
    # required=True, so argparse has already rejected a missing value.
    with open(args.bvids, 'r', encoding='utf-8') as f:
        # Drop blank lines and surrounding whitespace so that an empty line
        # (e.g. a trailing newline artefact) is never scheduled as a bvid.
        bvids = [line.strip() for line in f.read().splitlines() if line.strip()]

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    from config import video_concurrency, part_concurrency, stream_retry

    d = DownloaderBilibili(hierarchy=True, sess_data=args.sess_data,
                           video_concurrency=video_concurrency,
                           part_concurrency=part_concurrency,
                           stream_retry=stream_retry,
                           )
    client = Client(cookies=d.client.cookies, headers=d.client.headers)
    try:
        logined = is_login(client)
        if not logined:
            return

        d.progress.start()

        # The event loop only keeps weak references to tasks; hold strong
        # references here so a running task cannot be garbage-collected.
        scheduled = set()
        for index, bvid in enumerate(bvids):
            if args.skip_exist:
                identifier = f'{BILIBILI_IDENTIFIER_PERFIX}-{bvid}_p1'
                if check_ia_item_exist(client, identifier):
                    print(f'IA 上已存在 {identifier} ,跳过')
                    continue

            # Throttle: let the loop run in short slices until the number of
            # unfinished tasks drops to video_concurrency or below.
            while len(asyncio.all_tasks(loop)) > video_concurrency:
                loop.run_until_complete(asyncio.sleep(0.01))

            print(f'=== {bvid} ({index+1}/{len(bvids)}) ===')

            task = loop.create_task(archive_bvid(d, bvid, logined=logined))
            scheduled.add(task)
            task.add_done_callback(scheduled.discard)

        # Drain: keep running the loop until every remaining task is done.
        while len(asyncio.all_tasks(loop)) > 0:
            loop.run_until_complete(asyncio.sleep(1))
    finally:
        # Release the synchronous client's connections on every exit path.
        client.close()
|
|
|
|
|
2023-06-02 01:18:28 -07:00
|
|
|
|
2023-06-01 07:00:37 -07:00
|
|
|
|
|
|
|
def get_sess_data():
    """Return the contents of ``~/.sess_data.txt`` with surrounding whitespace removed.

    The file is read as UTF-8 from the current user's home directory.
    """
    sess_data_path = os.path.expanduser('~/.sess_data.txt')
    with open(sess_data_path, 'r', encoding='utf-8') as fp:
        return fp.read().strip()
|
|
|
|
|
2023-06-03 03:26:11 -07:00
|
|
|
def is_login(cilent: Client) -> bool:
    """Check whether the session carried by the client is logged in.

    Requests bilibili's ``/x/member/web/account`` endpoint and treats a
    ``code`` of 0 in the JSON reply as a successful login. The outcome is
    also printed to stdout.

    Args:
        cilent: HTTP client used for the request.

    Returns:
        ``True`` if the reply's ``code`` equals 0, otherwise ``False``.
    """
    response = cilent.get('https://api.bilibili.com/x/member/web/account')
    response.raise_for_status()
    account_info = response.json()

    # Guard clause: any non-zero code means the session is not usable.
    if account_info['code'] != 0:
        print('未登录/SESSDATA无效/过期')
        return False

    print('用户登录成功')
    return True
|
2023-06-01 07:00:37 -07:00
|
|
|
|
|
|
|
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        print('KeyboardInterrupt')
    finally:
        # Show the terminal cursor again on every exit path
        # (presumably the progress display hides it; verify against bilix/rich).
        Console().show_cursor()
|