diff --git a/biliarchiver_archive_bvids.py b/biliarchiver_archive_bvids.py index 598d00c..59969c3 100644 --- a/biliarchiver_archive_bvids.py +++ b/biliarchiver_archive_bvids.py @@ -10,15 +10,34 @@ from httpx import Client from rich.traceback import install install() +from _biliarchiver_archive_bvid import BILIBILI_IDENTIFIER_PERFIX def parse_args(): parser = argparse.ArgumentParser() parser.add_argument('--sess-data', type=str, default=get_sess_data(), help='cookie SESSDATA。不指定则会从 ~/.sess_data.txt 读取,指定则直接使用提供的字符串') parser.add_argument('--bvids', type=str, help='bvids 列表的文件路径', required=True) + parser.add_argument('--skip-exist', action='store_true', + help='跳过 IA 上已存在的 item (只检查 p1 是否存在)') args = parser.parse_args() return args +def check_ia_item_exist(client: Client, identifier: str) -> bool: + params = { + 'identifier': identifier, + 'output': 'json', + } + r = client.get('https://archive.org/services/check_identifier.php' ,params=params) + r.raise_for_status() + r_json = r.json() + assert r_json['type'] =='success' + if r_json['code'] == 'available': + return False + elif r_json['code'] == 'not_available': + return True + else: + raise ValueError(f'Unexpected code: {r_json["code"]}') + def main(): args = parse_args() @@ -33,16 +52,24 @@ def main(): d = DownloaderBilibili(video_concurrency=tasks_limit, part_concurrency=1, hierarchy=True, sess_data=args.sess_data, ) - - logined = is_login(Client(cookies=d.client.cookies, headers=d.client.headers)) + client = Client(cookies=d.client.cookies, headers=d.client.headers) + logined = is_login(client) if not logined: return d.progress.start() for bvid in bvids: + if args.skip_exist: + identifier = f'{BILIBILI_IDENTIFIER_PERFIX}-{bvid}_p1' + if check_ia_item_exist(client, identifier): + print(f'IA 上已存在 {identifier} ,跳过') + continue + while len(asyncio.all_tasks(loop)) > tasks_limit: loop.run_until_complete(asyncio.sleep(0.01)) + print(f'=== {bvid} ===') + task = loop.create_task(archive_bvid(d, bvid, logined=logined)) while len(asyncio.all_tasks(loop)) > 0: