diff --git a/.gitignore b/.gitignore index a1807f2..3c962ac 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ bvids/ sess_data.txt biliup.home .venv/ -__pycache__/ \ No newline at end of file +__pycache__/ +videos/ \ No newline at end of file diff --git a/_biliup_archive_bvid.py b/_biliup_archive_bvid.py index e13d91b..41b73dc 100644 --- a/_biliup_archive_bvid.py +++ b/_biliup_archive_bvid.py @@ -37,11 +37,10 @@ api.get_subtitle_info = new_get_subtitle_info async def archive_bvid(d: DownloaderBilibili, bvid: str): - if not os.path.exists('biliup.home'): - raise Exception('先创建 biliup.home 文件') - # 需要先实例化一个用来进行http请求的client - # d = DownloaderBilibili(video_concurrency=5, part_concurrency=10, hierarchy=True, sess_data=sess_data) - # first we should initialize a http client + assert d.hierarchy is True, 'hierarchy 必须为 True' # 为了保持后续目录结构、文件命名的一致性 + assert d.client.cookies.get('SESSDATA') is not None, 'sess_data 不能为空' # 开个大会员呗,能下 4k 呢。 + assert os.path.exists('biliup.home'), '先创建 biliup.home 文件' # 防误操作 + url = f'https://www.bilibili.com/video/{bvid}/' # data = await api.get_video_info(client, "https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") @@ -52,13 +51,14 @@ async def archive_bvid(d: DownloaderBilibili, bvid: str): os.makedirs(videos_basepath, exist_ok=True) - async with aiofiles.open(f'{videos_basepath}/videos_info.json', 'w', encoding='utf-8') as f: - await f.write(json.dumps(videos_info.dict(), ensure_ascii=False, indent=4)) + # async with aiofiles.open(f'{videos_basepath}/_videos_info.json', 'w', encoding='utf-8') as f: + # # 用于 debug 的来自 bilix 输出的视频信息,包含用户敏感信息(mid 等) + # await f.write(json.dumps(videos_info.dict(), ensure_ascii=False, indent=4)) pid = 0 for page in videos_info.pages: pid += 1 - if not page.p_name.startswith(f'P{pid}-'): + if not page.p_url.endswith(f'?p={pid}'): print(f'{bvid} 的第 {pid}P 不存在') continue @@ -77,10 +77,17 @@ async def archive_bvid(d: DownloaderBilibili, bvid: str): old_h1_title = video_info.h1_title video_info.pages[video_info.p].p_name = file_basename - video_info.h1_title = 'title' * 30 # 超长标题,用来 fallback 到 file_basename - cor1 = d.get_video(page.p_url ,video_info=video_info, quality=0, - dm=True, image=True, subtitle=True, path=video_basepath) + video_info.h1_title = 'tttttt' * 50 # 假装超长标题,强制 bilix fallback 到 file_basename 作为文件名 + cor1 = d.get_video(page.p_url ,video_info=video_info, path=video_basepath, + # hevc 优先 + quality=0, codec='hev', + # 下载 ass 弹幕(bilix 会自动调用 danmukuC 将 pb 弹幕转为 ass)、封面、字幕 + # 他们会被放进 extra 子目录里 + dm=True, image=True, subtitle=True + ) + # 下载原始的 pb 弹幕 cor2 = d.get_dm(page.p_url, video_info=video_info, path=video_extrapath) + # 获取视频详细信息 cor3 = download_bilibili_video_detail(d.client, bvid, f'{video_extrapath}/{file_basename}.info.json') await asyncio.gather(cor1, cor2, cor3) @@ -95,6 +102,7 @@ async def archive_bvid(d: DownloaderBilibili, bvid: str): async def download_bilibili_video_detail(client, bvid, filename): if os.path.exists(filename): + print(f'{bvid} 视频详情已存在') return # url = 'https://api.bilibili.com/x/web-interface/view' url = 'https://api.bilibili.com/x/web-interface/view/detail' # 超详细 @@ -105,6 +113,7 @@ async def download_bilibili_video_detail(client, bvid, filename): async with aiofiles.open(filename, 'w', encoding='utf-8') as f: # f.write(json.dumps(r.json(), indent=4, ensure_ascii=False)) await f.write(r.text) + print(f'{bvid} 视频详情已保存') # d = DownloaderBilibili(video_concurrency=2, part_concurrency=1, hierarchy=True, sess_data=None) # d.progress.start() diff --git a/_biliup_upload_bvid.py b/_biliup_upload_bvid.py index 692cc43..28a90b0 100644 --- a/_biliup_upload_bvid.py +++ b/_biliup_upload_bvid.py @@ -17,8 +17,9 @@ def upload_bvid(bvid): if os.path.exists(f'{videos_basepath}/{identifier}/_uploaded.mark'): print(f'{identifier} 已经上传过了(_uploaded.mark)') continue - pid = identifier.split('_')[-1][1:] - file_basename = identifier[len(identifier_perfix)+1:] + if identifier.startswith('_') : + print(f'跳过 {identifier}') + continue if not identifier.startswith(identifier_perfix): print(f'{identifier} 不是以 {identifier_perfix} 开头的正确 identifier') continue @@ -26,6 +27,9 @@ def upload_bvid(bvid): print(f'{identifier} 没有下载完成') continue + pid = identifier.split('_')[-1][1:] + file_basename = identifier[len(identifier_perfix)+1:] + print(f'开始上传 {identifier}') item = get_item(identifier) if item.exists: @@ -58,8 +62,8 @@ def upload_bvid(bvid): with open(f'{videos_basepath}/{identifier}/extra/{file_basename}.info.json', 'r', encoding='utf-8') as f: bv_info = json.load(f) - with open(f'{videos_basepath}/videos_info.json', 'r', encoding='utf-8') as f: - videos_info = json.load(f) + # with open(f'{videos_basepath}/_videos_info.json', 'r', encoding='utf-8') as f: + # videos_info = json.load(f) tags = ['BiliBili', 'video'] for tag in bv_info['data']['Tags']: @@ -72,7 +76,7 @@ def upload_bvid(bvid): break md = { - "mediatype": "web", + "mediatype": "movies", "collection": 'opensource_movies', "title": bv_info['data']['View']['title'] + f' P{pid} ' + part , "description": bv_info['data']['View']['desc'], diff --git a/biliup_archive_daily_bvids.py b/biliup_archive_daily_bvids.py index 4ad4033..3c28ca5 100644 --- a/biliup_archive_daily_bvids.py +++ b/biliup_archive_daily_bvids.py @@ -1,12 +1,19 @@ import asyncio import datetime +import logging import os import sys -from bilix.sites.bilibili.downloader import DownloaderBilibili -from _biliup_archive_bvid import archive_bvid import argparse + +from _biliup_archive_bvid import archive_bvid + +from bilix.sites.bilibili.downloader import DownloaderBilibili +from rich.console import Console import uvloop +from rich.traceback import install +install() + def parse_args(): parser = argparse.ArgumentParser() @@ -18,34 +25,40 @@ def parse_args(): def main(): args = parse_args() + print(args.sess_data) with open(args.bvids, 'r', encoding='utf-8') as f: bvids = f.read().splitlines() - async def do(): - d = DownloaderBilibili(video_concurrency=2, part_concurrency=1, hierarchy=True, sess_data=args.sess_data) - d.progress.start() - futs = [] - for bvid in bvids: - cor = asyncio.create_task(archive_bvid(d=d, bvid=bvid)) - fut = asyncio.gather(cor) - futs.append(fut) - if len(futs) == 2: - await asyncio.gather(*futs) - futs = [] - if len(futs) > 0: - await asyncio.gather(*futs) - futs = [] - d.progress.stop() - await d.aclose() - # asyncio.run(do()) - asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) - loop = asyncio.get_event_loop_policy().get_event_loop() - loop.run_until_complete(do()) + + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + from tasks_limit import tasks_limit + + d = DownloaderBilibili(video_concurrency=tasks_limit, part_concurrency=1, hierarchy=True, sess_data=args.sess_data, + ) + d.progress.start() + for bvid in bvids: + # 限制同时下载的数量 + while len(asyncio.all_tasks(loop)) > tasks_limit: + loop.run_until_complete(asyncio.sleep(5)) + task = loop.create_task(archive_bvid(d, bvid)) + + loop.run_until_complete(asyncio.sleep(5)) + loop.close() + def get_sess_data(): - with open('sess_data.txt', 'r', encoding='utf-8') as f: + with open(os.path.expanduser('~/.sess_data.txt'), 'r', encoding='utf-8') as f: sess_data = f.read().strip() return sess_data if __name__ == '__main__': - main() \ No newline at end of file + try: + main() + except KeyboardInterrupt: + print('KeyboardInterrupt') + finally: + # 显示终端光标 + console = Console() + console.show_cursor() \ No newline at end of file diff --git a/tasks_limit.py b/tasks_limit.py new file mode 100644 index 0000000..9178bb9 --- /dev/null +++ b/tasks_limit.py @@ -0,0 +1 @@ +tasks_limit = 2 \ No newline at end of file