From f076e00cacbb238cbc2c4a4d003ae6ea63c63d7a Mon Sep 17 00:00:00 2001
From: yzqzss
Date: Thu, 1 Jun 2023 22:00:37 +0800
Subject: [PATCH] init

---
 .gitignore                                    |   4 +
 .../_biliup_archive_bvid.cpython-310.pyc      | Bin 0 -> 3772 bytes
 _biliup_archive_bvid.py                       | 110 ++++++++++++++++
 _biliup_upload_bvid.py                        | 120 ++++++++++++++++++
 biliup_archive_daily_bvids.py                 |  29 +++++
 biliup_get_daily_bvids.py                     |  71 +++++++++++
 biliup_upload_daily_bvids.py                  |   0
 7 files changed, 334 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 __pycache__/_biliup_archive_bvid.cpython-310.pyc
 create mode 100644 _biliup_archive_bvid.py
 create mode 100644 _biliup_upload_bvid.py
 create mode 100644 biliup_archive_daily_bvids.py
 create mode 100644 biliup_get_daily_bvids.py
 create mode 100644 biliup_upload_daily_bvids.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..dfdcf14
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+biliup/
+bvids/
+sess_data.txt
+biliup.home
\ No newline at end of file
diff --git a/__pycache__/_biliup_archive_bvid.cpython-310.pyc b/__pycache__/_biliup_archive_bvid.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bd9391cbb625721228034b92493b11695d53f1fb
GIT binary patch
literal 3772
[base85-encoded payload of the compiled __pycache__ bytecode omitted]
literal 0
HcmV?d00001
diff --git a/_biliup_archive_bvid.py b/_biliup_archive_bvid.py
new file mode 100644
index 0000000..c2c83e1
--- /dev/null
+++ b/_biliup_archive_bvid.py
@@ -0,0 +1,110 @@
+"""
+bilix provides APIs for the sites it supports. You can use them directly if you need to, and they are all asynchronous.
+"""
+import asyncio
+import json
+import os
+
+import aiofiles
+import httpx
+from bilix.download.utils import raise_api_error, req_retry
+from bilix.exception import APIError
+from bilix.sites.bilibili import api
+from bilix.sites.bilibili.downloader import DownloaderBilibili
+from rich import print
+
+
+identifier_perfix = 'BiliBili'
+
+
+@raise_api_error
+async def new_get_subtitle_info(client: httpx.AsyncClient, bvid, cid):
+    params = {'bvid': bvid, 'cid': cid}
+    res = await req_retry(client, 'https://api.bilibili.com/x/player/v2', params=params)
+    info = json.loads(res.text)
+    if info['code'] == -400:
+        raise APIError('subtitle info not found', params)
+    # unlike the upstream implementation, also return the subtitle language (lan)
+    return [[f'http:{i["subtitle_url"]}', i['lan']] for i in info['data']['subtitle']['subtitles']]
+
+# monkey-patch bilix so the subtitle language is preserved
+api.get_subtitle_info = new_get_subtitle_info
+
+
+async def archive_bvid(bvid: str, sess_data: str):
+    if not os.path.exists('biliup.home'):
+        raise Exception('Create the biliup.home file first')
+    # instantiate a downloader first; its http client is reused for all API requests
+    d = DownloaderBilibili(video_concurrency=5, part_concurrency=10, hierarchy=True, sess_data=sess_data)
+    url = f'https://www.bilibili.com/video/{bvid}/'
+    # data = await api.get_video_info(client, "https://www.bilibili.com/video/BV1jK4y1N7ST?p=5")
+    # d.update_cookies_from_browser('firefox')
+
+    videos_basepath = f'biliup/videos/{bvid}'
+    videos_info = await api.get_video_info(d.client, url)
+    os.makedirs(videos_basepath, exist_ok=True)
+
+    async with aiofiles.open(f'{videos_basepath}/videos_info.json', 'w', encoding='utf-8') as f:
+        await f.write(json.dumps(videos_info.dict(), ensure_ascii=False, indent=4))
+
+    pid = 0
+    d.progress.start()
+    for page in videos_info.pages:
+        pid += 1
+
+        file_basename = f'{bvid}_p{pid}'
+        video_basepath = f'{videos_basepath}/{identifier_perfix}-{file_basename}'
+        video_extrapath = f'{video_basepath}/extra'
+        if os.path.exists(f'{video_basepath}/_downloaded.mark'):
+            print(f'{bvid} p{pid} has already been downloaded')
+            continue
+        video_info = await api.get_video_info(d.client, page.p_url)
+        os.makedirs(video_basepath, exist_ok=True)
+        os.makedirs(video_extrapath, exist_ok=True)
+
+        old_p_name = video_info.pages[video_info.p].p_name
+        old_h1_title = video_info.h1_title
+
+        video_info.pages[video_info.p].p_name = file_basename
+        video_info.h1_title = 'title' * 30  # deliberately over-long title, so bilix falls back to file_basename
+        cor1 = d.get_video(page.p_url, video_info=video_info, quality=0,
+                           dm=True, image=True, subtitle=True, path=video_basepath)
+        cor2 = d.get_dm(page.p_url, video_info=video_info, path=video_extrapath)
+        cor3 = download_bilibili_video_detail(d.client, bvid,
+                                              f'{video_extrapath}/{file_basename}.info.json')
+        await asyncio.gather(cor1, cor2, cor3)
+
+        video_info.pages[video_info.p].p_name = old_p_name
+        video_info.h1_title = old_h1_title
+
+        async with aiofiles.open(f'{video_basepath}/_downloaded.mark', 'w', encoding='utf-8') as f:
+            await f.write('')
+
+    await d.aclose()
+    d.progress.stop()
+
+
+async def download_bilibili_video_detail(client, bvid, filename):
+    if os.path.exists(filename):
+        return
+    # url = 'https://api.bilibili.com/x/web-interface/view'
+    url = 'https://api.bilibili.com/x/web-interface/view/detail'  # the most detailed variant
+    params = {'bvid': bvid}
+    r = await req_retry(client, url, params=params, follow_redirects=True)
+    r.raise_for_status()
+
+    async with aiofiles.open(filename, 'w', encoding='utf-8') as f:
+        # f.write(json.dumps(r.json(), indent=4, ensure_ascii=False))
+        await f.write(r.text)
+
+
+# asyncio.run(archive_bvid(bvid=bvid, sess_data=sess_data))
+
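A note on the docstring above: the bilix site APIs are ordinary asyncio coroutines, so they can also be driven on their own, outside archive_bvid(). The sketch below is not part of the patch; it only reuses calls that already appear in _biliup_archive_bvid.py (DownloaderBilibili, api.get_video_info, h1_title, pages, p_url, aclose). The empty sess_data default is an assumption, and the URL is the sample quoted in the commented-out line above.

# Minimal sketch (not part of the patch): driving the async bilix API directly.
import asyncio

from bilix.sites.bilibili import api
from bilix.sites.bilibili.downloader import DownloaderBilibili


async def peek(url: str, sess_data: str = ''):
    # same constructor arguments archive_bvid() uses; empty sess_data is assumed
    # to be acceptable for public videos
    d = DownloaderBilibili(video_concurrency=5, part_concurrency=10,
                           hierarchy=True, sess_data=sess_data)
    try:
        video_info = await api.get_video_info(d.client, url)  # same call archive_bvid() makes
        print(video_info.h1_title)
        for page in video_info.pages:
            print(page.p_url)
    finally:
        await d.aclose()


asyncio.run(peek('https://www.bilibili.com/video/BV1jK4y1N7ST?p=5'))
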
diff --git a/_biliup_upload_bvid.py b/_biliup_upload_bvid.py
new file mode 100644
index 0000000..802e213
--- /dev/null
+++ b/_biliup_upload_bvid.py
@@ -0,0 +1,120 @@
+import json
+import os
+import time
+
+from internetarchive import get_item
+
+
+identifier_perfix = 'BiliBili'
+
+
+def upload_bvid(bvid):
+    if not os.path.exists('biliup.home'):
+        raise Exception('Create the biliup.home file first')
+    access_key, secret_key = read_ia_keys(os.path.expanduser('~/.bili_ia_keys.txt'))
+
+    # sample identifier: BiliBili-BV1Zh4y1x7RL_p3
+    videos_basepath = f'biliup/videos/{bvid}'
+    for identifier in os.listdir(videos_basepath):
+        if os.path.exists(f'{videos_basepath}/{identifier}/_uploaded.mark'):
+            print(f'{identifier} has already been uploaded')
+            continue
+        pid = identifier.split('_')[-1][1:]
+        file_basename = identifier[len(identifier_perfix)+1:]
+        if not identifier.startswith(identifier_perfix):
+            print(f'{identifier} is not a {identifier_perfix} video directory')
+            continue
+        if not os.path.exists(f'{videos_basepath}/{identifier}/_downloaded.mark'):
+            print(f'{identifier} has not finished downloading')
+            continue
+
+        print(f'Uploading {identifier} ...')
+        item = get_item(identifier)
+        if item.exists:
+            print(f'{identifier} already exists on archive.org')
+
+        filedict = {}  # "remote filename": "local filename"
+        for filename in os.listdir(f'{videos_basepath}/{identifier}'):
+            file = f'{videos_basepath}/{identifier}/{filename}'
+            if os.path.isfile(file):
+                if filename.startswith('_'):
+                    continue
+                filedict[filename] = file
+
+        for filename in os.listdir(f'{videos_basepath}/{identifier}/extra'):
+            file = f'{videos_basepath}/{identifier}/extra/{filename}'
+            if os.path.isfile(file):
+                if filename.startswith('_'):
+                    continue
+                filedict[filename] = file
+
+        # skip files that already exist in the item
+        for file_in_item in item.files:
+            if file_in_item["name"] in filedict:
+                filedict.pop(file_in_item["name"])
+                print(f"File {file_in_item['name']} already exists in {identifier}.")
+
+        with open(f'{videos_basepath}/{identifier}/extra/{file_basename}.info.json', 'r', encoding='utf-8') as f:
+            bv_info = json.load(f)
+        with open(f'{videos_basepath}/videos_info.json', 'r', encoding='utf-8') as f:
+            videos_info = json.load(f)
+
+        tags = ['BiliBili', 'video']
+        for tag in bv_info['data']['Tags']:
+            tags.append(tag['tag_name'])
+
+        md = {
+            "mediatype": "web",
+            "collection": 'movies',
+            "title": bv_info['data']['View']['title'] + ' ' + videos_info['pages'][int(pid) - 1]['p_name'],
+            "description": bv_info['data']['View']['desc'],
+            # "last-updated-date": time.strftime("%Y-%m-%d"),
+            'creator': bv_info['data']['View']['owner']['name'],
+            # 'year':
+            "subject": "; ".join(tags),  # Keywords should be separated by ";", but it doesn't matter much; the alternative is to set one per field with subject[0], subject[1], ...
+            "upload-state": "uploading",
+            'originalurl': f'https://www.bilibili.com/video/{bvid}?p={pid}',
+        }
+        print(filedict)
+        print(md)
+
+        r = item.upload(
+            files=filedict,
+            metadata=md,
+            access_key=access_key,
+            secret_key=secret_key,
+            verbose=True,
+            queue_derive=True,
+        )
+
+        tries = 30
+        item = get_item(identifier)  # refresh item
+        while not item.exists and tries > 0:
+            print(f"Waiting for item to be created ({tries}) ...", end='\r')
+            time.sleep(30)
+            item = get_item(identifier)
+            tries -= 1
+
+        new_md = {}
+        if item.metadata.get("upload-state") != "uploaded":
+            new_md.update({"upload-state": "uploaded"})
+        if new_md:
+            r = item.modify_metadata(
+                metadata=new_md,
+                access_key=access_key,
+                secret_key=secret_key,
+            )
+            r.raise_for_status()
+
+        with open(f'{videos_basepath}/{identifier}/_uploaded.mark', 'w', encoding='utf-8') as f:
+            f.write('')
+        print(f'{identifier} uploaded')
+
+
+def read_ia_keys(keysfile):
+    ''' Return: tuple(`access_key`, `secret_key`) '''
+    with open(keysfile, 'r', encoding='utf-8') as f:
+        key_lines = f.readlines()
+
+    access_key = key_lines[0].strip()
+    secret_key = key_lines[1].strip()
+
+    return access_key, secret_key
\ No newline at end of file
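On the keyword comment inside md above: as far as I know the internetarchive client also accepts a plain list as a metadata value and expands it into subject[0], subject[1], ... by itself. A hypothetical stand-alone sketch of that alternative follows; the identifier is the sample quoted in the comment above, the tag list is made up, and the patch itself keeps the "; ".join form.

# Hypothetical sketch of the subject[0], subject[1], ... alternative mentioned
# in the comment above; not what the patch does.
from internetarchive import get_item

tags = ['BiliBili', 'video', 'some-tag']      # made-up tag list
item = get_item('BiliBili-BV1Zh4y1x7RL_p3')   # sample identifier from the comment above
md = {
    'mediatype': 'web',
    'collection': 'movies',
    'subject': tags,                          # list value instead of '; '.join(tags)
}
# item.upload(files={...}, metadata=md, access_key=access_key, secret_key=secret_key)
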
diff --git a/biliup_archive_daily_bvids.py b/biliup_archive_daily_bvids.py
new file mode 100644
index 0000000..bd97f33
--- /dev/null
+++ b/biliup_archive_daily_bvids.py
@@ -0,0 +1,29 @@
+import argparse
+import asyncio
+import datetime
+
+from _biliup_archive_bvid import archive_bvid
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    today = datetime.date.today()
+    parser.add_argument('--sess-data', type=str, default=get_sess_data())
+    parser.add_argument('--bvids', type=str, default=f'bvids/bvids-{today.isoformat()}.txt')
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = parse_args()
+    with open(args.bvids, 'r', encoding='utf-8') as f:
+        bvids = f.read().splitlines()
+    for bvid in bvids:
+        asyncio.run(archive_bvid(bvid=bvid, sess_data=args.sess_data))
+
+
+def get_sess_data():
+    with open('sess_data.txt', 'r', encoding='utf-8') as f:
+        sess_data = f.read().strip()
+    return sess_data
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/biliup_get_daily_bvids.py b/biliup_get_daily_bvids.py
new file mode 100644
index 0000000..9ecd4e9
--- /dev/null
+++ b/biliup_get_daily_bvids.py
@@ -0,0 +1,71 @@
+import asyncio
+import datetime
+import json
+import os
+import sys
+
+import requests
+from bilix.sites.bilibili import DownloaderBilibili
+from bilibili_api import video, sync
+from internetarchive import get_item
+
+
+bilibili_ranking_api = "https://api.bilibili.com/x/web-interface/ranking/v2"
+bilibili_ranking_params = {
+    "rid": 0,
+    "type": "all"
+}
+
+r = requests.get(bilibili_ranking_api, params=bilibili_ranking_params)
+ranking_json = json.loads(r.text)
+assert ranking_json['code'] == 0
+
+ranking = ranking_json['data']['list']
+bvids = []
+for video_info in ranking:
+    # print(video_info['title'], video_info['bvid'], video_info['pic'])
+    bvid = video_info['bvid']
+    bvids.append(bvid)
+
+today = datetime.date.today()
+os.makedirs('bvids', exist_ok=True)
+with open(f'bvids/bvids-{today.isoformat()}.txt', 'w', encoding='utf-8') as f:
+    for bvid in bvids:
+        f.write(f'{bvid}\n')
+
+
+# print(bvid)
+# assert isinstance(bvid, str)
+
+# v = video.Video(bvid=bvid)
+# video_info = sync(v.get_info())
+
+# with open(f'bili/video/{bvid}/video-{bvid}.info.json', 'w', encoding='utf-8') as f:
+#     json.dump(video_info, f, ensure_ascii=False, indent=4)
+
+# # with open('ranking.json', 'w', encoding='utf-8') as f:
+# #     json.dump(ranking_json, f, ensure_ascii=False, indent=4)
+
+
+# async def main():
+#     d = DownloaderBilibili(video_concurrency=5, part_concurrency=10, hierarchy=False,
+#                            sess_data=sess_data)
+
+#     d.progress.start()
+#     # cor1 = d.get_series(
+#     #     'https://www.bilibili.com/bangumi/play/ss28277'
+#     #     , quality=0)
+#     # cor2 = d.get_up(url_or_mid='436482484', quality=0)
+#     os.makedirs(f'bili/video/{bvid}', exist_ok=True)
+#     cor3 = d.get_series(url=f'https://www.bilibili.com/video/{bvid}',
+#                         dm=True, quality=0, image=True, subtitle=True, path=f'bili/video/{bvid}')
+
+#     await asyncio.gather(cor3)
+#     await d.aclose()
+
+
+# if __name__ == '__main__':
+#     # asyncio.run(main())
+#     pass
+
diff --git a/biliup_upload_daily_bvids.py b/biliup_upload_daily_bvids.py
new file mode 100644
index 0000000..e69de29
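biliup_upload_daily_bvids.py is added empty by this patch. Purely as a hypothetical sketch (not the author's implementation), it could mirror biliup_archive_daily_bvids.py and reuse upload_bvid() on the same dated bvids list:

# Hypothetical content for biliup_upload_daily_bvids.py; the patch adds the file empty.
import datetime

from _biliup_upload_bvid import upload_bvid


def main():
    today = datetime.date.today()
    with open(f'bvids/bvids-{today.isoformat()}.txt', 'r', encoding='utf-8') as f:
        bvids = f.read().splitlines()
    for bvid in bvids:
        upload_bvid(bvid)


if __name__ == '__main__':
    main()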