This commit is contained in:
yzqzss 2023-06-01 22:00:37 +08:00
commit f076e00cac
7 changed files with 334 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
biliup/
bvids/
sess_data.txt
biliup.home

Binary file not shown.

110
_biliup_archive_bvid.py Normal file
View File

@@ -0,0 +1,110 @@
"""
bilix 提供了各个网站的api如果你有需要当然可以使用并且它们都是异步的
bilix provides api for various websites. You can use them if you need, and they are asynchronous
"""
import asyncio
import os
import shutil
import time
import aiofiles
import httpx
from bilix.download.utils import raise_api_error, req_retry
from bilix.exception import APIError
from bilix.sites.bilibili import api
from httpx import AsyncClient
from rich import print
import json
from bilix.sites.bilibili.downloader import DownloaderBilibili
# Prefix for per-part directory names (e.g. "BiliBili-BV..._p1").
# NOTE(review): "perfix" is a typo for "prefix"; kept as-is so existing
# references to this name keep working.
identifier_perfix = 'BiliBili'
@raise_api_error
async def new_get_subtitle_info(client: httpx.AsyncClient, bvid, cid):
    """Fetch subtitle download info for one video part.

    Queries the bilibili player API for the given bvid/cid and returns a
    list of ``[url, language_code]`` pairs, one entry per available
    subtitle track.

    Raises:
        APIError: when the API responds with code -400 (no subtitle info).
    """
    query = {'bvid': bvid, 'cid': cid}
    resp = await req_retry(client, 'https://api.bilibili.com/x/player/v2', params=query)
    payload = json.loads(resp.text)
    if payload['code'] == -400:
        raise APIError(f'未找到字幕信息', query)
    # Build [url, lan] pairs; the API returns protocol-relative URLs.
    tracks = []
    for i in payload['data']['subtitle']['subtitles']:
        tracks.append([f'http:{i["subtitle_url"]}', i['lan']])
    return tracks
# Monkey-patch bilix's subtitle lookup with the local variant above so
# code in this module that goes through `api` uses the patched version.
api.get_subtitle_info = new_get_subtitle_info
async def archive_bvid(bvid: str, sess_data: str):
    """Download every part of video *bvid* into biliup/videos/<bvid>/.

    For each page the video stream (quality 0), danmaku, cover image,
    subtitles and a detailed ``.info.json`` are saved. An empty
    ``_downloaded.mark`` file marks a completed part so reruns skip it.

    Args:
        bvid: bilibili video id (``BV...``).
        sess_data: SESSDATA cookie used for authenticated requests.

    Raises:
        Exception: when the ``biliup.home`` guard file is absent (protects
            against running in the wrong working directory).
    """
    if not os.path.exists('biliup.home'):
        raise Exception('先创建 biliup.home 文件')
    # DownloaderBilibili owns the httpx client used for all API calls below.
    d = DownloaderBilibili(video_concurrency=5, part_concurrency=10,
                           hierarchy=True, sess_data=sess_data)
    # BUG FIX: the original had a bare `d.update_cookies_from_browser`
    # statement — an attribute access that called nothing; removed.
    url = f'https://www.bilibili.com/video/{bvid}/'
    videos_basepath = f'biliup/videos/{bvid}'
    try:
        videos_info = await api.get_video_info(d.client, url)
        os.makedirs(videos_basepath, exist_ok=True)
        async with aiofiles.open(f'{videos_basepath}/videos_info.json', 'w', encoding='utf-8') as f:
            await f.write(json.dumps(videos_info.dict(), ensure_ascii=False, indent=4))
        d.progress.start()
        for pid, page in enumerate(videos_info.pages, start=1):
            file_basename = f'{bvid}_p{pid}'
            video_basepath = f'{videos_basepath}/{identifier_perfix}-{file_basename}'
            video_extrapath = f'{video_basepath}/extra'
            if os.path.exists(f'{video_basepath}/_downloaded.mark'):
                print(f'{bvid} 的第 {pid}p 已经下载过了')
                continue
            video_info = await api.get_video_info(d.client, page.p_url)
            os.makedirs(video_basepath, exist_ok=True)
            os.makedirs(video_extrapath, exist_ok=True)
            # Temporarily rename the part so downloaded files are named
            # after file_basename; restored after the downloads finish.
            old_p_name = video_info.pages[video_info.p].p_name
            old_h1_title = video_info.h1_title
            video_info.pages[video_info.p].p_name = file_basename
            video_info.h1_title = 'title' * 30  # 超长标题,用来 fallback 到 file_basename
            cor1 = d.get_video(page.p_url, video_info=video_info, quality=0,
                               dm=True, image=True, subtitle=True, path=video_basepath)
            cor2 = d.get_dm(page.p_url, video_info=video_info, path=video_extrapath)
            cor3 = download_bilibili_video_detail(
                d.client, bvid, f'{video_extrapath}/{file_basename}.info.json')
            await asyncio.gather(cor1, cor2, cor3)
            video_info.pages[video_info.p].p_name = old_p_name
            video_info.h1_title = old_h1_title
            async with aiofiles.open(f'{video_basepath}/_downloaded.mark', 'w', encoding='utf-8') as f:
                await f.write('')
    finally:
        # BUG FIX: always stop the progress display and release the client,
        # even when a download raises; the original stopped the progress
        # bar only after aclose() and leaked both on any exception.
        d.progress.stop()
        await d.aclose()
async def download_bilibili_video_detail(client, bvid, filename):
    """Save the web-interface 'view/detail' JSON for *bvid* to *filename*.

    The request is skipped entirely when *filename* already exists, so the
    call is cheap to repeat.
    """
    if os.path.exists(filename):
        return
    # /view/detail is the extended variant of /x/web-interface/view.
    url = 'https://api.bilibili.com/x/web-interface/view/detail'
    r = await req_retry(client, url, params={'bvid': bvid}, follow_redirects=True)
    r.raise_for_status()
    async with aiofiles.open(filename, 'w', encoding='utf-8') as fp:
        await fp.write(r.text)
# asyncio.run(archive_bvid(bvid=bvid))

120
_biliup_upload_bvid.py Normal file
View File

@@ -0,0 +1,120 @@
# Prefix of Internet Archive item identifiers (e.g. "BiliBili-BV..._p1").
# NOTE(review): "perfix" is a typo for "prefix"; kept as-is so existing
# references to this name keep working.
identifier_perfix = 'BiliBili'
import json
import os
import time
from internetarchive import get_item
def upload_bvid(bvid):
    """Upload every downloaded part of *bvid* to the Internet Archive.

    Walks ``biliup/videos/<bvid>/``; for each ``BiliBili-<bvid>_p<N>``
    directory that has a ``_downloaded.mark``, uploads its files (and the
    files under its ``extra/`` subdirectory) as IA item ``<identifier>``,
    sets item metadata, waits for the item to materialise, then flips
    ``upload-state`` to "uploaded". An ``_uploaded.mark`` file makes
    reruns skip the part.

    Raises:
        Exception: when the ``biliup.home`` guard file is absent.
    """
    if not os.path.exists('biliup.home'):
        raise Exception('先创建 biliup.home 文件')
    access_key, secret_key = read_ia_keys(os.path.expanduser('~/.bili_ia_keys.txt'))

    # sample identifier: BiliBili-BV1Zh4y1x7RL_p3
    videos_basepath = f'biliup/videos/{bvid}'
    for identifier in os.listdir(videos_basepath):
        if os.path.exists(f'{videos_basepath}/{identifier}/_uploaded.mark'):
            print(f'{identifier} 已经上传过了')
            continue
        if not identifier.startswith(identifier_perfix):
            print(f'{identifier} 不是 {identifier_perfix} 的视频')
            continue
        if not os.path.exists(f'{videos_basepath}/{identifier}/_downloaded.mark'):
            print(f'{identifier} 没有下载完成')
            continue
        pid = identifier.split('_')[-1][1:]  # "..._p3" -> "3"
        file_basename = identifier[len(identifier_perfix) + 1:]  # strip "BiliBili-"

        print(f'开始上传 {identifier}')
        item = get_item(identifier)
        if item.exists:
            print(f'{identifier} 已经存在')

        filedict = {}  # "remote filename": "local filename"
        for filename in os.listdir(f'{videos_basepath}/{identifier}'):
            # BUG FIX: the local path must be built from the loop variable;
            # the original used a fixed placeholder string, so no file was
            # ever collected.
            file = f'{videos_basepath}/{identifier}/{filename}'
            if not os.path.isfile(file):
                continue  # skip the extra/ directory itself
            if filename.startswith('_'):
                continue  # skip _downloaded.mark / _uploaded.mark
            filedict[filename] = file
        for filename in os.listdir(f'{videos_basepath}/{identifier}/extra'):
            file = f'{videos_basepath}/{identifier}/extra/{filename}'
            # BUG FIX: the underscore test must look at the file name, not
            # the full path (the original `file.startswith('_')` was never
            # true because the path starts with "biliup/").
            if os.path.isfile(file) and not filename.startswith('_'):
                filedict[filename] = file

        # Don't re-upload files the item already holds.
        for file_in_item in item.files:
            if file_in_item["name"] in filedict:
                filedict.pop(file_in_item["name"])
                print(f"File {file_in_item['name']} already exists in {identifier}.")

        with open(f'{videos_basepath}/{identifier}/extra/{file_basename}.info.json', 'r', encoding='utf-8') as f:
            bv_info = json.load(f)
        with open(f'{videos_basepath}/videos_info.json', 'r', encoding='utf-8') as f:
            videos_info = json.load(f)

        tags = ['BiliBili', 'video']
        for tag in bv_info['data']['Tags']:
            tags.append(tag['tag_name'])
        md = {
            "mediatype": "web",
            "collection": 'movies',
            "title": bv_info['data']['View']['title'] + ' ' + videos_info['pages'][int(pid) - 1]['p_name'],
            "description": bv_info['data']['View']['desc'],
            'creator': bv_info['data']['View']['owner']['name'],
            # Keywords should be separated by ; but it doesn't matter much;
            # the alternative is to set one per field with subject[0], ...
            "subject": "; ".join(tags),
            "upload-state": "uploading",
            'originalurl': f'https://www.bilibili.com/video/{bvid}?p={pid}',
        }

        print(filedict)
        print(md)
        r = item.upload(
            files=filedict,
            metadata=md,
            access_key=access_key,
            secret_key=secret_key,
            verbose=True,
            queue_derive=True,
        )

        # Poll until IA has created the item (up to 30 * 30s = 15 minutes).
        tries = 30
        item = get_item(identifier)  # refresh item
        while not item.exists and tries > 0:
            print(f"Waiting for item to be created ({tries}) ...", end='\r')
            time.sleep(30)
            item = get_item(identifier)
            tries -= 1

        new_md = {}
        if item.metadata.get("upload-state") != "uploaded":
            new_md.update({"upload-state": "uploaded"})
        if new_md:
            r = item.modify_metadata(
                metadata=new_md,
                access_key=access_key,
                secret_key=secret_key,
            )
            r.raise_for_status()

        with open(f'{videos_basepath}/{identifier}/_uploaded.mark', 'w', encoding='utf-8') as f:
            f.write('')
        print(f'{identifier} 上传完成')
def read_ia_keys(keysfile):
    """Read an Internet Archive key pair from *keysfile*.

    The file's first line holds the access key, the second line the secret
    key; surrounding whitespace on each line is ignored.

    Return: tuple(`access_key`, `secret_key`)
    """
    with open(keysfile, 'r', encoding='utf-8') as fp:
        lines = fp.readlines()
    return lines[0].strip(), lines[1].strip()

View File

@@ -0,0 +1,29 @@
import asyncio
import datetime
from _biliup_archive_bvid import archive_bvid
import argparse
def parse_args():
    """Parse command-line options.

    --sess-data: bilibili SESSDATA cookie; defaults to the contents of
        sess_data.txt, read only when the flag is not supplied.
    --bvids: path to a text file of bvids (one per line); defaults to
        today's bvids/bvids-YYYY-MM-DD.txt.

    Returns:
        argparse.Namespace with ``sess_data`` and ``bvids`` populated.
    """
    parser = argparse.ArgumentParser()
    today = datetime.date.today()
    # BUG FIX: get_sess_data() used to be the eager `default=` value, so
    # sess_data.txt was read (and had to exist) even when --sess-data was
    # given on the command line. Resolve the default lazily instead.
    parser.add_argument('--sess-data', type=str, default=None)
    parser.add_argument('--bvids', type=str, default=f'bvids/bvids-{today.isoformat()}.txt')
    args = parser.parse_args()
    if args.sess_data is None:
        args.sess_data = get_sess_data()
    return args
def main():
    """Archive each bvid listed in the --bvids file, one at a time."""
    args = parse_args()
    with open(args.bvids, 'r', encoding='utf-8') as fh:
        bvid_list = fh.read().splitlines()
    # Each bvid gets its own event loop run; downloads are sequential.
    for one_bvid in bvid_list:
        asyncio.run(archive_bvid(bvid=one_bvid, sess_data=args.sess_data))
def get_sess_data():
    """Return the SESSDATA cookie stored in ./sess_data.txt, stripped."""
    with open('sess_data.txt', 'r', encoding='utf-8') as fp:
        return fp.read().strip()
# Entry point: run only when executed as a script, not on import.
if __name__ == '__main__':
    main()

71
biliup_get_daily_bvids.py Normal file
View File

@@ -0,0 +1,71 @@
"""Fetch today's bilibili all-site ranking and write its bvids to
bvids/bvids-YYYY-MM-DD.txt, one bvid per line."""
import asyncio
import datetime
import os
import sys
import requests
import json
from bilix.sites.bilibili import DownloaderBilibili
from bilibili_api import video, sync
from internetarchive import get_item

bilibili_ranking_api = "https://api.bilibili.com/x/web-interface/ranking/v2"
bilibili_ranking_params = {
    "rid": 0,        # 0 selects the all-categories ranking
    "type": "all",
}

r = requests.get(bilibili_ranking_api, params=bilibili_ranking_params)
# BUG FIX: check the HTTP status, and don't rely on `assert` (it is
# stripped under `python -O`); fail loudly with an explicit exception.
r.raise_for_status()
ranking_json = r.json()
if ranking_json['code'] != 0:
    raise RuntimeError(f"ranking API returned code {ranking_json['code']}")

ranking = ranking_json['data']['list']
bvids = [video_info['bvid'] for video_info in ranking]

today = datetime.date.today()
os.makedirs('bvids', exist_ok=True)
with open(f'bvids/bvids-{today.isoformat()}.txt', 'w', encoding='utf-8') as f:
    for bvid in bvids:
        f.write(f'{bvid}\n')

View File