mirror of
https://github.com/saveweb/biliarchiver.git
synced 2024-09-19 11:05:28 -07:00
init
This commit is contained in:
commit
f076e00cac
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
biliup/
|
||||
bvids/
|
||||
sess_data.txt
|
||||
biliup.home
|
BIN
__pycache__/_biliup_archive_bvid.cpython-310.pyc
Normal file
BIN
__pycache__/_biliup_archive_bvid.cpython-310.pyc
Normal file
Binary file not shown.
110
_biliup_archive_bvid.py
Normal file
110
_biliup_archive_bvid.py
Normal file
@ -0,0 +1,110 @@
|
||||
"""
|
||||
bilix 提供了各个网站的api,如果你有需要当然可以使用,并且它们都是异步的
|
||||
|
||||
bilix provides api for various websites. You can use them if you need, and they are asynchronous
|
||||
"""
|
||||
import asyncio
|
||||
import os
|
||||
import shutil
|
||||
import time
|
||||
|
||||
import aiofiles
|
||||
import httpx
|
||||
from bilix.download.utils import raise_api_error, req_retry
|
||||
from bilix.exception import APIError
|
||||
|
||||
from bilix.sites.bilibili import api
|
||||
from httpx import AsyncClient
|
||||
|
||||
from rich import print
|
||||
import json
|
||||
|
||||
from bilix.sites.bilibili.downloader import DownloaderBilibili
|
||||
|
||||
|
||||
identifier_perfix = 'BiliBili'
|
||||
|
||||
@raise_api_error
async def new_get_subtitle_info(client: httpx.AsyncClient, bvid, cid):
    """Fetch subtitle download URLs and languages for one video page.

    Replacement for bilix's ``api.get_subtitle_info``: instead of returning
    only the language codes it returns ``[[url, lan], ...]`` pairs so the
    caller can download the subtitle files directly.

    :param client: shared httpx async client used for the request
    :param bvid: video bvid
    :param cid: page cid
    :raises APIError: when the player API answers with code -400
        (no subtitle info available)
    """
    params = {'bvid': bvid, 'cid': cid}
    res = await req_retry(client, 'https://api.bilibili.com/x/player/v2', params=params)
    info = json.loads(res.text)
    if info['code'] == -400:
        # original used an f-string with no placeholders; plain literal is equivalent
        raise APIError('未找到字幕信息', params)
    # [url, lan] pairs; subtitle_url comes back protocol-relative, prefix http:
    return [[f'http:{i["subtitle_url"]}', i['lan']] for i in info['data']['subtitle']['subtitles']]

# monkey-patch bilix so subtitle downloads go through our variant
api.get_subtitle_info = new_get_subtitle_info
|
||||
|
||||
|
||||
async def archive_bvid(bvid: str, sess_data: str):
    """Download every page of video *bvid* into ``biliup/videos/<bvid>/``.

    For each page this saves the video itself (with danmaku, cover image and
    subtitles), the raw danmaku, and the API detail JSON, then drops a
    ``_downloaded.mark`` file so re-runs can skip finished pages.

    :param bvid: video id to archive
    :param sess_data: bilibili SESSDATA cookie value for authenticated requests
    :raises Exception: if the ``biliup.home`` guard file does not exist
    """
    if not os.path.exists('biliup.home'):
        raise Exception('先创建 biliup.home 文件')
    # The downloader owns the http client used for all API calls below.
    d = DownloaderBilibili(video_concurrency=5, part_concurrency=10, hierarchy=True, sess_data=sess_data)
    url = f'https://www.bilibili.com/video/{bvid}/'
    # NOTE: removed the dead `d.update_cookies_from_browser` statement — it was
    # a bare attribute access (no call), i.e. a no-op left over from the
    # commented-out `d.update_cookies_from_browser('firefox')` experiment.

    videos_basepath = f'biliup/videos/{bvid}'
    videos_info = await api.get_video_info(d.client, url)
    os.makedirs(videos_basepath, exist_ok=True)

    async with aiofiles.open(f'{videos_basepath}/videos_info.json', 'w', encoding='utf-8') as f:
        await f.write(json.dumps(videos_info.dict(), ensure_ascii=False, indent=4))

    pid = 0
    d.progress.start()
    for page in videos_info.pages:
        pid += 1

        file_basename = f'{bvid}_p{pid}'
        video_basepath = f'{videos_basepath}/{identifier_perfix}-{file_basename}'
        video_extrapath = f'{video_basepath}/extra'
        if os.path.exists(f'{video_basepath}/_downloaded.mark'):
            print(f'{bvid} 的第 {pid}p 已经下载过了')
            continue
        video_info = await api.get_video_info(d.client, page.p_url)
        os.makedirs(video_basepath, exist_ok=True)
        os.makedirs(video_extrapath, exist_ok=True)

        # Temporarily rewrite the names bilix uses for on-disk filenames, so
        # everything lands under our deterministic file_basename.
        old_p_name = video_info.pages[video_info.p].p_name
        old_h1_title = video_info.h1_title

        video_info.pages[video_info.p].p_name = file_basename
        video_info.h1_title = 'title' * 30  # overlong title, forces fallback to file_basename
        cor1 = d.get_video(page.p_url, video_info=video_info, quality=0,
                           dm=True, image=True, subtitle=True, path=video_basepath)
        cor2 = d.get_dm(page.p_url, video_info=video_info, path=video_extrapath)
        cor3 = download_bilibili_video_detail(d.client, bvid, f'{video_extrapath}/{file_basename}.info.json')
        await asyncio.gather(cor1, cor2, cor3)

        # Restore the names we clobbered above.
        video_info.pages[video_info.p].p_name = old_p_name
        video_info.h1_title = old_h1_title

        # Empty marker file signals this page is fully downloaded.
        async with aiofiles.open(f'{video_basepath}/_downloaded.mark', 'w', encoding='utf-8') as f:
            await f.write('')

    await d.aclose()
    d.progress.stop()
|
||||
|
||||
|
||||
async def download_bilibili_video_detail(client, bvid, filename):
    """Save the full ``/view/detail`` API response for *bvid* to *filename*.

    Skips the request entirely when the target file already exists, so the
    caller can retry archiving without re-fetching.
    """
    if os.path.exists(filename):
        return
    # 'detail' variant of the view endpoint: much richer payload
    url = 'https://api.bilibili.com/x/web-interface/view/detail'
    r = await req_retry(client, url, params={'bvid': bvid}, follow_redirects=True)
    r.raise_for_status()

    # Persist the raw response text verbatim (already JSON).
    async with aiofiles.open(filename, 'w', encoding='utf-8') as f:
        await f.write(r.text)
|
||||
|
||||
|
||||
# asyncio.run(archive_bvid(bvid=bvid))
|
||||
|
120
_biliup_upload_bvid.py
Normal file
120
_biliup_upload_bvid.py
Normal file
@ -0,0 +1,120 @@
|
||||
identifier_perfix = 'BiliBili'
|
||||
|
||||
|
||||
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from internetarchive import get_item
|
||||
def upload_bvid(bvid):
    """Upload all downloaded pages of *bvid* to the Internet Archive.

    For every ``BiliBili-<bvid>_p<pid>`` directory under
    ``biliup/videos/<bvid>/`` that carries a ``_downloaded.mark``, this
    creates/refreshes the IA item, uploads the page's files plus the
    ``extra/`` metadata, flips the item's ``upload-state`` to ``uploaded``,
    and drops an ``_uploaded.mark`` so re-runs skip it.

    :param bvid: video id whose pages should be uploaded
    :raises Exception: if the ``biliup.home`` guard file does not exist
    """
    if not os.path.exists('biliup.home'):
        raise Exception('先创建 biliup.home 文件')
    access_key, secret_key = read_ia_keys(os.path.expanduser('~/.bili_ia_keys.txt'))

    # sample identifier: BiliBili-BV1Zh4y1x7RL_p3
    videos_basepath = f'biliup/videos/{bvid}'
    for identifier in os.listdir(videos_basepath):
        if os.path.exists(f'{videos_basepath}/{identifier}/_uploaded.mark'):
            print(f'{identifier} 已经上传过了')
            continue
        pid = identifier.split('_')[-1][1:]
        file_basename = identifier[len(identifier_perfix)+1:]
        if not identifier.startswith(identifier_perfix):
            print(f'{identifier} 不是 {identifier_perfix} 的视频')
            continue
        if not os.path.exists(f'{videos_basepath}/{identifier}/_downloaded.mark'):
            print(f'{identifier} 没有下载完成')
            continue

        print(f'开始上传 {identifier}')
        item = get_item(identifier)
        if item.exists:
            print(f'{identifier} 已经存在')
        filedict = {}  # "remote filename": "local filename"
        for filename in os.listdir(f'{videos_basepath}/{identifier}'):
            # BUGFIX: the local path must be built from the directory entry
            file = f'{videos_basepath}/{identifier}/{filename}'
            if not os.path.isfile(file):
                continue  # skip the extra/ subdirectory in this pass
            if os.path.basename(file).startswith('_'):
                continue  # skip marker files (_downloaded.mark etc.)
            filedict[filename] = file

        for filename in os.listdir(f'{videos_basepath}/{identifier}/extra'):
            file = f'{videos_basepath}/{identifier}/extra/{filename}'
            if not os.path.isfile(file):
                continue
            # BUGFIX: check the entry name, not the full path, for markers —
            # the path always starts with 'biliup/', so the old check never fired
            if filename.startswith('_'):
                continue
            filedict[filename] = file

        # Don't re-upload files the item already holds.
        for file_in_item in item.files:
            if file_in_item["name"] in filedict:
                filedict.pop(file_in_item["name"])
                print(f"File {file_in_item['name']} already exists in {identifier}.")

        with open(f'{videos_basepath}/{identifier}/extra/{file_basename}.info.json', 'r', encoding='utf-8') as f:
            bv_info = json.load(f)
        with open(f'{videos_basepath}/videos_info.json', 'r', encoding='utf-8') as f:
            videos_info = json.load(f)

        tags = ['BiliBili', 'video']
        for tag in bv_info['data']['Tags']:
            tags.append(tag['tag_name'])

        md = {
            "mediatype": "web",
            "collection": 'movies',
            "title": bv_info['data']['View']['title'] + ' ' + videos_info['pages'][int(pid) - 1]['p_name'],
            "description": bv_info['data']['View']['desc'],
            # "last-updated-date": time.strftime("%Y-%m-%d"),
            'creator': bv_info['data']['View']['owner']['name'],
            # 'year':
            "subject": "; ".join(
                tags
            ),  # Keywords should be separated by ; but it doesn't matter much; the alternative is to set one per field with subject[0], subject[1], ...
            "upload-state": "uploading",
            'originalurl': f'https://www.bilibili.com/video/{bvid}?p={pid}',
        }
        print(filedict)
        print(md)

        r = item.upload(
            files=filedict,
            metadata=md,
            access_key=access_key,
            secret_key=secret_key,
            verbose=True,
            queue_derive=True,
        )

        # Poll until the item materializes on IA (30 tries x 30 s ≈ 15 min).
        tries = 30
        item = get_item(identifier)  # refresh item
        while not item.exists and tries > 0:
            print(f"Waiting for item to be created ({tries}) ...", end='\r')
            time.sleep(30)
            item = get_item(identifier)
            tries -= 1

        new_md = {}
        if item.metadata.get("upload-state") != "uploaded":
            new_md.update({"upload-state": "uploaded"})
        if new_md:
            r = item.modify_metadata(
                metadata=new_md,
                access_key=access_key,
                secret_key=secret_key,
            )
            r.raise_for_status()
        with open(f'{videos_basepath}/{identifier}/_uploaded.mark', 'w', encoding='utf-8') as f:
            f.write('')
        print(f'{identifier} 上传完成')
|
||||
|
||||
def read_ia_keys(keysfile):
    """Read an Internet Archive credentials file.

    The file stores the access key on the first line and the secret key on
    the second.

    Returns:
        tuple: ``(access_key, secret_key)``, both stripped of whitespace.
    """
    with open(keysfile, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    return lines[0].strip(), lines[1].strip()
|
29
biliup_archive_daily_bvids.py
Normal file
29
biliup_archive_daily_bvids.py
Normal file
@ -0,0 +1,29 @@
|
||||
import asyncio
|
||||
import datetime
|
||||
from _biliup_archive_bvid import archive_bvid
|
||||
import argparse
|
||||
|
||||
def parse_args():
    """Parse command-line arguments.

    ``--sess-data`` defaults to the contents of ``sess_data.txt``, but the
    file is only read when the flag was NOT given on the command line.
    (The original passed ``default=get_sess_data()``, which hit the
    filesystem while building the parser — crashing when ``sess_data.txt``
    is absent even though ``--sess-data`` was supplied explicitly.)
    ``--bvids`` defaults to today's bvid list file.
    """
    parser = argparse.ArgumentParser()
    today = datetime.date.today()
    parser.add_argument('--sess-data', type=str, default=None)
    parser.add_argument('--bvids', type=str, default=f'bvids/bvids-{today.isoformat()}.txt')
    args = parser.parse_args()
    if args.sess_data is None:
        # lazy default: read sess_data.txt only when actually needed
        args.sess_data = get_sess_data()
    return args
|
||||
|
||||
def main():
    """Archive every bvid listed in the ``--bvids`` file, one at a time."""
    args = parse_args()
    with open(args.bvids, 'r', encoding='utf-8') as f:
        bvid_list = f.read().splitlines()
    # Each bvid gets its own event loop run; failures abort the batch.
    for bvid in bvid_list:
        asyncio.run(archive_bvid(bvid=bvid, sess_data=args.sess_data))
|
||||
|
||||
|
||||
def get_sess_data():
    """Return the SESSDATA cookie value stored in ``sess_data.txt``, stripped."""
    with open('sess_data.txt', 'r', encoding='utf-8') as f:
        return f.read().strip()
|
||||
|
||||
|
||||
# Script entry point.
if __name__ == '__main__':
    main()
|
71
biliup_get_daily_bvids.py
Normal file
71
biliup_get_daily_bvids.py
Normal file
@ -0,0 +1,71 @@
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import requests
|
||||
import json
|
||||
from bilix.sites.bilibili import DownloaderBilibili
|
||||
from bilibili_api import video, sync
|
||||
from internetarchive import get_item
|
||||
|
||||
|
||||
import datetime  # hoisted from mid-script so the section's imports sit together

# Fetch today's overall ranking and persist its bvids to bvids/bvids-<date>.txt
bilibili_ranking_api = "https://api.bilibili.com/x/web-interface/ranking/v2"
bilibili_ranking_params = {
    "rid": 0,       # rid 0 = all categories
    "type": "all"
}

r = requests.get(bilibili_ranking_api, params=bilibili_ranking_params)
r.raise_for_status()  # fail fast on transport/HTTP errors, not just API-level ones
ranking_json = r.json()
assert ranking_json['code'] == 0  # 0 == success for bilibili's API

ranking = ranking_json['data']['list']
# One bvid per ranked video.
bvids = [video_info['bvid'] for video_info in ranking]

today = datetime.date.today()
os.makedirs('bvids', exist_ok=True)
with open(f'bvids/bvids-{today.isoformat()}.txt', 'w', encoding='utf-8') as f:
    for bvid in bvids:
        f.write(bvid + '\n')
|
||||
|
||||
|
||||
|
||||
# print(bvid)
|
||||
# assert isinstance(bvid, str)
|
||||
|
||||
# v = video.Video(bvid=bvid)
|
||||
# video_info = sync(v.get_info())
|
||||
|
||||
# with open(f'bili/video/{bvid}/video-{bvid}.info.json', 'w', encoding='utf-8') as f:
|
||||
# json.dump(video_info, f, ensure_ascii=False, indent=4)
|
||||
|
||||
# # with open('ranking.json', 'w', encoding='utf-8') as f:
|
||||
# # json.dump(ranking_json, f, ensure_ascii=False, indent=4)
|
||||
|
||||
|
||||
# async def main():
|
||||
# d = DownloaderBilibili(video_concurrency=5, part_concurrency=10, hierarchy=False,
|
||||
# sess_data=sess_data)
|
||||
|
||||
# d.progress.start()
|
||||
# # cor1 = d.get_series(
|
||||
# # 'https://www.bilibili.com/bangumi/play/ss28277'
|
||||
# # , quality=0)
|
||||
# # cor2 = d.get_up(url_or_mid='436482484', quality=0)
|
||||
# os.makedirs(f'bili/video/{bvid}', exist_ok=True)
|
||||
# cor3 = d.get_series(url=f'https://www.bilibili.com/video/{bvid}',
|
||||
# dm=True, quality=0, image=True, subtitle=True, path=f'bili/video/{bvid}')
|
||||
|
||||
# await asyncio.gather(cor3)
|
||||
# await d.aclose()
|
||||
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# # asyncio.run(main())
|
||||
# pass
|
||||
|
||||
|
0
biliup_upload_daily_bvids.py
Normal file
0
biliup_upload_daily_bvids.py
Normal file
Loading…
Reference in New Issue
Block a user