biliarchiver/_biliup_upload_bvid.py

140 lines
5.4 KiB
Python
Raw Normal View History

2023-06-01 07:00:37 -07:00
# Prefix that every item identifier handled by this module must start with
# (sample identifier: "BiliBili-BV1Zh4y1x7RL_p3").
# NOTE(review): "perfix" is a misspelling of "prefix"; the name is left as-is
# because other modules may import it — confirm before renaming.
identifier_perfix = 'BiliBili'
import json
import os
import time
from internetarchive import get_item
2023-06-01 20:55:42 -07:00
from rich import print
2023-06-01 07:00:37 -07:00
def upload_bvid(bvid):
    """Upload every downloaded part of one BiliBili video to the Internet Archive.

    Each entry of ``biliup/videos/{bvid}`` is treated as one item identifier
    (sample: ``BiliBili-BV1Zh4y1x7RL_p3``). An entry is skipped, with a
    printed message, when any of the following holds:

    * it already contains ``_uploaded.mark``;
    * its name starts with ``_``;
    * its name does not start with ``identifier_perfix``;
    * it does not contain ``_downloaded.mark``;
    * the remote item already reports ``upload-state == "uploaded"``;
    * the page number encoded in the identifier is not listed in the
      ``extra/{basename}.info.json`` file.

    For the remaining entries, the files that are not yet present in the
    remote item are uploaded together with metadata built from the info
    JSON. Once the item is visible, its ``upload-state`` is switched to
    ``"uploaded"`` and an empty ``_uploaded.mark`` file is written locally.

    Args:
        bvid: The video id; used as the directory name below
            ``biliup/videos`` and recorded in the item metadata.

    Raises:
        Exception: If ``biliup.home`` does not exist in the current working
            directory.
        TimeoutError: If the item is still not visible after the upload and
            the waiting period; ``_uploaded.mark`` is not written then.
    """
    if not os.path.exists('biliup.home'):
        # Message (kept verbatim): "create the biliup.home file first".
        raise Exception('先创建 biliup.home 文件')
    access_key, secret_key = read_ia_keys(os.path.expanduser('~/.bili_ia_keys.txt'))

    videos_basepath = f'biliup/videos/{bvid}'
    for identifier in os.listdir(videos_basepath):
        item_dir = f'{videos_basepath}/{identifier}'

        if os.path.exists(f'{item_dir}/_uploaded.mark'):
            print(f'{identifier} 已经上传过了(_uploaded.mark)')
            continue
        if identifier.startswith('_'):
            print(f'跳过 {identifier}')
            continue
        if not identifier.startswith(identifier_perfix):
            print(f'{identifier} 不是以 {identifier_perfix} 开头的正确 identifier')
            continue
        if not os.path.exists(f'{item_dir}/_downloaded.mark'):
            print(f'{identifier} 没有下载完成')
            continue

        # sample: BiliBili-BV1Zh4y1x7RL_p3 -> pid "3", file_basename "BV1Zh4y1x7RL_p3"
        pid = identifier.split('_')[-1][1:]
        file_basename = identifier[len(identifier_perfix) + 1:]

        print(f'开始上传 {identifier}')
        item = get_item(identifier)
        if item.exists:
            print(f'item {identifier} 已存在(item.exists)')
            if item.metadata.get("upload-state") == "uploaded":
                print(f'{identifier} 已经上传过了,跳过(item.metadata.uploaded)')
                continue

        filedict = _collect_upload_files(item_dir)
        # Do not re-send files that the remote item already holds.
        for file_in_item in item.files:
            if file_in_item["name"] in filedict:
                filedict.pop(file_in_item["name"])
                print(f"File {file_in_item['name']} already exists in {identifier}.")

        with open(f'{item_dir}/extra/{file_basename}.info.json', 'r', encoding='utf-8') as f:
            bv_info = json.load(f)

        page = _find_page(bv_info['data']['View']['pages'], pid)
        if page is None:
            # Without this guard the metadata would be built from unbound
            # names, or from the page of the previously processed identifier.
            print(f'{identifier}: page {pid!r} not found in {file_basename}.info.json, skipping')
            continue

        md = _build_metadata(bvid, pid, bv_info, page)
        print(filedict)
        print(md)

        item.upload(
            files=filedict,
            metadata=md,
            access_key=access_key,
            secret_key=secret_key,
            verbose=True,
            queue_derive=True,
        )

        item = _wait_for_item(identifier)
        if not item.exists:
            # Leave the directory unmarked so that a later run retries it.
            raise TimeoutError(
                f'item {identifier} did not become available after upload; '
                'not marking it as uploaded'
            )

        if item.metadata.get("upload-state") != "uploaded":
            r = item.modify_metadata(
                metadata={"upload-state": "uploaded"},
                access_key=access_key,
                secret_key=secret_key,
            )
            r.raise_for_status()

        with open(f'{item_dir}/_uploaded.mark', 'w', encoding='utf-8') as f:
            f.write('')
        print(f'{identifier} 上传完成')


def _collect_upload_files(item_dir):
    """Map remote file names to local paths for ``item_dir`` and ``item_dir/extra``.

    Only regular files are included, and names starting with ``_`` (local
    marker files) are left out in both directories. When the same name
    occurs in both directories, the one from ``extra`` wins.
    """
    filedict = {}  # "remote filename": "local filename"
    for folder in (item_dir, f'{item_dir}/extra'):
        for filename in os.listdir(folder):
            # Test the bare name: the full path never starts with '_'.
            if filename.startswith('_'):
                continue
            file = f'{folder}/{filename}'
            if os.path.isfile(file):
                filedict[filename] = file
    return filedict


def _find_page(pages, pid):
    """Return the entry of ``pages`` whose ``'page'`` equals ``int(pid)``, else ``None``.

    ``None`` is also returned when ``pid`` is not a valid integer string.
    """
    try:
        page_number = int(pid)
    except ValueError:
        return None
    for page in pages:
        if page['page'] == page_number:
            return page
    return None


def _build_metadata(bvid, pid, bv_info, page):
    """Build the item metadata dict for one page of a video from its info JSON."""
    view = bv_info['data']['View']
    pubdate = view['pubdate']
    published = time.gmtime(pubdate)  # UTC time

    tags = ['BiliBili', 'video']
    tags.extend(tag['tag_name'] for tag in bv_info['data']['Tags'])

    return {
        "mediatype": "movies",
        "collection": 'opensource_movies',
        "title": view['title'] + f' P{pid} ' + page['part'],
        "description": view['desc'],
        'creator': view['owner']['name'],  # the uploader ("UP主")
        'date': time.strftime("%Y-%m-%d", published),
        'year': time.strftime("%Y", published),
        'bvid': bvid,
        'aid': view['aid'],
        'cid': page['cid'],
        # Keywords should be separated by ';' but it doesn't matter much;
        # the alternative is one field per keyword: subject[0], subject[1], ...
        "subject": "; ".join(tags),
        # Switched to "uploaded" by upload_bvid once the upload has finished.
        "upload-state": "uploading",
        'originalurl': f'https://www.bilibili.com/video/{bvid}/?p={pid}',
        'project': 'bilibili top100 daily archive',
        'scanner': 'biliup v2233.0.2 (dev)',
    }


def _wait_for_item(identifier, tries=30, delay=30):
    """Re-fetch ``identifier`` until it exists or ``tries`` polls are used up.

    Sleeps ``delay`` seconds between polls and returns the last fetched
    item; the caller must check ``item.exists`` on the result.
    """
    item = get_item(identifier)  # refresh item
    while not item.exists and tries > 0:
        print(f"Waiting for item to be created ({tries}) ...", end='\r')
        time.sleep(delay)
        item = get_item(identifier)
        tries -= 1
    return item
def read_ia_keys(keysfile):
    """Read an access key / secret key pair from a text file.

    The first two non-blank lines of the file are used, in that order, with
    surrounding whitespace stripped. Blank lines and any further lines are
    ignored.

    Args:
        keysfile: Path of the UTF-8 text file holding the two keys.

    Return: tuple(`access_key`, `secret_key`)

    Raises:
        IndexError: If the file contains fewer than two non-blank lines.
            (The type matches what indexing a too-short line list raised
            before; only the message is new.)
    """
    with open(keysfile, 'r', encoding='utf-8') as f:
        keys = [line.strip() for line in f if line.strip()]
    if len(keys) < 2:
        raise IndexError(
            f'{keysfile} must contain the access key and the secret key '
            'on two separate lines'
        )
    access_key, secret_key = keys[:2]
    return access_key, secret_key