2023-06-01 07:00:37 -07:00
|
|
|
import json
|
|
|
|
import os
|
|
|
|
import time
|
|
|
|
from internetarchive import get_item
|
2023-06-01 20:55:42 -07:00
|
|
|
from rich import print
|
2023-06-02 12:09:57 -07:00
|
|
|
|
2023-06-02 13:32:11 -07:00
|
|
|
from _biliarchiver_archive_bvid import BILIBILI_IDENTIFIER_PERFIX
|
2023-06-02 12:09:57 -07:00
|
|
|
|
|
|
|
|
2023-06-01 07:00:37 -07:00
|
|
|
def upload_bvid(bvid):
    """Upload every downloaded part (one IA item per part) of BiliBili video `bvid`.

    Expects the current working directory to contain a `biliarchiver.home`
    marker file and the downloaded data under `biliarchiver/videos/{bvid}/`.

    Side effects: creates/updates Internet Archive items and writes an
    `_uploaded.mark` file into each part's directory once it is uploaded.

    Raises:
        Exception: if the `biliarchiver.home` marker is missing, or if the
            part number cannot be found in the video's info.json.
    """
    # Safety check: refuse to run outside the intended working directory.
    if not os.path.exists('biliarchiver.home'):
        raise Exception('先创建 biliarchiver.home 文件')

    access_key, secret_key = read_ia_keys(os.path.expanduser('~/.bili_ia_keys.txt'))

    # sample: BiliBili-BV1Zh4y1x7RL_p3
    videos_basepath = f'biliarchiver/videos/{bvid}'
    for identifier in os.listdir(videos_basepath):
        # Skip parts already uploaded (local marker).
        if os.path.exists(f'{videos_basepath}/{identifier}/_uploaded.mark'):
            print(f'{identifier} 已经上传过了(_uploaded.mark)')
            continue
        # Skip private/bookkeeping directories.
        if identifier.startswith('_'):
            print(f'跳过 {identifier}')
            continue
        if not identifier.startswith(BILIBILI_IDENTIFIER_PERFIX):
            print(f'{identifier} 不是以 {BILIBILI_IDENTIFIER_PERFIX} 开头的正确 identifier')
            continue
        # Only upload parts whose download finished.
        if not os.path.exists(f'{videos_basepath}/{identifier}/_downloaded.mark'):
            print(f'{identifier} 没有下载完成')
            continue

        # identifier looks like "<prefix>-BV..._p<pid>": pid is the part number.
        pid = identifier.split('_')[-1][1:]
        file_basename = identifier[len(BILIBILI_IDENTIFIER_PERFIX)+1:]

        print(f'==== 开始上传 {identifier} ====')
        item = get_item(identifier)
        if item.exists:
            print(f'item {identifier} 已存在(item.exists)')
            # Remote says uploaded: record the local marker and move on.
            if item.metadata.get("upload-state") == "uploaded":
                print(f'{identifier} 已经上传过了,跳过(item.metadata.uploaded)')
                with open(f'{videos_basepath}/{identifier}/_uploaded.mark', 'w', encoding='utf-8') as f:
                    f.write('')
                continue

        filedict = {}  # "remote filename": "local filename"
        for filename in os.listdir(f'{videos_basepath}/{identifier}/extra'):
            # BUGFIX: path was built with a literal placeholder instead of {filename}.
            file = f'{videos_basepath}/{identifier}/extra/{filename}'
            if os.path.isfile(file):
                # BUGFIX: test the basename, not the whole path — the full path
                # always starts with 'biliarchiver/...' so the old check never fired.
                if os.path.basename(file).startswith('_'):
                    continue
                filedict[filename] = file

        for filename in os.listdir(f'{videos_basepath}/{identifier}'):
            # BUGFIX: path was built with a literal placeholder instead of {filename}.
            file = f'{videos_basepath}/{identifier}/{filename}'
            if os.path.isfile(file):
                if os.path.basename(file).startswith('_'):
                    continue
                # (removed a redundant re-check of os.path.isfile here)
                filedict[filename] = file

        # IA dedupe: drop files that already exist in the remote item.
        for file_in_item in item.files:
            if file_in_item["name"] in filedict:
                filedict.pop(file_in_item["name"])
                print(f"File {file_in_item['name']} already exists in {identifier}.")

        with open(f'{videos_basepath}/{identifier}/extra/{file_basename}.info.json', 'r', encoding='utf-8') as f:
            bv_info = json.load(f)

        tags = ['BiliBili', 'video']
        for tag in bv_info['data']['Tags']:
            tags.append(tag['tag_name'])

        pubdate = bv_info['data']['View']['pubdate']
        cid = None
        part = None
        for page in bv_info['data']['View']['pages']:
            if page['page'] == int(pid):
                cid = page['cid']
                part = page['part']
                break
        # Robustness: previously cid/part stayed unbound (NameError below) when
        # the part number was absent from info.json — fail with a clear message.
        if cid is None:
            raise Exception(f'{identifier}: P{pid} not found in info.json pages')

        md = {
            "mediatype": "movies",
            "collection": 'opensource_movies',
            "title": bv_info['data']['View']['title'] + f' P{pid} ' + part ,
            "description": identifier + ' uploading...',
            'creator': bv_info['data']['View']['owner']['name'],  # uploader ("UP 主")
            # UTC time
            'date': time.strftime("%Y-%m-%d", time.gmtime(pubdate)),
            'year': time.strftime("%Y", time.gmtime(pubdate)),
            'aid': bv_info['data']['View']['aid'],
            'bvid': bvid,
            'cid': cid,
            'mid': bv_info['data']['View']['owner']['mid'],
            "subject": "; ".join(
                tags
            ),  # Keywords should be separated by ; but it doesn't matter much; the alternative is to set one per field with subject[0], subject[1], ...
            "upload-state": "uploading",
            'originalurl': f'https://www.bilibili.com/video/{bvid}/?p={pid}',
            'scanner': 'biliarchiver v0.0.5 (dev)',
        }
        print(filedict)
        print(md)

        # Upload only if something remains after dedupe.
        if filedict:
            r = item.upload(
                files=filedict,
                metadata=md,
                access_key=access_key,
                secret_key=secret_key,
                verbose=True,
                queue_derive=True,
                retries=5,
            )

        # Poll until IA has actually created the item (up to 30 * 30s).
        tries = 30
        item = get_item(identifier)  # refresh item
        while not item.exists and tries > 0:
            print(f"Waiting for item to be created ({tries}) ...", end='\r')
            time.sleep(30)
            item = get_item(identifier)
            tries -= 1

        # Finalize remote metadata: mark uploaded, set the real description,
        # and refresh the scanner version if it changed.
        new_md = {}
        if item.metadata.get("upload-state") != "uploaded":
            new_md.update({"upload-state": "uploaded"})
        if item.metadata.get("description") != bv_info['data']['View']['desc']:
            new_md.update({"description": bv_info['data']['View']['desc']})
        if item.metadata.get("scanner") != md['scanner']:
            new_md.update({"scanner": md['scanner']})
        if new_md:
            print("Updating metadata:")
            print(new_md)
            r = item.modify_metadata(
                metadata=new_md,
                access_key=access_key,
                secret_key=secret_key,
            )
            r.raise_for_status()
        with open(f'{videos_basepath}/{identifier}/_uploaded.mark', 'w', encoding='utf-8') as f:
            f.write('')
        print(f'==== {identifier} 上传完成 ====')
|
2023-06-01 07:00:37 -07:00
|
|
|
|
|
|
|
def read_ia_keys(keysfile):
    """Load Internet Archive S3 credentials from *keysfile*.

    The file's first line holds the access key and its second line the
    secret key; surrounding whitespace is stripped.

    Return: tuple(`access_key`, `secret_key`)
    """
    with open(keysfile, 'r', encoding='utf-8') as fp:
        lines = fp.readlines()
    return lines[0].strip(), lines[1].strip()
|