bili_get_bvids: 添加 "入站必刷"、"每周必看"

This commit is contained in:
yzqzss 2023-07-22 03:21:40 +08:00
parent 4fcd91f384
commit f7d07082ae

View File

@ -14,17 +14,31 @@ from rich import print
def arg_parse() -> argparse.Namespace:
    """Build the command-line parser and parse the process arguments.

    Each bvid source gets its own argument group with a ``--by-...`` switch
    that enables it, plus any options specific to that source.

    Returns:
        The namespace produced by ``parser.parse_args()``. Attribute names
        are the option names with ``-`` replaced by ``_`` (for example
        ``--up_videos-mid`` becomes ``up_videos_mid``).
    """
    parser = argparse.ArgumentParser()
    # Why "by-xxx" rather than "from-xxx"? Because it is easier to type on
    # the command line.

    ranking_group = parser.add_argument_group(
        title='by ranking',
        description='排行榜(全站榜,非个性推荐榜)',
    )
    ranking_group.add_argument('--by-ranking', action='store_true', help='从排行榜获取 bvids')
    # Register --ranking-rid exactly once: adding the same option string a
    # second time makes argparse raise "conflicting option string".
    ranking_group.add_argument('--ranking-rid', type=int, default=0, help='目标排行 rid0 为全站排行榜。rid 等于分区的 tid [default: 0]')

    up_videos_group = parser.add_argument_group(
        title='by up videos',
        description='up 主用户页投稿',
    )
    up_videos_group.add_argument('--by-up_videos', action='store_true', help='从 up 主用户页获取全部的投稿的 bvids')
    up_videos_group.add_argument('--up_videos-mid', type=str, help='目标 up 主的 mid (也可以是用户页的 URL)')

    popular_precious_group = parser.add_argument_group(
        title='popular precious',
        description='入站必刷,更新频率低',
    )
    popular_precious_group.add_argument('--by-popular_precious', action='store_true', help='从入站必刷获取 bvids', dest='by_popular_precious')

    popular_series_group = parser.add_argument_group(
        title='popular series',
        description='每周必看每周五晚18:00更新',
    )
    popular_series_group.add_argument('--by-popular_series', action='store_true', help='从每周必看获取 bvids', dest='by_popular_series')
    popular_series_group.add_argument('--popular_series-number', type=int, default=1, help='获取第几期(周) [default: 1]')
    popular_series_group.add_argument('--all-popular_series', action='store_true', help='自动获取全部的每周必看(增量)', dest='all_popular_series')

    return parser.parse_args()
@ -79,9 +93,10 @@ async def by_up_videos(url_or_mid: str) -> Path:
keyword = '' # 搜索关键词
bv_ids = []
pn = 1
print(f'获取第 {pn} 页...')
up_name, total_size, bv_ids_page = await api.get_up_info(client, mid, pn, ps, order, keyword)
bv_ids += bv_ids_page
print(f'{mid} {up_name}{total_size} 个视频... (第 1 页)')
print(f'{mid} {up_name}{total_size} 个视频. (如果最新的视频为合作视频的非主作者UP 名可能会识别错误,但不影响获取 bvid 列表)')
while pn < total_size / ps:
pn += 1
print(f'获取第 {pn} 页 (10s...)')
@ -102,6 +117,62 @@ async def by_up_videos(url_or_mid: str) -> Path:
print(f'已保存 {len(bv_ids)} 个 bvid 到 {abs_filepath}')
return Path(abs_filepath)
def by_popular_precious():
    """Fetch the bvids of the "popular/precious" list and save them to disk.

    Requests the popular/precious endpoint, collects the ``bvid`` of every
    entry in ``data.list`` and writes them, one per line, to
    ``bvids/by-popular_precious/<unix timestamp>.txt``. The absolute path of
    the written file is printed.

    Raises:
        AssertionError: if the API answers with a non-zero ``code``.
        Whatever ``requests`` raises on a connection error, a timeout, or
        (via ``raise_for_status``) an error HTTP status.
    """
    API_URL = "https://api.bilibili.com/x/web-interface/popular/precious"
    # Without a timeout, requests waits indefinitely on a stalled connection.
    r = requests.get(API_URL, timeout=30)
    r.raise_for_status()
    popular_precious_json = json.loads(r.text)
    if popular_precious_json['code'] != 0:
        # Explicit raise instead of `assert` so the check is not stripped
        # under `python -O`; the exception type is kept for existing callers.
        raise AssertionError(
            f"popular/precious API returned code {popular_precious_json['code']}"
        )

    bvids = [video_info['bvid'] for video_info in popular_precious_json['data']['list']]

    filepath = f'bvids/by-popular_precious/{int(time.time())}.txt'
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    abs_filepath = os.path.abspath(filepath)
    with open(abs_filepath, 'w', encoding='utf-8') as f:
        f.write('\n'.join(bvids))
    print(f'已保存 {len(bvids)} 个 bvid 到 {abs_filepath}')
def by_popular_series_one(number: int):
    """Fetch the bvids of one "popular/series" issue and save them to disk.

    Requests the popular/series/one endpoint for the given issue, collects
    the ``bvid`` of every entry in ``data.list`` and writes them, one per
    line, to ``bvids/by-popular_series/s<number>-<unix timestamp>.txt``.
    The absolute path of the written file is printed.

    Args:
        number: Issue number, sent as the ``number`` query parameter and
            embedded in the output file name.

    Raises:
        AssertionError: if the API answers with a non-zero ``code``.
        Whatever ``requests`` raises on a connection error, a timeout, or
        (via ``raise_for_status``) an error HTTP status.
    """
    API_URL = "https://api.bilibili.com/x/web-interface/popular/series/one"
    params = {
        "number": number
    }
    # Without a timeout, requests waits indefinitely on a stalled connection.
    r = requests.get(API_URL, params=params, timeout=30)
    r.raise_for_status()
    popular_series_json = json.loads(r.text)
    if popular_series_json['code'] != 0:
        # Explicit raise instead of `assert` so the check is not stripped
        # under `python -O`; the exception type is kept for existing callers.
        raise AssertionError(
            f"popular/series/one API returned code {popular_series_json['code']} "
            f"for number {number}"
        )

    bvids = [video_info['bvid'] for video_info in popular_series_json['data']['list']]

    # The "s{number}-" prefix is what not_got_popular_series() parses back.
    filepath = f'bvids/by-popular_series/s{number}-{int(time.time())}.txt'
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    abs_filepath = os.path.abspath(filepath)
    with open(abs_filepath, 'w', encoding='utf-8') as f:
        f.write('\n'.join(bvids))
    print(f'已保存 {len(bvids)} 个 bvid 到 {abs_filepath}')
def not_got_popular_series() -> list[int]:
    """Return the "popular/series" issue numbers that are not saved locally.

    Scans ``bvids/by-popular_series`` (created if missing) for files named
    ``s{number}-{timestamp}.txt``, asks the popular/series/list endpoint for
    the known issues, and returns every number from 1 up to the highest
    listed issue that has no local file yet.

    Returns:
        Missing issue numbers in ascending order; empty when nothing is
        missing or the API lists no issues.

    Raises:
        AssertionError: if the API answers with a non-zero ``code``.
        Whatever ``requests`` raises on a connection error, a timeout, or
        (via ``raise_for_status``) an error HTTP status.
    """
    # https, matching the other popular/* endpoints used in this file.
    API_URL = "https://api.bilibili.com/x/web-interface/popular/series/list"
    series_dir = 'bvids/by-popular_series'

    os.makedirs(series_dir, exist_ok=True)
    got_series = set()  # set: O(1) membership tests in the loop below
    for filename in os.listdir(series_dir):
        if not filename.endswith('.txt'):
            continue
        # Expected name: s{number}-{int(time.time())}.txt
        try:
            got_series.add(int(filename.split('-')[0][1:]))
        except ValueError:
            # Unrelated .txt file in the directory; ignore it rather than crash.
            continue

    # Without a timeout, requests waits indefinitely on a stalled connection.
    r = requests.get(API_URL, timeout=30)
    r.raise_for_status()
    popular_series_json = json.loads(r.text)
    if popular_series_json['code'] != 0:
        # Explicit raise instead of `assert` so the check is not stripped
        # under `python -O`; the exception type is kept for existing callers.
        raise AssertionError(
            f"popular/series/list API returned code {popular_series_json['code']}"
        )

    # Take the maximum instead of trusting that the first entry is the newest.
    max_series_number = max(
        (series['number'] for series in popular_series_json['data']['list']),
        default=0,
    )

    return [i for i in range(1, max_series_number + 1) if i not in got_series]
async def _main():
args = arg_parse()
@ -110,6 +181,16 @@ async def _main():
if args.by_up_videos:
assert args.up_videos_mid is not None, 'up_videos_mid 不能为空'
await by_up_videos(args.up_videos_mid)
if args.by_popular_precious:
by_popular_precious()
if args.by_popular_series:
if args.all_popular_series:
for number in not_got_popular_series():
time.sleep(3)
by_popular_series_one(number)
else:
by_popular_series_one(args.popular_series_number)
def main():
    """Synchronous entry point: run the ``_main`` coroutine to completion."""
    coroutine = _main()
    asyncio.run(coroutine)