feat: select_best_tracker() before running

feat: read `ARCHIVIST` from env
dependencies: remove legacy `bson`
This commit is contained in:
yzqzss 2024-04-22 16:38:45 +08:00
parent a99ace395c
commit a43d8b6cbe
9 changed files with 54 additions and 58 deletions

4
.gitignore vendored
View File

@ -1,2 +1,4 @@
.vscode
__pycache__
__pycache__
*.conf
dist/

View File

@ -1 +1,5 @@
# huashijie_work
ARCHIVIST (节点名)读取优先级:
`ARCHIVIST` 环境变量 > `ARCHIVIST.conf` 配置文件

View File

@ -84,7 +84,12 @@ def process(tracker: Tracker, TASK: Task):
raise NotImplementedError(r.text)
def main():
print("zh: 为避免您的节点被 ban请不要使用相同 IP 多开同一个项目!")
print("en: To avoid being banned, please do not run concurrently on the same project with the same IP!")
archivist = get_archivist() or new_archivist()
time.sleep(1) # avoid infinite restart consume too much CPU
tracker = Tracker(project_id="huashijie_work", client_version=VERSION, archivist=archivist, session=session)
while True:

View File

@ -2,8 +2,6 @@ import os
def new_archivist():
print("zh: 未避免您的节点被 ban请不要在同一个 IP 下多开!")
print("en: To avoid being banned, please do not run concurrently on the same IP!")
print("zh: 第一次运行,请输入可以唯一标识您节点的字符串。(合法字符:字母、数字、-、_")
print("en: First run, please input a string that can uniquely identify your node. (Legal characters: letters, numbers, -, _)")
with open("ARCHIVIST.conf", "w") as f:
@ -11,9 +9,9 @@ def new_archivist():
return get_archivist()
def get_archivist():
    """Return the configured archivist (node name), or "" when none is set.

    Lookup order:
      1. the ``ARCHIVIST`` environment variable, if it is non-empty;
      2. the first line of ``ARCHIVIST.conf`` in the current working
         directory, with surrounding whitespace stripped.

    Returns:
        The archivist string, or "" if neither source provides one (including
        the case where ``ARCHIVIST.conf`` exists but is empty).
    """
    # The environment variable wins over the config file.
    if arch := os.getenv("ARCHIVIST", ""):
        return arch
    if os.path.exists("ARCHIVIST.conf"):
        with open("ARCHIVIST.conf", "r") as f:
            lines = f.read().splitlines()
        # An empty file has no first line; treat it as "not configured"
        # instead of raising IndexError.
        return lines[0].strip() if lines else ""
    return ""

View File

@ -2,8 +2,6 @@ from typing import Optional
from dataclasses import dataclass
from datetime import datetime
from bson import ObjectId
class Status:
TODO = "TODO"
PROCESSING = "PROCESSING"
@ -16,7 +14,8 @@ class Status:
@dataclass
class Task:
_id: ObjectId
_id: str
""" ObjectID """
id: int
status: Status
archivist: str

View File

@ -48,7 +48,7 @@ TRACKER_NODES = [
"https://0.tracker.saveweb.org/",
"https://1.tracker.saveweb.org/",
"https://2.tracker.saveweb.org/",
"https://3.tracker.saveweb.org/",
"http://3.tracker.saveweb.org/",
]
TEST_TRACKER_NODES = [
@ -56,8 +56,7 @@ TEST_TRACKER_NODES = [
]
class Tracker:
API_BASE = TRACKER_NODES[0]
# API_BASE = TEST_TRACKER_NODES[0]
API_BASE: str = TEST_TRACKER_NODES[0]
API_VERSION = "v1"
client_version: str
project_id: str
@ -82,7 +81,9 @@ class Tracker:
self.archivist = archivist
self.session = session
assert self.project.client.version == self.client_version, "client_version mismatch"
self.select_best_tracker()
assert self.project.client.version == self.client_version, "client_version mismatch, please upgrade your client."
print(f"[tracker] Hello, {self.archivist}!")
print(f"[tracker] Project: {self.project}")
@ -97,6 +98,32 @@ class Tracker:
self.__project = self.fetch_project()
self.__project_last_fetched = time.time()
return copy.deepcopy(self.__project)
def select_best_tracker(self):
    """Ping every node in TRACKER_NODES and set API_BASE to the fastest one.

    Each node's ``ping`` endpoint is requested through ``self.session``.
    A node whose request raises (including ``raise_for_status()``) is ranked
    with infinite latency. Ties keep the order of TRACKER_NODES, so if every
    node fails the first entry is still selected.

    Side effects:
        Assigns ``self.API_BASE`` and prints progress to stdout.
    """
    print("[client->trackers] select_best_tracker...")

    # Warm-up pass with a very short timeout ("DNS preload, dirty hack"):
    # presumably so name resolution is not charged to the timed ping below.
    # Failures here are expected and deliberately ignored.
    for node in TRACKER_NODES:
        try:
            self.session.get(node + 'ping', timeout=0.05)
        except Exception:
            pass

    result = []  # [(node, ping)]
    for node in TRACKER_NODES:
        print(f"[client->tracker({node})] ping...")
        # A monotonic clock cannot go backwards if the wall clock is adjusted.
        start = time.monotonic()
        try:
            r = self.session.get(node + 'ping', timeout=5)
            r.raise_for_status()
        except Exception as e:
            print(f"[client->tracker({node})] ping failed. {type(e)}")
            result.append((node, float('inf')))
        else:
            # Measure once so the logged and the recorded latency agree.
            elapsed = time.monotonic() - start
            print(f"[client<-tracker({node})] ping OK. {elapsed:.2f}s")
            result.append((node, elapsed))

    # min() returns the first minimal entry, matching a stable sort's [0].
    self.API_BASE = min(result, key=lambda x: x[1])[0]
    print("===============================")
    print(f"tracker selected: {self.API_BASE}")
    print("===============================")
def fetch_project(self):
"""
@ -131,6 +158,7 @@ class Tracker:
time.sleep(sleep_need)
elif sleep_need < 0:
print(f"[tracker] you are {sleep_need:.2f}s late, Qos: {self.project.client.claim_task_delay}")
print("[client->tracker] claim_task")
start = time.time()
r = self.session.post(f'{self.API_BASE}{self.API_VERSION}/project/{self.project_id}/{self.client_version}/{self.archivist}/claim_task')
@ -142,7 +170,7 @@ class Tracker:
r_json = r.json()
print(f'[client<-tracker] claim_task. OK (time cost: {time.time() - start:.2f}s):', r_json)
return r_json
raise Exception(r.text)
raise Exception(r.status_code, r.text)
def update_task(self, task_id: Union[str, int], status: str)->Dict[str, Any]:
"""

41
poetry.lock generated
View File

@ -1,19 +1,5 @@
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
[[package]]
name = "bson"
version = "0.5.10"
description = "BSON codec for Python"
optional = false
python-versions = "*"
files = [
{file = "bson-0.5.10.tar.gz", hash = "sha256:d6511b2ab051139a9123c184de1a04227262173ad593429d21e443d6462d6590"},
]
[package.dependencies]
python-dateutil = ">=2.4.0"
six = ">=1.9.0"
[[package]]
name = "certifi"
version = "2024.2.2"
@ -135,20 +121,6 @@ files = [
{file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"},
]
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"
description = "Extensions to the standard Python datetime module"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
files = [
{file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"},
{file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
]
[package.dependencies]
six = ">=1.5"
[[package]]
name = "requests"
version = "2.31.0"
@ -170,17 +142,6 @@ urllib3 = ">=1.21.1,<3"
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
[[package]]
name = "six"
version = "1.16.0"
description = "Python 2 and 3 compatibility utilities"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
files = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
]
[[package]]
name = "urllib3"
version = "2.2.1"
@ -201,4 +162,4 @@ zstd = ["zstandard (>=0.18.0)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.9"
content-hash = "17d70c3e5f4343b38290e2cdd06c4ba5f548093a297eb095bc2502adbd3919ff"
content-hash = "ee7fed81cef220efc922f65299b2c6240968697a8ab9aad99eb924d83f71d2d5"

View File

@ -8,7 +8,6 @@ readme = "README.md"
[tool.poetry.dependencies]
python = "^3.9"
requests = "^2.31.0"
bson = "^0.5.10"
[tool.poetry.scripts]
huashijie_work = "huashijie.huashijie_work:main"