diff --git a/.gitignore b/.gitignore index 4edd750..4638a1d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ .vscode -__pycache__ \ No newline at end of file +__pycache__ +*.conf +dist/ \ No newline at end of file diff --git a/README.md b/README.md index e838f73..2c98bc8 100644 --- a/README.md +++ b/README.md @@ -1 +1,5 @@ # huashijie_work + +ARCHIVIST (节点名)读取优先级: + +`ARCHIVIST` 环境变量 > `ARCHIVIST.conf` 配置文件 diff --git a/huashijie/hsj_api.py b/hsj_api.py similarity index 100% rename from huashijie/hsj_api.py rename to hsj_api.py diff --git a/huashijie/huashijie_work.py b/huashijie/huashijie_work.py index 7ce07df..30414c2 100644 --- a/huashijie/huashijie_work.py +++ b/huashijie/huashijie_work.py @@ -84,7 +84,12 @@ def process(tracker: Tracker, TASK: Task): raise NotImplementedError(r.text) def main(): + print("zh: 为避免您的节点被 ban,请不要使用相同 IP 多开同一个项目!") + print("en: To avoid being banned, please do not run concurrently on the same project with the same IP!") + archivist = get_archivist() or new_archivist() + + time.sleep(1) # avoid infinite restart consume too much CPU tracker = Tracker(project_id="huashijie_work", client_version=VERSION, archivist=archivist, session=session) while True: diff --git a/huashijie/util/archivist.py b/huashijie/util/archivist.py index 7bfbcfb..2f15734 100644 --- a/huashijie/util/archivist.py +++ b/huashijie/util/archivist.py @@ -2,8 +2,6 @@ import os def new_archivist(): - print("zh: 未避免您的节点被 ban,请不要在同一个 IP 下多开!") - print("en: To avoid being banned, please do not run concurrently on the same IP!") print("zh: 第一次运行,请输入可以唯一标识您节点的字符串。(合法字符:字母、数字、-、_)") print("en: First run, please input a string that can uniquely identify your node. (Legal characters: letters, numbers, -, _)") with open("ARCHIVIST.conf", "w") as f: @@ -11,9 +9,9 @@ def new_archivist(): return get_archivist() def get_archivist(): - if not os.path.exists("ARCHIVIST.conf"): - return "" - print("zh: 未避免您的节点被 ban,请不要在同一个 IP 下多开!") - print("en: To avoid being banned, please do not run concurrently on the same IP!") - with open("ARCHIVIST.conf", "r") as f: - return f.read().splitlines()[0].strip() \ No newline at end of file + if arch := os.getenv("ARCHIVIST", ""): + return arch + if os.path.exists("ARCHIVIST.conf"): + with open("ARCHIVIST.conf", "r") as f: + return f.read().splitlines()[0].strip() + return "" \ No newline at end of file diff --git a/huashijie/util/task.py b/huashijie/util/task.py index 88e1015..bd9cbad 100644 --- a/huashijie/util/task.py +++ b/huashijie/util/task.py @@ -2,8 +2,6 @@ from typing import Optional from dataclasses import dataclass from datetime import datetime -from bson import ObjectId - class Status: TODO = "TODO" PROCESSING = "PROCESSING" @@ -16,7 +14,8 @@ class Status: @dataclass class Task: - _id: ObjectId + _id: str + """ ObjectID """ id: int status: Status archivist: str diff --git a/huashijie/util/tracker.py b/huashijie/util/tracker.py index 3ad0993..f1d9b46 100644 --- a/huashijie/util/tracker.py +++ b/huashijie/util/tracker.py @@ -48,7 +48,7 @@ TRACKER_NODES = [ "https://0.tracker.saveweb.org/", "https://1.tracker.saveweb.org/", "https://2.tracker.saveweb.org/", - "https://3.tracker.saveweb.org/", + "http://3.tracker.saveweb.org/", ] TEST_TRACKER_NODES = [ @@ -56,8 +56,7 @@ TEST_TRACKER_NODES = [ ] class Tracker: - API_BASE = TRACKER_NODES[0] - # API_BASE = TEST_TRACKER_NODES[0] + API_BASE: str = TEST_TRACKER_NODES[0] API_VERSION = "v1" client_version: str project_id: str @@ -82,7 +81,9 @@ class Tracker: self.archivist = archivist self.session = session - assert self.project.client.version == self.client_version, "client_version mismatch" + self.select_best_tracker() + + assert self.project.client.version == self.client_version, "client_version mismatch, please upgrade your client." print(f"[tracker] Hello, {self.archivist}!") print(f"[tracker] Project: {self.project}") @@ -97,6 +98,32 @@ class Tracker: self.__project = self.fetch_project() self.__project_last_fetched = time.time() return copy.deepcopy(self.__project) + + def select_best_tracker(self): + result = [] # [(node, ping)] + print("[client->trackers] select_best_tracker...") + for node in TRACKER_NODES: + try: + self.session.get(node + 'ping', timeout=0.05) # DNS preload, dirty hack + except Exception: + pass + + for node in TRACKER_NODES: + print(f"[client->tracker({node})] ping...") + start = time.time() + try: + r = self.session.get(node + 'ping', timeout=5) + r.raise_for_status() + print(f"[client<-tracker({node})] ping OK. {time.time() - start:.2f}s") + result.append((node, time.time() - start)) + except Exception as e: + print(f"[client->tracker({node}) ping failed. {type(e)}") + result.append((node, float('inf'))) + result.sort(key=lambda x: x[1]) + self.API_BASE = result[0][0] + print("===============================") + print(f"tracker selected: {self.API_BASE}") + print("===============================") def fetch_project(self): """ @@ -131,6 +158,7 @@ class Tracker: time.sleep(sleep_need) elif sleep_need < 0: print(f"[tracker] you are {sleep_need:.2f}s late, Qos: {self.project.client.claim_task_delay}") + print("[client->tracker] claim_task") start = time.time() r = self.session.post(f'{self.API_BASE}{self.API_VERSION}/project/{self.project_id}/{self.client_version}/{self.archivist}/claim_task') @@ -142,7 +170,7 @@ class Tracker: r_json = r.json() print(f'[client<-tracker] claim_task. OK (time cost: {time.time() - start:.2f}s):', r_json) return r_json - raise Exception(r.text) + raise Exception(r.status_code, r.text) def update_task(self, task_id: Union[str, int], status: str)->Dict[str, Any]: """ diff --git a/poetry.lock b/poetry.lock index cd9c100..7652cf8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,19 +1,5 @@ # This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. -[[package]] -name = "bson" -version = "0.5.10" -description = "BSON codec for Python" -optional = false -python-versions = "*" -files = [ - {file = "bson-0.5.10.tar.gz", hash = "sha256:d6511b2ab051139a9123c184de1a04227262173ad593429d21e443d6462d6590"}, -] - -[package.dependencies] -python-dateutil = ">=2.4.0" -six = ">=1.9.0" - [[package]] name = "certifi" version = "2024.2.2" @@ -135,20 +121,6 @@ files = [ {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, ] -[[package]] -name = "python-dateutil" -version = "2.9.0.post0" -description = "Extensions to the standard Python datetime module" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -files = [ - {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, - {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, -] - -[package.dependencies] -six = ">=1.5" - [[package]] name = "requests" version = "2.31.0" @@ -170,17 +142,6 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] -[[package]] -name = "six" -version = "1.16.0" -description = "Python 2 and 3 compatibility utilities" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] - [[package]] name = "urllib3" version = "2.2.1" @@ -201,4 +162,4 @@ zstd = ["zstandard (>=0.18.0)"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "17d70c3e5f4343b38290e2cdd06c4ba5f548093a297eb095bc2502adbd3919ff" +content-hash = "ee7fed81cef220efc922f65299b2c6240968697a8ab9aad99eb924d83f71d2d5" diff --git a/pyproject.toml b/pyproject.toml index 078001d..46df2c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,6 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.9" requests = "^2.31.0" -bson = "^0.5.10" [tool.poetry.scripts] huashijie_work = "huashijie.huashijie_work:main"