yzqzss
2848efdcc8
All checks were successful
Gitea Go Release Actions / Release Go Binary (arm64, darwin) (push) Successful in 47s
Gitea Go Release Actions / Release Go Binary (amd64, darwin) (push) Successful in 1m17s
Gitea Go Release Actions / Release Go Binary (amd64, linux) (push) Successful in 1m36s
Gitea Go Release Actions / Release Go Binary (amd64, windows) (push) Successful in 1m36s
Gitea Go Release Actions / Release Go Binary (arm64, linux) (push) Successful in 44s
Gitea Go Release Actions / Release Go Binary (arm, linux) (push) Successful in 1m0s
218 lines
5.9 KiB
Go
218 lines
5.9 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"net/http"
|
|
"os"
|
|
"os/signal"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"strconv"
|
|
|
|
cnblogs_api "git.saveweb.org/saveweb/cnblogs/pkg"
|
|
savewebtracker "git.saveweb.org/saveweb/saveweb_tracker/src/saveweb_tracker"
|
|
"github.com/hashicorp/go-retryablehttp"
|
|
)
|
|
|
|
var BASE_CONCURRENCY = 10
|
|
var WITH_DELAY = true
|
|
|
|
var tasks_chan = make(chan savewebtracker.Task, BASE_CONCURRENCY)
|
|
var Interrupted = false
|
|
var WaitClaimWorker sync.WaitGroup
|
|
var WaitProcesserWorker sync.WaitGroup
|
|
|
|
var project_id = "cnblogs_rss_detect"
|
|
|
|
var Logger *log.Logger
|
|
var DEBUG = false
|
|
|
|
func init() {
|
|
if os.Getenv("BASE_CONCURRENCY") != "" {
|
|
fmt.Println("BASE_CONCURRENCY:", os.Getenv("BASE_CONCURRENCY"))
|
|
BASE_CONCURRENCY, _ = strconv.Atoi(os.Getenv("BASE_CONCURRENCY"))
|
|
}
|
|
if os.Getenv("NO_WITH_DELAY") != "" {
|
|
fmt.Println("NO_WITH_DELAY:", os.Getenv("NO_WITH_DELAY"))
|
|
WITH_DELAY = false
|
|
}
|
|
if os.Getenv("DEBUG") != "" {
|
|
DEBUG = true
|
|
}
|
|
Logger = log.New(os.Stdout, "["+project_id+"] ", log.Ldate|log.Ltime|log.Lmsgprefix)
|
|
}
|
|
|
|
// custom_delay_lock is held by a claim worker while it sleeps for
// CUSTOM_DELAY, so only one worker performs that sleep at a time.
var custom_delay_lock sync.Mutex
|
|
|
|
// ClaimTask 并把任务放入 task_chan
|
|
func claimWorker(i int, tracker *savewebtracker.Tracker) {
|
|
Logger.Println("[START] ClaimWorker", i)
|
|
defer Logger.Println("[STOP] ClaimWorker", i, " exited...")
|
|
defer WaitClaimWorker.Done()
|
|
for {
|
|
if Interrupted {
|
|
return
|
|
}
|
|
if os.Getenv("CUSTOM_DELAY") != "" {
|
|
custom_delay_lock.Lock()
|
|
// xxxms
|
|
custom_delay, _ := time.ParseDuration(os.Getenv("CUSTOM_DELAY"))
|
|
Logger.Println("Custom delay:", custom_delay, "...")
|
|
time.Sleep(custom_delay)
|
|
custom_delay_lock.Unlock()
|
|
|
|
WITH_DELAY = false
|
|
}
|
|
task := tracker.ClaimTask(WITH_DELAY)
|
|
if task == nil {
|
|
notask_sleep := max(
|
|
time.Duration(tracker.Project().Client.ClaimTaskDelay)*10*time.Second,
|
|
time.Duration(10)*time.Second,
|
|
)
|
|
Logger.Println("No task to claim, sleep", notask_sleep)
|
|
time.Sleep(notask_sleep)
|
|
continue
|
|
}
|
|
Logger.Println("Claimed task", task.Id)
|
|
tasks_chan <- *task
|
|
}
|
|
}
|
|
|
|
func ProcesserWorker(i int, tracker *savewebtracker.Tracker) {
|
|
Logger.Println("[START] ProcesserWorker", i)
|
|
defer Logger.Println("[STOP] ProcesserWorker", i, " exited...")
|
|
defer WaitProcesserWorker.Done()
|
|
for task := range tasks_chan {
|
|
Logger.Println("Processing task", task.Id)
|
|
|
|
// 在这儿处理任务
|
|
headers, r_status, ContentLength := cnblogs_api.GetRSSHeadHeaders(*tracker.HTTP_client, task.Id)
|
|
|
|
// HTTP/2 200
|
|
// date: Wed, 17 Jul 2024 06:23:58 GMT
|
|
// content-type: application/rss+xml // 或 application/xml
|
|
// content-length: 27623
|
|
// vary: Accept-Encoding
|
|
// set-cookie: ...
|
|
// last-modified: Wed, 17 Jul 2024 06:23:58 GMT
|
|
|
|
// HTTP/2 500
|
|
// date: Wed, 17 Jul 2024 06:23:20 GMT
|
|
// content-length: 0
|
|
// set-cookie: ....
|
|
|
|
var payload map[string]interface{}
|
|
|
|
var to_status savewebtracker.Status
|
|
|
|
if r_status == 200 {
|
|
if !(strings.Contains(headers.Get("Content-Type"), "application/xml") || strings.Contains(headers.Get("Content-Type"), "application/rss")) {
|
|
Logger.Panicln(task.Id, "unexpected Content-Type: ", headers.Get("Content-Type"))
|
|
}
|
|
if ContentLength == -1 || ContentLength == 0 {
|
|
// panic("unexpected content-length: " + fmt.Sprintf("%d", ContentLength))
|
|
Logger.Panicln(task.Id, "unexpected content-length: ", fmt.Sprintf("%d", ContentLength))
|
|
}
|
|
payload = map[string]interface{}{
|
|
"content-length": ContentLength,
|
|
}
|
|
to_status = savewebtracker.StatusDONE
|
|
} else if r_status == 500 {
|
|
Logger.Println(task.Id, "empty content")
|
|
to_status = savewebtracker.StatusEMPTY
|
|
} else {
|
|
Logger.Panicln(task.Id, "unexpected status code: ", r_status)
|
|
}
|
|
|
|
payload_str, err := json.Marshal(payload)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
Logger.Println("Inserting item", task.Id, r_status, string(payload_str))
|
|
tracker.InsertItem(task, fmt.Sprintf("%d", r_status), "int", string(payload_str))
|
|
Logger.Println("Inserted item", task.Id, to_status)
|
|
tracker.UpdateTask(task.Id, task.Id_type, to_status)
|
|
Logger.Println("Updated task", task.Id)
|
|
}
|
|
}
|
|
|
|
func InterruptHandler() {
|
|
fmt.Println("Press Ctrl+C to exit")
|
|
interrupt_c := make(chan os.Signal, 1)
|
|
signal.Notify(interrupt_c, os.Interrupt)
|
|
for {
|
|
s := <-interrupt_c
|
|
Logger.Println("Interrupted by", s, "signal (Press Ctrl+C again to force exit)")
|
|
if Interrupted {
|
|
Logger.Println("Force exit")
|
|
os.Exit(1)
|
|
return
|
|
}
|
|
Interrupted = true
|
|
}
|
|
}
|
|
|
|
func GetRetryableHttpClient(timeout time.Duration, debug bool) *http.Client {
|
|
retryClient := retryablehttp.NewClient()
|
|
retryClient.RetryMax = 3
|
|
retryClient.RetryWaitMin = 1 * time.Second
|
|
retryClient.RetryWaitMax = 10 * time.Second
|
|
retryClient.HTTPClient.Timeout = timeout
|
|
if !debug {
|
|
retryClient.Logger = nil
|
|
}
|
|
standardClient := retryClient.StandardClient() // *http.Client
|
|
Logger.Println("standardClient.Timeout:", standardClient.Timeout)
|
|
return standardClient
|
|
}
|
|
|
|
func ShowStatus(t *savewebtracker.Tracker) {
|
|
for {
|
|
project_json, err := json.Marshal(t.Project())
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
Logger.Println("Project:", string(project_json))
|
|
time.Sleep(60 * time.Second)
|
|
}
|
|
}
|
|
|
|
func main() {
|
|
tracker := savewebtracker.GetTracker(project_id, "0.1", savewebtracker.Archivist())
|
|
tracker.PING_client = GetRetryableHttpClient(10*time.Second, DEBUG)
|
|
// tracker.HTTP_client = GetRetryableHttpClient(10*time.Second, DEBUG)
|
|
tracker.SelectBestTracker().StartSelectTrackerBackground().StartFetchProjectBackground()
|
|
|
|
go InterruptHandler()
|
|
go ShowStatus(tracker)
|
|
|
|
cnblogs_api.EnsureConnection(*tracker.HTTP_client)
|
|
|
|
Logger.Println("-- Start --")
|
|
|
|
for i := 0; i < BASE_CONCURRENCY; i++ {
|
|
go claimWorker(i, tracker)
|
|
WaitClaimWorker.Add(1)
|
|
go ProcesserWorker(i, tracker)
|
|
WaitProcesserWorker.Add(1)
|
|
}
|
|
|
|
// wait for all claimWorker to finish
|
|
WaitClaimWorker.Wait()
|
|
Logger.Println("[STOP] All claimWorker done")
|
|
// close task_chan
|
|
close(tasks_chan)
|
|
Logger.Println("[STOP] task_chan closed")
|
|
// wait for all task_chan to finish
|
|
WaitProcesserWorker.Wait()
|
|
Logger.Println("[STOP] All ProcesserWorker done")
|
|
|
|
Logger.Println("-- All done --")
|
|
|
|
}
|