init
All checks were successful
Gitea Go Release Actions / Release Go Binary (amd64, windows) (push) Successful in 52s
Gitea Go Release Actions / Release Go Binary (amd64, darwin) (push) Successful in 1m15s
Gitea Go Release Actions / Release Go Binary (arm, linux) (push) Successful in 1m17s
Gitea Go Release Actions / Release Go Binary (amd64, linux) (push) Successful in 1m26s
Gitea Go Release Actions / Release Go Binary (arm64, darwin) (push) Successful in 39s
Gitea Go Release Actions / Release Go Binary (arm64, linux) (push) Successful in 1m13s
All checks were successful
Gitea Go Release Actions / Release Go Binary (amd64, windows) (push) Successful in 52s
Gitea Go Release Actions / Release Go Binary (amd64, darwin) (push) Successful in 1m15s
Gitea Go Release Actions / Release Go Binary (arm, linux) (push) Successful in 1m17s
Gitea Go Release Actions / Release Go Binary (amd64, linux) (push) Successful in 1m26s
Gitea Go Release Actions / Release Go Binary (arm64, darwin) (push) Successful in 39s
Gitea Go Release Actions / Release Go Binary (arm64, linux) (push) Successful in 1m13s
This commit is contained in:
parent
a55b676b7d
commit
592fa2dea5
56
.gitea/workflows/release.yml
Normal file
56
.gitea/workflows/release.yml
Normal file
@ -0,0 +1,56 @@
|
||||
name: Gitea Go Release Actions
|
||||
run-name: ${{ gitea.actor }} go🚀
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '*'
|
||||
|
||||
jobs:
|
||||
releases-matrix:
|
||||
name: Release Go Binary
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
goos: [linux, windows, darwin]
|
||||
goarch: [amd64, arm64]
|
||||
include:
|
||||
- goarch: arm
|
||||
goos: linux
|
||||
exclude:
|
||||
- goarch: arm64
|
||||
goos: windows
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: wangyoucao577/go-release-action@v1
|
||||
id: go-release-action
|
||||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
goos: ${{ matrix.goos }}
|
||||
goarch: ${{ matrix.goarch }}
|
||||
multi_binaries: true
|
||||
project_path: ./cmd/...
|
||||
upload: false
|
||||
# output is release_asset_dir
|
||||
- name: rename artifact
|
||||
# append os and arch to the artifact name (handle windows .exe)
|
||||
run: |
|
||||
cd ${{ steps.go-release-action.outputs.release_asset_dir }}
|
||||
for f in *; do
|
||||
if [[ $f == *.exe ]]; then
|
||||
noextname=$(basename "$f" .exe)
|
||||
mv "$f" "${noextname}-${{ matrix.goos }}-${{ matrix.goarch }}.exe"
|
||||
else
|
||||
mv "$f" "${f}-${{ matrix.goos }}-${{ matrix.goarch }}"
|
||||
fi
|
||||
done
|
||||
cd -
|
||||
- name: Compress binaries
|
||||
continue-on-error: true
|
||||
uses: svenstaro/upx-action@v2
|
||||
with:
|
||||
files: |
|
||||
${{ steps.go-release-action.outputs.release_asset_dir }}/**
|
||||
- uses: akkuman/gitea-release-action@v1
|
||||
with:
|
||||
files: |-
|
||||
${{ steps.go-release-action.outputs.release_asset_dir }}/**
|
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
*.conf
|
||||
/cnblogs_rss_detect
|
217
cmd/cnblogs_rss_detect/cnblogs_rss_detect.go
Normal file
217
cmd/cnblogs_rss_detect/cnblogs_rss_detect.go
Normal file
@ -0,0 +1,217 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"strconv"
|
||||
|
||||
cnblogs_api "git.saveweb.org/saveweb/cnblogs/pkg"
|
||||
savewebtracker "git.saveweb.org/saveweb/saveweb_tracker/src/saveweb_tracker"
|
||||
"github.com/hashicorp/go-retryablehttp"
|
||||
)
|
||||
|
||||
var BASE_CONCURRENCY = 10
|
||||
var WITH_DELAY = true
|
||||
|
||||
var tasks_chan = make(chan savewebtracker.Task, BASE_CONCURRENCY)
|
||||
var Interrupted = false
|
||||
var WaitClaimWorker sync.WaitGroup
|
||||
var WaitProcesserWorker sync.WaitGroup
|
||||
|
||||
var project_id = "cnblogs_rss_detect"
|
||||
|
||||
var Logger *log.Logger
|
||||
var DEBUG = false
|
||||
|
||||
func init() {
|
||||
if os.Getenv("BASE_CONCURRENCY") != "" {
|
||||
fmt.Println("BASE_CONCURRENCY:", os.Getenv("BASE_CONCURRENCY"))
|
||||
BASE_CONCURRENCY, _ = strconv.Atoi(os.Getenv("BASE_CONCURRENCY"))
|
||||
}
|
||||
if os.Getenv("NO_WITH_DELAY") != "" {
|
||||
fmt.Println("NO_WITH_DELAY:", os.Getenv("NO_WITH_DELAY"))
|
||||
WITH_DELAY = false
|
||||
}
|
||||
if os.Getenv("DEBUG") != "" {
|
||||
DEBUG = true
|
||||
}
|
||||
Logger = log.New(os.Stdout, "["+project_id+"] ", log.Ldate|log.Ltime|log.Lmsgprefix)
|
||||
}
|
||||
|
||||
// custom_delay_lock serialises the CUSTOM_DELAY sleep across claim workers,
// so only one worker at a time is waiting out the delay.
var custom_delay_lock sync.Mutex
|
||||
|
||||
// ClaimTask 并把任务放入 task_chan
|
||||
func claimWorker(i int, tracker *savewebtracker.Tracker) {
|
||||
Logger.Println("[START] ClaimWorker", i)
|
||||
defer Logger.Println("[STOP] ClaimWorker", i, " exited...")
|
||||
defer WaitClaimWorker.Done()
|
||||
for {
|
||||
if Interrupted {
|
||||
return
|
||||
}
|
||||
if os.Getenv("CUSTOM_DELAY") != "" {
|
||||
custom_delay_lock.Lock()
|
||||
// xxxms
|
||||
custom_delay, _ := time.ParseDuration(os.Getenv("CUSTOM_DELAY"))
|
||||
Logger.Println("Custom delay:", custom_delay, "...")
|
||||
time.Sleep(custom_delay)
|
||||
custom_delay_lock.Unlock()
|
||||
|
||||
WITH_DELAY = false
|
||||
}
|
||||
task := tracker.ClaimTask(WITH_DELAY)
|
||||
if task == nil {
|
||||
notask_sleep := max(
|
||||
time.Duration(tracker.Project().Client.ClaimTaskDelay)*10*time.Second,
|
||||
time.Duration(10)*time.Second,
|
||||
)
|
||||
Logger.Println("No task to claim, sleep", notask_sleep)
|
||||
time.Sleep(notask_sleep)
|
||||
continue
|
||||
}
|
||||
Logger.Println("Claimed task", task.Id)
|
||||
tasks_chan <- *task
|
||||
}
|
||||
}
|
||||
|
||||
func ProcesserWorker(i int, tracker *savewebtracker.Tracker) {
|
||||
Logger.Println("[START] ProcesserWorker", i)
|
||||
defer Logger.Println("[STOP] ProcesserWorker", i, " exited...")
|
||||
defer WaitProcesserWorker.Done()
|
||||
for task := range tasks_chan {
|
||||
Logger.Println("Processing task", task.Id)
|
||||
|
||||
// 在这儿处理任务
|
||||
headers, r_status, ContentLength := cnblogs_api.GetRSSHeadHeaders(*tracker.HTTP_client, task.Id)
|
||||
|
||||
// HTTP/2 200
|
||||
// date: Wed, 17 Jul 2024 06:23:58 GMT
|
||||
// content-type: application/rss+xml // 或 application/xml
|
||||
// content-length: 27623
|
||||
// vary: Accept-Encoding
|
||||
// set-cookie: ...
|
||||
// last-modified: Wed, 17 Jul 2024 06:23:58 GMT
|
||||
|
||||
// HTTP/2 500
|
||||
// date: Wed, 17 Jul 2024 06:23:20 GMT
|
||||
// content-length: 0
|
||||
// set-cookie: ....
|
||||
|
||||
var payload map[string]interface{}
|
||||
|
||||
var to_status savewebtracker.Status
|
||||
|
||||
if r_status == 200 {
|
||||
if !(strings.Contains(headers.Get("Content-Type"), "application/xml") || strings.Contains(headers.Get("Content-Type"), "application/rss")) {
|
||||
Logger.Panicln(task.Id, "unexpected Content-Type: ", headers.Get("Content-Type"))
|
||||
}
|
||||
if ContentLength == -1 || ContentLength == 0 {
|
||||
// panic("unexpected content-length: " + fmt.Sprintf("%d", ContentLength))
|
||||
Logger.Panicln(task.Id, "unexpected content-length: ", fmt.Sprintf("%d", ContentLength))
|
||||
}
|
||||
payload = map[string]interface{}{
|
||||
"content-length": ContentLength,
|
||||
}
|
||||
to_status = savewebtracker.StatusDONE
|
||||
} else if r_status == 500 {
|
||||
Logger.Println(task.Id, "empty content")
|
||||
to_status = savewebtracker.StatusEMPTY
|
||||
} else {
|
||||
Logger.Panicln(task.Id, "unexpected status code: ", r_status)
|
||||
}
|
||||
|
||||
payload_str, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
Logger.Println("Inserting item", task.Id, r_status, string(payload_str))
|
||||
tracker.InsertItem(task, fmt.Sprintf("%d", r_status), "int", string(payload_str))
|
||||
Logger.Println("Inserted item", task.Id, to_status)
|
||||
tracker.UpdateTask(task.Id, task.Id_type, to_status)
|
||||
Logger.Println("Updated task", task.Id)
|
||||
}
|
||||
}
|
||||
|
||||
func InterruptHandler() {
|
||||
fmt.Println("Press Ctrl+C to exit")
|
||||
interrupt_c := make(chan os.Signal, 1)
|
||||
signal.Notify(interrupt_c, os.Interrupt)
|
||||
for {
|
||||
s := <-interrupt_c
|
||||
Logger.Println("Interrupted by", s, "signal (Press Ctrl+C again to force exit)")
|
||||
if Interrupted {
|
||||
Logger.Println("Force exit")
|
||||
os.Exit(1)
|
||||
return
|
||||
}
|
||||
Interrupted = true
|
||||
}
|
||||
}
|
||||
|
||||
func GetRetryableHttpClient(timeout time.Duration, debug bool) *http.Client {
|
||||
retryClient := retryablehttp.NewClient()
|
||||
retryClient.RetryMax = 3
|
||||
retryClient.RetryWaitMin = 1 * time.Second
|
||||
retryClient.RetryWaitMax = 10 * time.Second
|
||||
retryClient.HTTPClient.Timeout = timeout
|
||||
if !debug {
|
||||
retryClient.Logger = nil
|
||||
}
|
||||
standardClient := retryClient.StandardClient() // *http.Client
|
||||
Logger.Println("standardClient.Timeout:", standardClient.Timeout)
|
||||
return standardClient
|
||||
}
|
||||
|
||||
func ShowStatus(t *savewebtracker.Tracker) {
|
||||
for {
|
||||
project_json, err := json.Marshal(t.Project())
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
Logger.Println("Project:", string(project_json))
|
||||
time.Sleep(60 * time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
tracker := savewebtracker.GetTracker(project_id, "0.1", savewebtracker.Archivist())
|
||||
tracker.PING_client = GetRetryableHttpClient(10*time.Second, DEBUG)
|
||||
// tracker.HTTP_client = GetRetryableHttpClient(10*time.Second, DEBUG)
|
||||
tracker.SelectBestTracker().StartSelectTrackerBackground().StartFetchProjectBackground()
|
||||
|
||||
go InterruptHandler()
|
||||
go ShowStatus(tracker)
|
||||
|
||||
cnblogs_api.EnsureConnection(*tracker.HTTP_client)
|
||||
|
||||
Logger.Println("-- Start --")
|
||||
|
||||
for i := 0; i < BASE_CONCURRENCY; i++ {
|
||||
go claimWorker(i, tracker)
|
||||
WaitClaimWorker.Add(1)
|
||||
go ProcesserWorker(i, tracker)
|
||||
WaitProcesserWorker.Add(1)
|
||||
}
|
||||
|
||||
// wait for all claimWorker to finish
|
||||
WaitClaimWorker.Wait()
|
||||
Logger.Println("[STOP] All claimWorker done")
|
||||
// close task_chan
|
||||
close(tasks_chan)
|
||||
Logger.Println("[STOP] task_chan closed")
|
||||
// wait for all task_chan to finish
|
||||
WaitProcesserWorker.Wait()
|
||||
Logger.Println("[STOP] All ProcesserWorker done")
|
||||
|
||||
Logger.Println("-- All done --")
|
||||
|
||||
}
|
10
go.mod
Normal file
10
go.mod
Normal file
@ -0,0 +1,10 @@
|
||||
module git.saveweb.org/saveweb/cnblogs
|
||||
|
||||
go 1.22.4
|
||||
|
||||
require (
|
||||
git.saveweb.org/saveweb/saveweb_tracker v0.1.12
|
||||
github.com/hashicorp/go-retryablehttp v0.7.7
|
||||
)
|
||||
|
||||
require github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
|
24
go.sum
Normal file
24
go.sum
Normal file
@ -0,0 +1,24 @@
|
||||
git.saveweb.org/saveweb/saveweb_tracker v0.1.12 h1:zBYkMjABF5wwvSHZI9t3cVUjU0rhFFZJh0dFE0W59Nw=
|
||||
git.saveweb.org/saveweb/saveweb_tracker v0.1.12/go.mod h1:p891f4fshoA/Wiwmey23f2xJ9sKNEZwd5kmzG6lobik=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
|
||||
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
|
||||
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
|
||||
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
|
||||
github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=
|
||||
github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
|
||||
github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU=
|
||||
github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk=
|
||||
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
|
||||
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
|
||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
|
||||
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
64
pkg/cnblogs_api.go
Normal file
64
pkg/cnblogs_api.go
Normal file
@ -0,0 +1,64 @@
|
||||
package cnblogs_api
|
||||
|
||||
import (
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// USER_AGENT is the User-Agent header value sent by EnsureConnection and
// GetRSSHeadHeaders.
var USER_AGENT = "SaveTheWebProject cnblogs-preserve/0.1 (+saveweb@saveweb.org)" +
	" and not Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0"
|
||||
|
||||
// Logger is the package logger. It writes to standard output with the
// "[cnblogs_api] " prefix placed before the message (log.Lmsgprefix), after
// the date and time.
var Logger = log.New(
	os.Stdout,
	"[cnblogs_api] ",
	log.Ldate|log.Ltime|log.Lmsgprefix,
)
|
||||
|
||||
func EnsureConnection(client http.Client) {
|
||||
req, err := http.NewRequest("GET", "https://www.cnblogs.com/robots.txt", nil)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
req.Header.Add("User-Agent", USER_AGENT)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
text := string(body)
|
||||
Logger.Println(text)
|
||||
if !strings.Contains(text, "User-Agent") {
|
||||
panic("NotImplementedError: " + text)
|
||||
}
|
||||
}
|
||||
|
||||
func GetRSSHeadHeaders(client http.Client, blogID string) (http.Header, int, int64) {
|
||||
req, err := http.NewRequest("HEAD", "https://feed.cnblogs.com/blog/u/"+blogID+"/rss/", nil)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
headers := map[string][]string{
|
||||
"User-Agent": {USER_AGENT},
|
||||
}
|
||||
for k, v := range headers {
|
||||
req.Header[k] = v
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
_, err = io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return resp.Header, resp.StatusCode, resp.ContentLength
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user