diff --git a/.gitea/workflows/release.yml b/.gitea/workflows/release.yml
index 1ba298a..6193509 100644
--- a/.gitea/workflows/release.yml
+++ b/.gitea/workflows/release.yml
@@ -28,7 +28,7 @@ jobs:
goos: ${{ matrix.goos }}
goarch: ${{ matrix.goarch }}
multi_binaries: true
- project_path: ./cmd/...
+ project_path: ./cmd/cnblogs_posts_list/ # package path passed to the build, not a shell command
upload: false
# output is release_asset_dir
- name: rename artifact
diff --git a/.gitignore b/.gitignore
index fba74fc..a2143ec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
*.conf
-/cnblogs_rss_detect
\ No newline at end of file
+/cnblogs_rss_detect
+/cnblogs_posts_list
diff --git a/cmd/cnblogs_posts_list/cnblogs_posts_list.go b/cmd/cnblogs_posts_list/cnblogs_posts_list.go
new file mode 100644
index 0000000..6a57352
--- /dev/null
+++ b/cmd/cnblogs_posts_list/cnblogs_posts_list.go
@@ -0,0 +1,204 @@
+package main
+
+import (
+ "encoding/json"
+ "fmt"
+ "log"
+ "net/http"
+ "os"
+ "os/signal"
+ "sync"
+ "time"
+
+ "strconv"
+
+ cnblogs_api "git.saveweb.org/saveweb/cnblogs/pkg"
+ savewebtracker "git.saveweb.org/saveweb/saveweb_tracker/src/saveweb_tracker"
+ "github.com/hashicorp/go-retryablehttp"
+)
+
+var BASE_CONCURRENCY = 3 // number of claim workers and of processor workers started by main; init may override it from the BASE_CONCURRENCY env var
+var WITH_DELAY = true // passed to tracker.ClaimTask; init sets it to false when NO_WITH_DELAY is set
+
+var tasks_chan = make(chan savewebtracker.Task, BASE_CONCURRENCY) // hands claimed tasks to the processor workers; package variables are initialized before init() runs, so the capacity is the default 3 even when the env var overrides BASE_CONCURRENCY
+var Interrupted = false // set by InterruptHandler on the first interrupt and polled by claimWorker; plain bool, no synchronization
+var WaitClaimWorker sync.WaitGroup // counts running claimWorker goroutines
+var WaitProcesserWorker sync.WaitGroup // counts running ProcesserWorker goroutines
+
+var project_id = "cnblogs_posts_list" // tracker project id, also used as the log prefix
+
+var Logger *log.Logger // created in init, writes to stdout
+var DEBUG = false // enabled by init when the DEBUG env var is non-empty
+
+func init() { // applies the BASE_CONCURRENCY, NO_WITH_DELAY and DEBUG environment variables, then creates Logger
+	if v := os.Getenv("BASE_CONCURRENCY"); v != "" {
+		fmt.Println("BASE_CONCURRENCY:", v)
+		if n, err := strconv.Atoi(v); err == nil && n > 0 { // a non-numeric or non-positive value keeps the default instead of leaving zero workers
+			BASE_CONCURRENCY = n
+		}
+	}
+	if v := os.Getenv("NO_WITH_DELAY"); v != "" {
+		fmt.Println("NO_WITH_DELAY:", v)
+		WITH_DELAY = false
+	}
+	DEBUG = os.Getenv("DEBUG") != ""
+	Logger = log.New(os.Stdout, "["+project_id+"] ", log.Ldate|log.Ltime|log.Lmsgprefix)
+}
+
+// claimWorker repeatedly claims a task from the tracker and sends it on
+// tasks_chan until Interrupted becomes true. When the tracker has nothing to
+// hand out it sleeps before retrying. It calls WaitClaimWorker.Done on exit.
+func claimWorker(i int, tracker *savewebtracker.Tracker) {
+	Logger.Println("[START] ClaimWorker", i)
+	defer Logger.Println("[STOP] ClaimWorker", i, " exited...")
+	defer WaitClaimWorker.Done()
+	// Interrupted is only looked at between iterations.
+	for !Interrupted {
+		task := tracker.ClaimTask(WITH_DELAY)
+		if task != nil {
+			Logger.Println("Claimed task", task.Id)
+			tasks_chan <- *task
+			continue
+		}
+		// Nothing to claim: back off for ClaimTaskDelay*10 seconds,
+		// but never less than ten seconds.
+		backoff := time.Duration(tracker.Project().Client.ClaimTaskDelay) * 10 * time.Second
+		idle := max(backoff, 10*time.Second)
+		Logger.Println("No task to claim, sleep", idle)
+		time.Sleep(idle)
+	}
+}
+
+// ProcesserWorker handles tasks received from tasks_chan until the channel is
+// closed: it resolves the blog URI for the task id, pages through the blog
+// homepage collecting post metadata, inserts the posts and marks the task done.
+func ProcesserWorker(i int, tracker *savewebtracker.Tracker) {
+	Logger.Println("[START] ProcesserWorker", i)
+	defer Logger.Println("[STOP] ProcesserWorker", i, " exited...")
+	defer WaitProcesserWorker.Done()
+	for task := range tasks_chan {
+		head := "[" + task.Id + "]"
+		Logger.Println("Processing task", task.Id)
+		blogURI, err := cnblogs_api.GetBlogUri(tracker.HTTP_client, task.Id)
+		if err != nil {
+			Logger.Panicln(head, err)
+		}
+		// Walk the homepage from page 1 upwards; the first page without posts ends the walk.
+		collected := []cnblogs_api.PostMeta{}
+		for page := 1; ; page++ {
+			Logger.Println(head, "Processing", blogURI, "page:", page, "Got:", len(collected))
+			htmlBody, statusCode, err := cnblogs_api.GetBlogHomepage(tracker.HTTP_client, blogURI, page)
+			switch {
+			case err != nil:
+				Logger.Panicln(head, err)
+			case !cnblogs_api.EnsureHomepageOK(string(htmlBody)):
+				Logger.Panicln(head, "EnsureHomepageOK is false")
+			case statusCode != 200:
+				Logger.Panicln(head, "statusCode is not 200")
+			}
+			found, err := cnblogs_api.ParsePostMetasFromHomepage(htmlBody)
+			if err != nil {
+				Logger.Panicln(head, err)
+			}
+			if len(found) == 0 {
+				break
+			}
+			Logger.Println(head, "Got", found)
+			collected = append(collected, found...)
+		}
+
+		items := []savewebtracker.Item{}
+		for _, meta := range collected {
+			payload, err := json.Marshal(meta)
+			if err != nil {
+				Logger.Panicln(head, err)
+			}
+			items = append(items, savewebtracker.Item{
+				Item_id:          meta.URL,
+				Item_id_type:     "str",
+				Item_status:      "None",
+				Item_status_type: "None",
+				Payload:          string(payload),
+			})
+		}
+		resp_msg := tracker.InsertMany(items)
+		Logger.Println(head, "InsertMany", resp_msg)
+		tracker.UpdateTask(task.Id, task.Id_type, savewebtracker.StatusDONE)
+		Logger.Println(head, "Updated task", task.Id)
+	}
+}
+
+// InterruptHandler sets Interrupted on the first interrupt signal and exits the process with status 1 on the next one.
+func InterruptHandler() {
+	fmt.Println("\n\nPress Ctrl+C to exit\n ")
+	sigs := make(chan os.Signal, 1)
+	signal.Notify(sigs, os.Interrupt)
+	for s := range sigs {
+		Logger.Println("\n\nInterrupted by", s, "signal (Press Ctrl+C again to force exit)\n\n ")
+		if !Interrupted {
+			Interrupted = true
+			continue
+		}
+		Logger.Println("Force exit")
+		os.Exit(1)
+	}
+}
+
+// GetRetryableHttpClient builds a retrying client (3 retries, 1s-10s wait) and returns it as a standard *http.Client.
+func GetRetryableHttpClient(timeout time.Duration, debug bool) *http.Client {
+	rc := retryablehttp.NewClient()
+	rc.HTTPClient.Timeout = timeout
+	rc.RetryMax = 3
+	rc.RetryWaitMin, rc.RetryWaitMax = 1*time.Second, 10*time.Second
+	if !debug {
+		rc.Logger = nil // no retry logging outside debug mode
+	}
+	client := rc.StandardClient()
+	Logger.Println("standardClient.Timeout:", client.Timeout)
+	return client
+}
+
+// ShowStatus logs the tracker's project as JSON, then sleeps 60 seconds, forever; it panics if marshalling fails.
+func ShowStatus(t *savewebtracker.Tracker) {
+	for ; ; time.Sleep(60 * time.Second) {
+		encoded, err := json.Marshal(t.Project())
+		if err != nil {
+			panic(err)
+		}
+		Logger.Println("Project:", string(encoded))
+	}
+}
+
+// main connects to the tracker, starts BASE_CONCURRENCY claim workers and as
+// many processor workers, and shuts down once every claim worker has returned.
+func main() {
+	tracker := savewebtracker.GetTracker(project_id, "0.2", savewebtracker.Archivist())
+	tracker.PING_client = GetRetryableHttpClient(10*time.Second, DEBUG)
+	// tracker.HTTP_client = GetRetryableHttpClient(10*time.Second, DEBUG)
+	tracker.SelectBestTracker().StartSelectTrackerBackground().StartFetchProjectBackground()
+
+	go InterruptHandler()
+	go ShowStatus(tracker)
+
+	cnblogs_api.EnsureConnection(*tracker.HTTP_client)
+
+	Logger.Println("-- Start --")
+	for i := 0; i < BASE_CONCURRENCY; i++ {
+		// Add before starting each goroutine so its deferred Done can never run first.
+		WaitClaimWorker.Add(1)
+		go claimWorker(i, tracker)
+		WaitProcesserWorker.Add(1)
+		go ProcesserWorker(i, tracker)
+	}
+
+	// Claim workers return once Interrupted is set.
+	WaitClaimWorker.Wait()
+	Logger.Println("[STOP] All claimWorker done")
+	// No sender is left, so closing tasks_chan ends the processors' range loops.
+	close(tasks_chan)
+	Logger.Println("[STOP] task_chan closed")
+	// Wait for the processor workers to drain the tasks still queued.
+	WaitProcesserWorker.Wait()
+	Logger.Println("[STOP] All ProcesserWorker done")
+	Logger.Println("-- All done --")
+}
diff --git a/go.mod b/go.mod
index 99a7041..f2c6a6a 100644
--- a/go.mod
+++ b/go.mod
@@ -4,7 +4,12 @@ go 1.22.4
require (
git.saveweb.org/saveweb/saveweb_tracker v0.1.12
+ github.com/PuerkitoBio/goquery v1.9.2
github.com/hashicorp/go-retryablehttp v0.7.7
)
-require github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
+require (
+ github.com/andybalholm/cascadia v1.3.2 // indirect
+ github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
+ golang.org/x/net v0.24.0 // indirect
+)
diff --git a/go.sum b/go.sum
index 8baa5d8..31a1240 100644
--- a/go.sum
+++ b/go.sum
@@ -1,5 +1,9 @@
git.saveweb.org/saveweb/saveweb_tracker v0.1.12 h1:zBYkMjABF5wwvSHZI9t3cVUjU0rhFFZJh0dFE0W59Nw=
git.saveweb.org/saveweb/saveweb_tracker v0.1.12/go.mod h1:p891f4fshoA/Wiwmey23f2xJ9sKNEZwd5kmzG6lobik=
+github.com/PuerkitoBio/goquery v1.9.2 h1:4/wZksC3KgkQw7SQgkKotmKljk0M6V8TUvA8Wb4yPeE=
+github.com/PuerkitoBio/goquery v1.9.2/go.mod h1:GHPCaP0ODyyxqcNoFGYlAprUFH81NuRPd0GX3Zu2Mvk=
+github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
+github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
@@ -18,7 +22,43 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
+golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w=
+golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/main b/main
new file mode 100755
index 0000000..e76948f
Binary files /dev/null and b/main differ
diff --git a/pkg/cnblogs_api.go b/pkg/cnblogs_api.go
index 8abd686..f140ad7 100644
--- a/pkg/cnblogs_api.go
+++ b/pkg/cnblogs_api.go
@@ -1,11 +1,20 @@
package cnblogs_api
import (
+ "bytes"
+ "encoding/xml"
+ "errors"
+ "fmt"
"io"
"log"
"net/http"
"os"
+ "regexp"
"strings"
+ "sync"
+ "time"
+
+ "github.com/PuerkitoBio/goquery"
)
var USER_AGENT = "SaveTheWebProject cnblogs-preserve/0.1 (+saveweb@saveweb.org) and not Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0"
@@ -62,3 +71,155 @@ func GetRSSHeadHeaders(client http.Client, blogID string) (http.Header, int, int
return resp.Header, resp.StatusCode, resp.ContentLength
}
+
+var exp = `<uri[^>]*>(.*?)</uri>` // captures the text content of a <uri> element
+var compiledr = regexp.MustCompile(exp)
+
+// ParseBlogUriByRegex returns the content of the first <uri> element in body, or "" when there is none.
+func ParseBlogUriByRegex(body []byte) string {
+	// FindSubmatch stops at the first match; index 1 is the capture group.
+	matches := compiledr.FindSubmatch(body)
+	if len(matches) > 1 {
+		return string(matches[1])
+	}
+	return ""
+}
+
+// GetBlogUri downloads the feed of blog BlogID from feed.cnblogs.com and returns the author URI found in it.
+// The feed is parsed as XML; if that fails, ParseBlogUriByRegex is tried on the raw body before giving up.
+func GetBlogUri(client *http.Client, BlogID string) (string, error) {
+	req, err := http.NewRequest("GET", "https://feed.cnblogs.com/blog/u/"+BlogID+"/rss", nil)
+	if err != nil {
+		return "", err
+	}
+	req.Header.Add("User-Agent", USER_AGENT)
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return "", err
+	}
+	defer resp.Body.Close()
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return "", err
+	}
+
+	// Sample feed (markup reconstructed, attributes omitted):
+	// <feed>
+	//   <title>博客园 - 吴松~</title>
+	//   <id>uuid:0a75ddf1-c050-403f-937c-cf7790585fb1;id=1761407</id>
+	//   <updated>2018-06-19T09:44:31Z</updated>
+	//   <author>
+	//     <name>吴松~</name>
+	//     <uri>https://www.cnblogs.com/superws/</uri>
+	//   </author>
+	// </feed>
+	type Author struct {
+		URI string `xml:"uri"`
+	}
+	type Feed struct {
+		XMLName xml.Name `xml:"feed"`
+		Author  Author   `xml:"author"`
+	}
+
+	var feed Feed
+	err = xml.Unmarshal(body, &feed)
+	if err != nil {
+		Logger.Println("xml.Unmarshal error", err, "fallback to regex")
+		reg_result := ParseBlogUriByRegex(body)
+		if reg_result != "" {
+			return reg_result, nil
+		}
+		return "", fmt.Errorf("xml.Unmarshal error: %w", err) // keep the parse error as the cause
+	}
+
+	if feed.Author.URI == "" {
+		return "", errors.New("URI is empty")
+	}
+	return feed.Author.URI, nil
+}
+
+// EnsureHomepageOK reports whether htmldata looks like a complete blog
+// homepage, i.e. whether it contains every one of a fixed set of marker
+// strings. The markers are checked in order; the first one that is missing is
+// printed to stdout and the function returns false without checking the rest.
+// An input containing all markers yields true.
+func EnsureHomepageOK(htmldata string) bool {
+	markers := [...]string{"currentBlogId", "currentBlogApp", "application/rss+xml", "antiforgery_token", "poweredby"}
+	for _, marker := range markers {
+		if strings.Contains(htmldata, marker) {
+			continue
+		}
+		fmt.Println("EnsureHomepageOK failed for", marker)
+		return false
+	}
+	return true
+}
+
+// homepage_lock guards the fixed pause taken before every homepage request,
+// so concurrent callers get past that pause one at a time.
+var homepage_lock = sync.Mutex{}
+
+// GetBlogHomepage downloads page number page of the blog rooted at BlogUri
+// (trailing slashes are ignored) and returns the response body together with
+// the HTTP status code. The body is nil whenever an error is returned.
+func GetBlogHomepage(client *http.Client, BlogUri string, page int) ([]byte, int, error) {
+	pageURL := fmt.Sprintf("%s?page=%d", strings.TrimRight(BlogUri, "/"), page)
+	// Hold the lock for one second before issuing the request.
+	homepage_lock.Lock()
+	time.Sleep(1 * time.Second)
+	homepage_lock.Unlock()
+
+	req, err := http.NewRequest(http.MethodGet, pageURL, nil)
+	if err != nil {
+		return nil, 0, err
+	}
+	req.Header.Add("User-Agent", USER_AGENT)
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, 0, err
+	}
+	defer resp.Body.Close()
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, resp.StatusCode, err
+	}
+	return body, resp.StatusCode, nil
+}
+
+type PostMeta struct { // PostMeta is the title and link of one post found on a blog homepage.
+ Title string // trimmed text of the <span> inside the post's title link
+ URL string // value of the title link's href attribute
+}
+
+// ParsePostMetasFromHomepage extracts the title and link of every element
+// with class "postTitle2" found in htmlBody. The link comes from the element's
+// href attribute and the title from the trimmed text of its descendant <span>.
+// If any matched element has no href, the metas gathered from the other
+// elements are still returned, together with a non-nil error.
+func ParsePostMetasFromHomepage(htmlBody []byte) ([]PostMeta, error) {
+	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(htmlBody))
+	if err != nil {
+		return nil, err
+	}
+
+	metas := []PostMeta{}
+	missingHref := false
+	doc.Find(".postTitle2").Each(func(_ int, link *goquery.Selection) {
+		href, ok := link.Attr("href")
+		if !ok {
+			// Remember the problem but keep visiting the remaining elements.
+			missingHref = true
+			return
+		}
+		metas = append(metas, PostMeta{
+			Title: strings.TrimSpace(link.Find("span").Text()),
+			URL:   href,
+		})
+	})
+
+	if missingHref {
+		return metas, errors.New("error in query")
+	}
+	return metas, nil
+}
diff --git a/pkg/cnblogs_api_test.go b/pkg/cnblogs_api_test.go
new file mode 100644
index 0000000..84b24e9
--- /dev/null
+++ b/pkg/cnblogs_api_test.go
@@ -0,0 +1,101 @@
+package cnblogs_api
+
+import (
+ "net/http"
+ "testing"
+ "time"
+)
+
+var client = &http.Client{ // HTTP client shared by the tests in this file
+ Timeout: 120 * time.Second,
+}
+
+// TestGetBlogUri checks that GetBlogUri yields a non-empty URI for blog id 270749 without an error.
+func TestGetBlogUri(t *testing.T) {
+	uri, err := GetBlogUri(client, "270749")
+	if err != nil {
+		t.Error(err)
+	}
+	if len(uri) == 0 {
+		t.Error("blogApp is empty")
+	}
+	t.Log(uri)
+}
+
+// TestGetBlogHomepage fetches page 7 of blog 270749 and checks, in order, the error, the status code and the page markers.
+func TestGetBlogHomepage(t *testing.T) {
+	blogApp, err := GetBlogUri(client, "270749")
+	if err != nil {
+		t.Fatal(err)
+	}
+	htmlBody, statusCode, err := GetBlogHomepage(client, blogApp, 7)
+	if err != nil { // check the error first: htmlBody is nil when the request failed
+		t.Fatal(err)
+	}
+	if statusCode != 200 {
+		t.Fatal("statusCode is not 200")
+	}
+	if !EnsureHomepageOK(string(htmlBody)) {
+		t.Fatal("EnsureHomepageOK is false")
+	}
+	t.Log(string(htmlBody))
+}
+
+// TestParsePostsURLFromHomepage fetches page 1 of blog 270749 and logs the post metas parsed from it.
+func TestParsePostsURLFromHomepage(t *testing.T) {
+	uri, err := GetBlogUri(client, "270749")
+	if err != nil {
+		t.Fatal(err)
+	}
+	page, _, err := GetBlogHomepage(client, uri, 1)
+	if err != nil {
+		t.Fatal(err)
+	}
+	metas, err := ParsePostMetasFromHomepage(page)
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Log(metas)
+}
+
+func TestParseBlogUriByRegex(t *testing.T) { // runs ParseBlogUriByRegex on an inline feed fixture and expects the lummon blog URI
+ text := `
+
+ 博客园 - 真幻de现实
+ uuid:0a75ddf1-c050-403f-937c-cf7790585fb1;id=1780685
+ 2018-11-28T09:56:51Z
+
+ 真幻de现实
+ https://www.cnblogs.com/lummon/
+
+ feed.cnblogs.com
+
+ https://www.cnblogs.com/lummon/p/10033657.html
+ EF 基础提供程序在 Open 上失败 - 真幻de现实
+ 搜来的思路: 客户端以管理员身份运行:netsh winsock reset命令,作用是重置 Winsock 目录。如果一台机器上的Winsock协议配置有问题的话将会导致网络连接等问题,就需要用netsh winsock reset命令来重置Winsock目录借以恢复网络。这个命令可以重新初始化网
+ 2018-11-28T09:56:00Z
+ 2018-11-28T09:56:00Z
+
+ 真幻de现实
+ https://www.cnblogs.com/lummon/
+
+
+
+ 【摘要】搜来的思路: 客户端以管理员身份运行:netsh winsock reset命令,作用是重置 Winsock 目录。如果一台机器上的Winsock协议配置有问题的话将会导致网络连接等问题,就需要用netsh winsock reset命令来重置Winsock目录借以恢复网络。这个命令可以重新初始化网 <a href="https://www.cnblogs.com/lummon/p/10033657.html" target="_blank">阅读全文</a>
+
+
+ https://www.cnblogs.com/lummon/p/5950095.html
+ flexbox学习 - 真幻de现实
+ https://philipwalton.github.io/solved-by-flexbox/ http://www.ruanyifeng.com/blog/2015/07/flex-grammar.html?utm_source=tuicool http://www.ruanyifeng.co
+ 2016-10-11T09:24:00Z
+ 2016-10-11T09:24:00Z
+
+ 真幻de现实
+ https://www.cnblogs.com/lummon/
+
+ `
+ uri := ParseBlogUriByRegex([]byte(text)) // only the first match is returned
+ if uri != "https://www.cnblogs.com/lummon/" {
+ t.Error("uri is not https://www.cnblogs.com/lummon/")
+ }
+}