2021-11-21 00:15:48 -08:00
|
|
|
|
import socket
|
|
|
|
|
import os
|
2021-11-21 00:49:49 -08:00
|
|
|
|
import urllib.parse
|
2021-11-21 00:15:48 -08:00
|
|
|
|
# Scan the blogs listed in chinese-independent-blogs' CSV and record every
# domain whose plain-HTTP response identifies itself as "Server: GitHub.com"
# (i.e. is served by GitHub Pages) into gh-domains.txt.

# 1-based progress counter, printed once per CSV data row.
num = 1

# Seconds to wait on connect/send/recv before giving up on a host, so a
# single unresponsive server cannot stall the whole scan.
SOCKET_TIMEOUT = 10

# Local copy of the blog list; downloaded below and removed at the end.
CSV_PATH = 'blogs-original.csv'


def _is_github_pages(domain):
    """Return True if an HTTP GET to *domain* answers with ``Server: GitHub.com``.

    Connects to port 80 over IPv4 and inspects only the first 4096 bytes of
    the response. The socket is always closed, even when the probe fails.

    Raises:
        OSError: connection refused, DNS failure, timeout, etc.
        UnicodeError: the host name cannot be encoded for resolution.
    """
    request = "GET / HTTP/1.1\r\nHost:%s\r\n\r\n" % domain
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client:
        client.settimeout(SOCKET_TIMEOUT)
        client.connect((domain, 80))
        client.sendall(request.encode())
        response = client.recv(4096)
    return b'Server: GitHub.com' in response


# Best-effort download. os.system() never raises, so a missing wget is
# detected below when the CSV cannot be opened; a manually downloaded copy
# already sitting in the current directory is still picked up.
os.system("wget https://github.com/timqian/chinese-independent-blogs/raw/master/blogs-original.csv -O ./blogs-original.csv")

try:
    # The CSV contains Chinese text; decode explicitly instead of relying on
    # the platform locale, and never abort on a stray undecodable byte.
    with open(CSV_PATH, 'r', encoding='utf-8', errors='replace') as f:
        lines = f.read().splitlines()
except OSError:
    print("您似乎没有安装wget,请手动下载文件 https://github.com/timqian/chinese-independent-blogs/raw/master/blogs-original.csv 并放至当前目录")
else:
    with open('gh-domains.txt', 'w', encoding='utf-8') as f:
        # lines[0] is the CSV header row.
        for line in lines[1:]:
            print(num)
            num += 1

            fields = line.replace(" ", "").split(',')
            if len(fields) < 2:
                # Blank or malformed row: there is no URL column to inspect.
                continue
            try:
                domain = urllib.parse.urlparse(fields[1]).netloc
            except ValueError:
                # urlparse rejects some malformed URLs (e.g. bad IPv6 literal).
                continue
            if not domain:
                continue

            try:
                on_github_pages = _is_github_pages(domain)
            except (OSError, UnicodeError):
                # Unreachable or unresolvable host: skip it. Deliberately not
                # a bare except, so Ctrl-C can still stop the scan.
                continue

            if on_github_pages:
                f.write(domain + "\n")
                print(domain + " is on GH-Pages!")

# Remove the downloaded CSV without going through a shell.
try:
    os.remove(CSV_PATH)
except FileNotFoundError:
    # Nothing was downloaded, so there is nothing to clean up.
    pass