# saveweb search backend
|
|
|
|
from functools import wraps
|
|
|
|
|
import asyncio
|
|
|
|
|
import os
|
|
|
|
|
import time
|
|
|
|
|
|
|
|
|
|
from fastapi import FastAPI, Response
|
|
|
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
|
from fastapi.responses import HTMLResponse, JSONResponse
|
|
|
|
|
|
|
|
|
|
import meilisearch_python_sdk
|
|
|
|
|
import meilisearch_python_sdk.errors
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Configuration (read once from the environment at import time) ---

# Meilisearch API key; empty string means the instance is unsecured.
MEILI_KEY = os.getenv('MEILI_KEY', '')
print('$MEILI_KEY', 'set' if MEILI_KEY else 'not set')

# Meilisearch server URL.
MEILI_URL = os.getenv('MEILI_URL', 'http://127.0.0.1:7700')
print('$MEILI_URL', MEILI_URL)

# Maximum 1-minute load average before requests are rejected with 503.
# Defaults to cpu_count / 1.5, or 1.5 when the CPU count is unknown.
# (Read the env var once instead of twice to avoid a redundant lookup.)
_max_load_env = os.getenv('STWP_SEARCH_MAX_LOAD')
if _max_load_env:
    STWP_SEARCH_MAX_LOAD = float(_max_load_env)
else:
    _cpus = os.cpu_count()
    STWP_SEARCH_MAX_LOAD = _cpus / 1.5 if _cpus else 1.5
print('$STWP_SEARCH_MAX_LOAD', STWP_SEARCH_MAX_LOAD)

# Maximum number of concurrent ("in-flight") operations; clamped to >= 1.
_max_ops_env = os.getenv('STWP_SEARCH_MAX_FLYING_OPS')
STWP_SEARCH_MAX_FLYING_OPS = int(_max_ops_env) if _max_ops_env else int(STWP_SEARCH_MAX_LOAD * 2)
STWP_SEARCH_MAX_FLYING_OPS = max(STWP_SEARCH_MAX_FLYING_OPS, 1)
print('$STWP_SEARCH_MAX_FLYING_OPS', STWP_SEARCH_MAX_FLYING_OPS)

# Comma-separated list of allowed CORS origins.
STWP_SEARCH_CORS = os.getenv('STWP_SEARCH_CORS', ','.join([
    # 'https://search.saveweb.org',
    '*'
]))
print('$STWP_SEARCH_CORS', STWP_SEARCH_CORS)
|
|
|
|
app = FastAPI()

# Allow cross-origin requests from the configured origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=STWP_SEARCH_CORS.split(','),
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Name of the Meilisearch index holding the archived entries.
index_name = "entry"
|
|
|
|
async def get_load() -> float:
    """Return the 1-minute load average read from /proc/loadavg (Linux only).

    NOTE(review): this performs blocking file I/O inside a coroutine; /proc
    reads are fast enough in practice, but a thread offload would be stricter.
    """
    with open('/proc/loadavg', 'r') as f:
        load = f.read().split()[0]
    return float(load)
|
|
|
|
def load_limiter(func):
    """Decorator: reject the request with 503 while system load is too high.

    Wraps an async endpoint; when the 1-minute load average exceeds
    STWP_SEARCH_MAX_LOAD, the endpoint is not invoked and a placeholder
    "hit" explaining the situation is returned instead, with Retry-After.
    """
    @wraps(func)
    async def wrapper(*args, **kwargs):
        if await get_load() > STWP_SEARCH_MAX_LOAD:
            print('[INFO] 荷载过高')  # load too high
            return JSONResponse({
                'hits': [
                    {
                        'title': '丑搜当前荷载过高,请稍后再试',
                        'content': '服务器荷载过高,请稍后再试。原因:1. 数据库正在更新全文索引 2. 服务器没有摸鱼,在干其它重荷载的任务',
                        'author': ';丑搜',
                        'date': int(time.time()),
                        'link': '#',
                    },
                ],
                'error': '丑搜当前荷载过高,请稍后再试',
            }, status_code=503, headers={'Retry-After': '30'})
        return await func(*args, **kwargs)
    return wrapper
|
|
|
|
# Number of requests currently being processed ("in flight").
flying_ops = 0


def ops_limiter(func):
    """Decorator: cap the number of concurrently running requests.

    When STWP_SEARCH_MAX_FLYING_OPS requests are already in flight, the
    endpoint is not invoked and a 503 placeholder response is returned.
    The counter is decremented in a finally block so it cannot leak when
    the wrapped endpoint raises.
    """
    @wraps(func)
    async def wrapper(*args, **kwargs):
        global flying_ops
        if flying_ops >= STWP_SEARCH_MAX_FLYING_OPS:
            print('[INFO] 操作过多')  # too many concurrent operations
            return JSONResponse({
                'hits': [
                    {
                        'title': '飞行中的搜索过多,请稍后再试',
                        'content': '同一时间内的搜索请求过多。请稍后再试。',
                        'author': ';丑搜',
                        'date': int(time.time()),
                        'link': '#',
                    },
                ],
                'error': '操作过多,请稍后再试',
            }, status_code=503, headers={'Retry-After': '30'})
        flying_ops += 1
        try:
            return await func(*args, **kwargs)
        finally:
            flying_ops -= 1
    return wrapper
|
|
|
|
|
# Shared Meilisearch async client, reused across all requests.
client = meilisearch_python_sdk.AsyncClient(MEILI_URL, MEILI_KEY)
|
|
|
|
@app.get('/api/')
async def go_back_home():
    """Redirect bare /api/ requests back to the site root."""
    return Response(status_code=302, headers={'Location': '/'})
|
|
|
|
@app.get('/api/entry/{entry_id}')
@load_limiter
@ops_limiter
async def article(entry_id: int):
    """Return a single archived entry by its Meilisearch document id."""
    results = {}
    results['data'] = await client.index(index_name).get_document(entry_id)
    # is_favorite is currently not synced with the main database.
    results['humans.txt'] = 'is_favorite 目前与主数据库不同步'
    return results
|
|
|
|
@app.get('/api/stats')
@app.head('/api/stats')
@load_limiter
@ops_limiter
async def stats():
    """Return index statistics (document count, etc.) from Meilisearch."""
    # Use a local name that does not shadow the endpoint function itself.
    index_stats = await client.index(index_name).get_stats()
    return index_stats
|
|
|
|
@app.get('/api/search')
@load_limiter
@ops_limiter
async def search(q: str = 'saveweb', p: int = 0, f: str = 'false', h: str = 'false'):
    """Full-text search endpoint.

    Query params:
        q: the search query.
        p: 0-based page number (10 hits per page).
        f: 'true' -> return full content; anything else -> cropped content
           (the search itself always runs over the full text; only the
           returned length is limited).
        h: 'true' -> wrap matches in highlight <span> tags.
    """
    query = q
    page = p  # 0-based
    fulltext = f
    highlight = h

    print(query, page, 'fulltext:', fulltext, 'highlight:', highlight)
    # NOTE(review): blocking file I/O inside a coroutine; tolerable for a
    # small append-only log, but a thread offload would be stricter.
    with open('search.log', 'a') as fio:
        fio.write(query + '\t' + str(page) + '\n')

    # Empty query -> 400, nothing to search.
    if not query:
        return JSONResponse({
            'error': '搜索词为空',
        }, status_code=400)

    opt_params = {
        'limit': 10,
        'offset': 10 * page,
        'attributes_to_retrieve': ['id', 'id_feed', 'title', 'content', 'link', 'date', 'tags', 'author', 'lastSeen'],
    }

    if fulltext != 'true':
        # Crop returned content to keep responses small.
        opt_params['attributes_to_crop'] = ['content']
        opt_params['crop_length'] = 120

    if highlight == 'true':
        opt_params['attributes_to_highlight'] = ['title', 'content', 'date', 'tags', 'author']
        opt_params['highlight_pre_tag'] = '<span class="uglyHighlight text-purple-500">'
        opt_params['highlight_post_tag'] = '</span>'

    # First search: the (possibly cropped/highlighted) hits returned to the client.
    try:
        _results = await client.index(index_name).search(query, **opt_params)
    except meilisearch_python_sdk.errors.MeilisearchError as e:
        print('数据库错误', e)
        return {
            'hits': [
                {
                    'title': '数据库错误',
                    'content': '查询数据库时出错。如果一直出现这个错误,说明数据库寄了,请反馈。',
                    'author': ';丑搜',
                    'date': int(time.time()),
                    'link': '#',
                },
            ],
            'error': '数据库错误',
        }

    lengths: dict[str, int] = {}
    if fulltext != 'true':
        # Second, uncropped search over the same page, only to learn each
        # hit's full content length.
        opt_params = {
            'limit': 10,
            'offset': 10 * page,
            'attributes_to_retrieve': ['id', 'id_feed', 'title', 'content', 'link', 'date', 'tags', 'author', 'lastSeen'],
        }
        _results2 = await client.index(index_name).search(query, **opt_params)
        for hit in _results2.hits:
            lengths[str(hit['id'])] = len(hit['content'])

    # Attach content_length and flatten _formatted into each hit.
    for hit in _results.hits:
        if fulltext != 'true':
            # Fall back to the cropped length if the second search did not
            # return this id (e.g. the index changed between the two queries).
            # (Was an `assert lengths != {}` — asserts are stripped under -O
            # and could crash the endpoint; .get() handles it safely.)
            hit['content_length'] = lengths.get(str(hit['id']), len(hit['content']))
        if '_formatted' in hit:
            hit.update(hit['_formatted'])
            del hit['_formatted']

    results = {
        'hits': _results.hits,
        'estimatedTotalHits': _results.estimated_total_hits,  # TODO: rename estimatedTotalHits -> estimated_total_hits
        'humans.txt': '使用 API 时请检查 error 字段,高荷载/出错时会返回它。is_favorite 字段目前与主数据库不同步,只有在全库重新索引时才会更新。',
    }

    return results
|
|
|
|
|
@app.route('/')
async def root(request):
    """Serve the single-page frontend (it is the only page).

    NOTE(review): app.route is deprecated in recent Starlette releases;
    @app.get('/') would be the modern form.
    """
    # Use a context manager so the file handle is not leaked.
    with open('templates/index.html', 'r') as fp:
        return HTMLResponse(fp.read())
|
|
|
|
|
async def main():
    """Run the app under hypercorn, listening on all interfaces, port 8077."""
    import hypercorn.asyncio
    config = hypercorn.Config()
    config.bind = ['[::]:8077']
    await hypercorn.asyncio.serve(app, config)


if __name__ == '__main__':
    # Equivalent CLI: hypercorn --bind '[::]:8077' saveweb-search-backend:app
    asyncio.run(main())