563 lines
21 KiB
Python
563 lines
21 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
sohu-publisher:搜狐号自动发布。
|
||
|
||
子命令:
|
||
publish <account_id> <article_id> 发布文章
|
||
logs [--limit N] [--status s] [--account-id a] 查看发布记录
|
||
log-get <log_id> 查看单条发布记录(JSON)
|
||
health | version
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import asyncio
|
||
import io
|
||
import json
|
||
import os
|
||
import sqlite3
|
||
import subprocess
|
||
import sys
|
||
import time
|
||
from datetime import datetime
|
||
from typing import Any, Dict, List, Optional
|
||
|
||
import requests
|
||
from playwright.async_api import async_playwright
|
||
|
||
# On Windows, re-wrap both standard streams as UTF-8. Characters that still
# cannot be encoded are replaced instead of raising (errors="replace").
if sys.platform == "win32":
    sys.stdout, sys.stderr = (
        io.TextIOWrapper(stream.buffer, encoding="utf-8", errors="replace")
        for stream in (sys.stdout, sys.stderr)
    )

# Skill identity: the slug names the per-user data directory and is sent to
# the entitlement service; the version is what the `version` command reports.
SKILL_SLUG = "sohu-publisher"
SKILL_VERSION = "1.2.0"

# BASE_DIR is the parent of the directory that contains this file;
# OPENCLAW_DIR is one level above that and is the default skills root.
BASE_DIR = os.path.dirname(
    os.path.dirname(os.path.abspath(__file__))
)
OPENCLAW_DIR = os.path.dirname(BASE_DIR)
|
||
|
||
|
||
def _now_unix() -> int:
|
||
return int(time.time())
|
||
|
||
|
||
def _unix_to_iso(ts: Optional[int]) -> Optional[str]:
|
||
if ts is None:
|
||
return None
|
||
try:
|
||
return datetime.fromtimestamp(int(ts)).isoformat(timespec="seconds")
|
||
except (ValueError, OSError, OverflowError):
|
||
return None
|
||
|
||
|
||
def get_data_root() -> str:
    """Return the root directory under which skill data is stored.

    CLAW_DATA_ROOT is consulted first, then JIANGCHANG_DATA_ROOT. The first
    non-empty value is stripped; if the result is non-blank it is returned.
    Otherwise the default is ``D:\\claw-data`` on Windows and
    ``~/.claw-data`` elsewhere.
    """
    configured = os.getenv("CLAW_DATA_ROOT")
    if not configured:
        configured = os.getenv("JIANGCHANG_DATA_ROOT") or ""
    configured = configured.strip()
    if configured:
        return configured

    on_windows = sys.platform == "win32"
    home = os.path.expanduser("~")
    return r"D:\claw-data" if on_windows else os.path.join(home, ".claw-data")
|
||
|
||
|
||
def get_user_id() -> str:
    """Return the current user id, or ``"_anon"`` when none is configured.

    CLAW_USER_ID takes precedence over JIANGCHANG_USER_ID; the chosen value
    is stripped, and a blank result falls back to ``"_anon"``.
    """
    raw = os.getenv("CLAW_USER_ID") or os.getenv("JIANGCHANG_USER_ID") or ""
    user_id = raw.strip()
    return user_id if user_id else "_anon"
|
||
|
||
|
||
def get_skills_root() -> str:
    """Return the directory that holds the sibling skill folders.

    CLAW_SKILLS_ROOT is consulted first, then JIANGCHANG_SKILLS_ROOT; a
    non-blank value (after stripping) wins. Otherwise OPENCLAW_DIR is used.
    """
    configured = os.getenv("CLAW_SKILLS_ROOT")
    if not configured:
        configured = os.getenv("JIANGCHANG_SKILLS_ROOT") or ""
    configured = configured.strip()
    return configured if configured else OPENCLAW_DIR
|
||
|
||
|
||
def get_skill_data_dir() -> str:
    """Return ``<data root>/<user id>/<skill slug>``, creating it if missing."""
    segments = (get_data_root(), get_user_id(), SKILL_SLUG)
    data_dir = os.path.join(*segments)
    os.makedirs(data_dir, exist_ok=True)
    return data_dir
|
||
|
||
|
||
def get_db_path() -> str:
    """Return the path of this skill's SQLite file inside its data directory."""
    data_dir = get_skill_data_dir()
    return os.path.join(data_dir, "sohu-publisher.db")
|
||
|
||
|
||
def get_conn() -> sqlite3.Connection:
    """Open a new SQLite connection to the publish-log database.

    The caller is responsible for closing the returned connection.
    """
    db_path = get_db_path()
    return sqlite3.connect(db_path)
|
||
|
||
|
||
def init_db() -> None:
    """Ensure the ``publish_logs`` table exists with all expected columns.

    The table is created when absent. For a table that already exists, the
    columns ``article_title``, ``updated_at`` and ``created_at`` are added
    if ``PRAGMA table_info`` does not list them.
    """
    # Columns to add when missing, in the order they are checked.
    backfill_columns = (
        ("article_title", "TEXT"),
        ("updated_at", "INTEGER"),
        ("created_at", "INTEGER"),
    )
    db = get_conn()
    try:
        cursor = db.cursor()
        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS publish_logs (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                account_id TEXT NOT NULL,
                article_id INTEGER NOT NULL,
                article_title TEXT,
                status TEXT NOT NULL,
                error_msg TEXT,
                created_at INTEGER NOT NULL,
                updated_at INTEGER NOT NULL
            )
            """
        )
        cursor.execute("PRAGMA table_info(publish_logs)")
        # Row index 1 of table_info is the column name.
        present = {info[1] for info in cursor.fetchall()}
        for column, sql_type in backfill_columns:
            if column not in present:
                cursor.execute(f"ALTER TABLE publish_logs ADD COLUMN {column} {sql_type}")
        db.commit()
    finally:
        db.close()
|
||
|
||
|
||
def save_publish_log(account_id: str, article_id: int, article_title: str, status: str, error_msg: Optional[str] = None) -> int:
    """Insert one row into ``publish_logs`` and return its id.

    The schema is ensured first. ``created_at`` and ``updated_at`` are both
    set to the current Unix time, and a falsy ``article_title`` is stored as
    an empty string.
    """
    init_db()
    timestamp = _now_unix()
    row = (
        account_id,
        int(article_id),
        article_title or "",
        status,
        error_msg,
        timestamp,
        timestamp,
    )
    db = get_conn()
    try:
        cursor = db.cursor()
        cursor.execute(
            """
            INSERT INTO publish_logs (account_id, article_id, article_title, status, error_msg, created_at, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?)
            """,
            row,
        )
        inserted_id = int(cursor.lastrowid)
        db.commit()
    finally:
        db.close()
    return inserted_id
|
||
|
||
|
||
def check_entitlement(skill_slug: str) -> tuple[bool, str]:
    """Ask the entitlement service whether the current user may run a skill.

    Configuration is read from the environment:
      * JIANGCHANG_AUTH_BASE_URL - service base URL; when blank the check is
        skipped and access is allowed.
      * CLAW_USER_ID / JIANGCHANG_USER_ID - user identity (required once a
        base URL is configured).
      * JIANGCHANG_AUTH_API_KEY - optional bearer token.
      * JIANGCHANG_AUTH_TIMEOUT_SECONDS - request timeout, default 5.
      * JIANGCHANG_TRACE_ID - forwarded as ``trace_id``.

    Args:
        skill_slug: slug of the skill being checked.

    Returns:
        ``(True, "")`` when allowed, otherwise ``(False, reason)`` where
        ``reason`` is a user-facing message.
    """
    auth_base = (os.getenv("JIANGCHANG_AUTH_BASE_URL") or "").strip().rstrip("/")
    if not auth_base:
        # No auth service configured: entitlement checking is disabled.
        return True, ""
    user_id = (os.getenv("CLAW_USER_ID") or os.getenv("JIANGCHANG_USER_ID") or "").strip()
    if not user_id:
        return False, "鉴权失败:缺少用户身份(CLAW_USER_ID / JIANGCHANG_USER_ID)"

    # A malformed or non-positive timeout must not crash the command with a
    # traceback; fall back to the documented default instead.
    default_timeout = 5
    try:
        timeout = int((os.getenv("JIANGCHANG_AUTH_TIMEOUT_SECONDS") or str(default_timeout)).strip())
    except ValueError:
        timeout = default_timeout
    if timeout <= 0:
        timeout = default_timeout

    headers = {"Content-Type": "application/json"}
    auth_api_key = (os.getenv("JIANGCHANG_AUTH_API_KEY") or "").strip()
    if auth_api_key:
        headers["Authorization"] = f"Bearer {auth_api_key}"
    payload = {
        "user_id": user_id,
        "skill_slug": skill_slug,
        "trace_id": (os.getenv("JIANGCHANG_TRACE_ID") or "").strip(),
        "context": {"entry": "main.py"},
    }

    try:
        res = requests.post(f"{auth_base}/api/entitlements/check", json=payload, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        return False, f"鉴权请求失败:{exc}"
    if res.status_code != 200:
        return False, f"鉴权服务异常:HTTP {res.status_code}"
    try:
        body = res.json()
    except ValueError:
        return False, "鉴权服务异常:返回非 JSON"
    # Valid JSON that is not an object (list, string, null, ...) has no
    # fields to read; treat it as a service error rather than failing on .get.
    if not isinstance(body, dict):
        return False, "鉴权服务异常:返回格式不正确"

    if body.get("code") != 200:
        return False, str(body.get("msg") or "鉴权失败")
    data = body.get("data")
    if not isinstance(data, dict):
        # Missing or malformed data carries no "allow" flag, so deny.
        data = {}
    if not data.get("allow", False):
        return False, str(data.get("reason") or "未购买或已过期")
    return True, ""
|
||
|
||
|
||
def _call_json_script(script_path: str, args: List[str]) -> Optional[Dict[str, Any]]:
|
||
proc = subprocess.run([sys.executable, script_path, *args], capture_output=True, text=True, encoding="utf-8", errors="replace")
|
||
raw = (proc.stdout or "").strip()
|
||
if not raw or raw.startswith("ERROR"):
|
||
return None
|
||
try:
|
||
return json.loads(raw)
|
||
except json.JSONDecodeError:
|
||
return None
|
||
|
||
|
||
def _normalize_sohu_title(raw_title: str, article: Dict[str, Any]) -> str:
|
||
"""
|
||
搜狐标题约束:5-72 字。
|
||
- >72:截断
|
||
- <5:用正文首行/前缀补足
|
||
"""
|
||
title = (raw_title or "").strip()
|
||
if len(title) > 72:
|
||
title = title[:72].rstrip()
|
||
|
||
if len(title) >= 5:
|
||
return title
|
||
|
||
body = str(article.get("content") or article.get("content_html") or "").strip()
|
||
first_line = ""
|
||
for line in body.splitlines():
|
||
t = line.strip()
|
||
if t:
|
||
first_line = t
|
||
break
|
||
seed = first_line or "搜狐发布稿件"
|
||
seed = seed.replace("\r", " ").replace("\n", " ").strip()
|
||
|
||
# 先把 seed 拼上,仍不足时再补固定后缀,最终保证 >=5
|
||
merged = (title + seed).strip()
|
||
if len(merged) < 5:
|
||
merged = (merged + "发布稿件标题").strip()
|
||
if len(merged) > 72:
|
||
merged = merged[:72].rstrip()
|
||
return merged
|
||
|
||
|
||
def get_account(account_id: str) -> Optional[Dict[str, Any]]:
    """Fetch an account record by running ``account-manager``'s ``get`` command.

    Returns whatever ``_call_json_script`` yields for that invocation.
    """
    script_parts = (get_skills_root(), "account-manager", "scripts", "main.py")
    cli_args = ["get", str(account_id)]
    return _call_json_script(os.path.join(*script_parts), cli_args)
|
||
|
||
|
||
def get_article(article_id: str) -> Optional[Dict[str, Any]]:
    """Fetch an article record by running ``content-manager``'s ``get`` command.

    Returns whatever ``_call_json_script`` yields for that invocation.
    """
    script_parts = (get_skills_root(), "content-manager", "scripts", "main.py")
    cli_args = ["get", str(article_id)]
    return _call_json_script(os.path.join(*script_parts), cli_args)
|
||
|
||
|
||
async def publish(account: Dict[str, Any], article: Dict[str, Any], account_id: str) -> str:
    """Publish one article on Sohu Hao by driving a visible Chrome window.

    Steps: open the "add article" page with the account's persistent browser
    profile, type the title, paste the body through the clipboard, click the
    publish button, then wait for the page to navigate away from the editor.

    Args:
        account: account record; ``profile_dir`` is used as the browser
            user-data directory.
        article: article record; ``title``, ``content_html`` and ``content``
            are read.
        account_id: not used inside this function.

    Returns:
        ``"SUCCESS"``; ``"ERROR:REQUIRE_LOGIN"`` when the title input does
        not become visible within 10 s; otherwise ``"FAIL:<reason>"``.
    """
    profile_dir = account["profile_dir"]
    original_title = str(article.get("title") or "")
    title = _normalize_sohu_title(original_title, article)
    if title != original_title:
        print(f"ℹ️ 标题已自动修正(搜狐要求 5-72 字):\n 原标题:{original_title}\n 新标题:{title}")
    # Prefer the HTML body and fall back to the plain one. Chaining with
    # `or` also covers a key that is present but None/empty, which a .get()
    # default would pass through to the page unchanged.
    content_html = article.get("content_html") or article.get("content") or ""

    async with async_playwright() as p:
        browser = await p.chromium.launch_persistent_context(
            user_data_dir=profile_dir,
            headless=False,
            channel="chrome",
            no_viewport=True,
            permissions=["clipboard-read", "clipboard-write"],
            args=["--start-maximized"],
        )
        # Close the context on every exit path, including when a step raises.
        try:
            page = browser.pages[0] if browser.pages else await browser.new_page()
            await page.goto("https://mp.sohu.com/mpfe/v4/contentManagement/news/addarticle?contentStatus=1")
            await page.wait_for_load_state("networkidle")

            # The editor's title input is used as the "logged in" signal.
            try:
                title_input = page.locator(".publish-title input").first
                await title_input.wait_for(state="visible", timeout=10000)
            except Exception:
                return "ERROR:REQUIRE_LOGIN"

            print("💡 页面加载且已确认登录,开始自动填入文字...")
            await title_input.click()
            await title_input.fill("")
            await page.keyboard.insert_text(title)
            await asyncio.sleep(1)

            # Put the body on the clipboard as both text/html and text/plain,
            # then paste it into the focused editor.
            editor = page.locator("#editor .ql-editor").first
            await editor.click()
            await page.evaluate(
                """(html_str) => {
                    const blobHtml = new Blob([html_str], { type: 'text/html' });
                    const blobText = new Blob([html_str], { type: 'text/plain' });
                    const item = new window.ClipboardItem({
                        'text/html': blobHtml,
                        'text/plain': blobText
                    });
                    return navigator.clipboard.write([item]);
                }""",
                content_html,
            )

            modifier = "Meta" if sys.platform == "darwin" else "Control"
            await page.keyboard.press(f"{modifier}+v")
            await asyncio.sleep(3)
            await page.locator("li.publish-report-btn").first.click()
            print("⌛ 正在提交发布,进入高压视觉核验阶段...")

            publish_success = False
            error_text = "动作执行阻断:由于特殊元素拦截、频率限制或底层报错,未能成功发出。"
            try:
                # Success means navigating to a URL that is no longer the editor.
                async with page.expect_navigation(url=lambda u: "addarticle" not in u, timeout=8000):
                    pass
                publish_success = True
            except Exception:
                # No navigation: try to recover a human-readable reason.
                try:
                    limit_text = page.locator("text=/.*已达上限.*/").first
                    if await limit_text.is_visible(timeout=1500):
                        error_text = await limit_text.inner_text()
                    else:
                        # Look for a short text in a fixed/absolute element
                        # with z-index above 80.
                        error_msg = await page.evaluate(
                            """() => {
                                const els = Array.from(document.querySelectorAll('div, span, p'));
                                for (let el of els) {
                                    const style = window.getComputedStyle(el);
                                    if ((style.position === 'fixed' || style.position === 'absolute')
                                        && parseInt(style.zIndex || 0) > 80
                                        && el.innerText.trim().length > 3
                                        && el.innerText.trim().length < 80) {
                                        return el.innerText.trim();
                                    }
                                }
                                return null;
                            }"""
                        )
                        if error_msg:
                            error_text = f"抓取到报错原文: {error_msg}"
                except Exception as e:
                    error_text = f"抓取报错文案期间遭遇环境隔离: {e}"

            await asyncio.sleep(5)
            if publish_success:
                return "SUCCESS"
            return f"FAIL:{error_text}"
        finally:
            await browser.close()
|
||
|
||
|
||
def cmd_publish(account_id: str, article_id: str) -> int:
    """Handle ``publish``: validate inputs, publish, then record the outcome.

    Checks run in order: entitlement, numeric ``article_id``, account exists,
    account platform is ``sohu``, account is logged in, article exists. The
    outcome of ``publish`` is written to ``publish_logs``; for success and
    failure it is also reported to content-manager's ``feedback`` command.

    Args:
        account_id: account id known to account-manager.
        article_id: article id known to content-manager (decimal digits).

    Returns:
        0 when the article was published, 1 otherwise.
    """
    ok, reason = check_entitlement(SKILL_SLUG)
    if not ok:
        print(f"❌ {reason}")
        return 1
    # isdecimal() rather than isdigit(): the latter also accepts characters
    # such as "²" that int() rejects further down.
    if not str(article_id).isdecimal():
        print("❌ article_id 必须是数字。请先执行 content-manager 的 list 查看 id。")
        return 1

    account = get_account(account_id)
    if not account:
        print(f"❌ 查无此配置账号:{account_id}")
        return 1
    platform = str(account.get("platform") or "").strip().lower()
    if platform != "sohu":
        # Display names used only to explain which platform the id belongs to.
        platform_cn_map = {
            "doubao": "豆包",
            "deepseek": "DeepSeek",
            "qianwen": "通义千问",
            "kimi": "Kimi",
            "yiyan": "文心一言",
            "yuanbao": "腾讯元宝",
            "toutiao": "头条号",
            "zhihu": "知乎",
            "wechat": "微信公众号",
            "sohu": "搜狐号",
        }
        got_cn = platform_cn_map.get(platform, platform or "未知平台")
        print("❌ 账号平台不匹配:当前账号不是「搜狐号」。")
        print(f"当前 account_id={account_id} 对应平台:{got_cn}(platform={platform or 'unknown'})")
        print("请换一个搜狐账号 id 后重试。")
        print("可先执行:python account-manager/scripts/main.py list sohu")
        return 1
    login_status = int(account.get("login_status") or 0)
    if login_status != 1:
        print("❌ 该搜狐账号当前未登录,暂不能发布。")
        print("请先手工登录,再执行发布:")
        print(f" python account-manager/scripts/main.py login {account_id}")
        print(f"登录完成后再执行:python sohu-publisher/scripts/main.py publish {account_id} {article_id}")
        return 1
    article = get_article(article_id)
    if not article:
        print(f"❌ 查无此文章编号(库中无 ID: {article_id})")
        return 1

    # Any exception from the browser automation is turned into a normal
    # failure so that it is still logged and reported below.
    try:
        result = asyncio.run(publish(account, article, account_id))
    except Exception as exc:
        result = f"FAIL:发布过程异常({type(exc).__name__}):{exc}"
    content_script = os.path.join(get_skills_root(), "content-manager", "scripts", "main.py")
    # `or ""` keeps a stored None title from being printed as "None".
    title = article.get("title") or ""

    if result == "ERROR:REQUIRE_LOGIN":
        save_publish_log(account_id, int(article_id), title, "require_login", "账号未登录或登录已失效")
        print(f"⚠️ 搜狐号 ({account_id}) 登录状态已失效,发布流程已中止。")
        print("请先手工完成登录,再重新发布:")
        print(f" python account-manager/scripts/main.py login {account_id}")
        print(f" python sohu-publisher/scripts/main.py publish {account_id} {article_id}")
        return 1

    if result == "SUCCESS":
        log_id = save_publish_log(account_id, int(article_id), title, "published", None)
        subprocess.run([sys.executable, content_script, "feedback", str(article_id), "published", str(account_id)])
        print(f"🎉 发布成功:{title}")
        print(f"✅ 发布日志已记录,log_id={log_id}")
        return 0

    if result.startswith("FAIL:"):
        error_msg = result[len("FAIL:") :]
        log_id = save_publish_log(account_id, int(article_id), title, "failed", error_msg)
        subprocess.run([sys.executable, content_script, "feedback", str(article_id), "failed", str(account_id), error_msg])
        print(f"❌ 发布失败:{error_msg}")
        print(f"✅ 失败日志已记录,log_id={log_id}")
        return 1

    # Unrecognised result: record it and tell the user instead of exiting silently.
    save_publish_log(account_id, int(article_id), title, "failed", f"未知结果:{result}")
    print(f"❌ 发布失败:未知结果:{result}")
    return 1
|
||
|
||
|
||
def cmd_logs(limit: int = 10, status: Optional[str] = None, account_id: Optional[str] = None) -> int:
    """Handle ``logs``: print recent publish records, newest first.

    Args:
        limit: maximum number of rows; values <= 0 are replaced by 10.
        status: when set, only rows with this status are shown.
        account_id: when set, only rows for this account are shown.

    Returns:
        Always 0.
    """
    init_db()
    if limit <= 0:
        limit = 10

    # Assemble the query from fixed fragments; values are always bound.
    fragments = [
        "SELECT id, account_id, article_id, article_title, status, error_msg, created_at, updated_at ",
        "FROM publish_logs WHERE 1=1 ",
    ]
    bound: List[Any] = []
    for clause, value in (("AND status = ? ", status), ("AND account_id = ? ", account_id)):
        if value:
            fragments.append(clause)
            bound.append(value)
    fragments.append("ORDER BY created_at DESC, id DESC LIMIT ?")
    bound.append(int(limit))

    db = get_conn()
    try:
        cursor = db.cursor()
        cursor.execute("".join(fragments), tuple(bound))
        rows = cursor.fetchall()
    finally:
        db.close()

    if not rows:
        print("暂无发布记录")
        return 0

    divider = "_" * 39
    total = len(rows)
    for position, (rid, aid, arid, title, st, err, cat, uat) in enumerate(rows, start=1):
        fields = (
            ("id", rid),
            ("account_id", aid or ""),
            ("article_id", arid),
            ("article_title", title or ""),
            ("status", st or ""),
            ("error_msg", err or ""),
            # Fall back to the raw stored value when it is not a timestamp.
            ("created_at", _unix_to_iso(cat) or str(cat or "")),
            ("updated_at", _unix_to_iso(uat) or str(uat or "")),
        )
        for label, value in fields:
            print(f"{label}:{value}")
        # Separator between records, not after the last one.
        if position < total:
            print(divider)
            print()
    return 0
|
||
|
||
|
||
def cmd_log_get(log_id: str) -> int:
    """Handle ``log-get``: print one publish record as a JSON object.

    Args:
        log_id: id of the ``publish_logs`` row, as decimal digits.

    Returns:
        0 when the record was printed; 1 when ``log_id`` is not numeric or
        no such row exists.
    """
    # isdecimal() rather than isdigit(): the latter also accepts characters
    # such as "²", for which int() below would raise ValueError.
    if not str(log_id).isdecimal():
        print("❌ log_id 必须是数字")
        return 1
    init_db()
    conn = get_conn()
    try:
        cur = conn.cursor()
        cur.execute(
            "SELECT id, account_id, article_id, article_title, status, error_msg, created_at, updated_at FROM publish_logs WHERE id = ?",
            (int(log_id),),
        )
        row = cur.fetchone()
    finally:
        conn.close()
    if not row:
        print("❌ 没有这条发布记录")
        return 1
    rid, aid, arid, title, st, err, cat, uat = row
    print(
        json.dumps(
            {
                "id": int(rid),
                "account_id": aid,
                "article_id": int(arid),
                "article_title": title,
                "status": st,
                "error_msg": err,
                "created_at": _unix_to_iso(cat),
                "updated_at": _unix_to_iso(uat),
            },
            ensure_ascii=False,
        )
    )
    return 0
|
||
|
||
|
||
class ZhArgumentParser(argparse.ArgumentParser):
    """ArgumentParser that reports usage errors with a Chinese message."""

    def error(self, message: str) -> None:
        """Write the error and a help hint to stderr, then exit with status 2."""
        hint = "请执行:python main.py -h 查看帮助"
        sys.stderr.write(f"参数错误:{message}\n{hint}\n")
        self.exit(2)
|
||
|
||
|
||
def _print_full_usage() -> None:
|
||
print("搜狐号发布(main.py)可用命令:")
|
||
print(" python main.py publish <account_id> <article_id> # 发布一篇")
|
||
print(" python main.py logs [--limit N] [--status s] [--account-id a] # 查看发布记录")
|
||
print(" python main.py log-get <log_id> # 查看单条日志(JSON)")
|
||
print(" python main.py health")
|
||
print(" python main.py version")
|
||
print()
|
||
print("常见示例:")
|
||
print(" python main.py publish sohu_account1 12")
|
||
print(" python main.py logs")
|
||
print(" python main.py logs --status failed --limit 20")
|
||
print(" python main.py log-get 7")
|
||
print()
|
||
print("说明:也兼容旧写法:python main.py <account_id> <article_id>")
|
||
|
||
|
||
def build_parser() -> ZhArgumentParser:
    """Build the command-line parser with one sub-command per action.

    Every sub-parser stores a ``handler`` default: a callable that takes the
    parsed namespace and returns the process exit code.
    """

    def run_publish(ns: argparse.Namespace) -> int:
        return cmd_publish(ns.account_id, ns.article_id)

    def run_logs(ns: argparse.Namespace) -> int:
        return cmd_logs(limit=ns.limit, status=ns.status, account_id=ns.account_id)

    def run_log_get(ns: argparse.Namespace) -> int:
        return cmd_log_get(ns.log_id)

    def run_health(_ns: argparse.Namespace) -> int:
        # Reports success only when running on Python 3.10 or newer.
        if sys.version_info >= (3, 10):
            return 0
        return 1

    def run_version(_ns: argparse.Namespace) -> int:
        print(json.dumps({"version": SKILL_VERSION, "skill": SKILL_SLUG}, ensure_ascii=False))
        return 0

    examples = "\n".join(
        [
            "示例:",
            " python main.py publish sohu_account1 12",
            " python main.py logs",
            " python main.py logs --status failed --limit 20",
            " python main.py log-get 7",
            " python main.py health",
            " python main.py version",
        ]
    )
    parser = ZhArgumentParser(
        prog="main.py",
        description="搜狐号发布:发布文章、查看发布记录、查询单条日志。",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=examples,
    )
    commands = parser.add_subparsers(dest="cmd", required=True, parser_class=ZhArgumentParser)

    publish_cmd = commands.add_parser("publish", help="发布一篇文章到搜狐号")
    publish_cmd.add_argument("account_id", help="账号 id(来自 account-manager list)")
    publish_cmd.add_argument("article_id", help="文章 id(来自 content-manager list)")
    publish_cmd.set_defaults(handler=run_publish)

    logs_cmd = commands.add_parser("logs", help="查看发布记录(默认最近 10 条)")
    logs_cmd.add_argument("--limit", type=int, default=10, help="最多显示条数(默认 10)")
    logs_cmd.add_argument("--status", default=None, help="按状态筛选:published/failed/require_login")
    logs_cmd.add_argument("--account-id", default=None, help="按账号 id 筛选")
    logs_cmd.set_defaults(handler=run_logs)

    log_get_cmd = commands.add_parser("log-get", help="按 log_id 查看单条发布记录(JSON)")
    log_get_cmd.add_argument("log_id", help="日志 id(整数)")
    log_get_cmd.set_defaults(handler=run_log_get)

    health_cmd = commands.add_parser("health", help="健康检查")
    health_cmd.set_defaults(handler=run_health)

    version_cmd = commands.add_parser("version", help="版本信息(JSON)")
    version_cmd.set_defaults(handler=run_version)
    return parser
|
||
|
||
|
||
def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point; returns the process exit code.

    With no arguments the usage overview is printed and 1 is returned.
    Exactly two arguments whose first is not a known sub-command or help
    flag are treated as the legacy form ``<account_id> <article_id>``.
    Everything else goes through the argparse parser.
    """
    cli_args = sys.argv[1:] if argv is None else argv
    if not cli_args:
        _print_full_usage()
        return 1

    # Legacy usage: python main.py <account_id> <article_id>
    reserved_words = {"publish", "logs", "log-get", "health", "version", "-h", "--help"}
    is_legacy_call = len(cli_args) == 2 and cli_args[0] not in reserved_words
    if is_legacy_call:
        legacy_account, legacy_article = cli_args
        return cmd_publish(legacy_account, legacy_article)

    namespace = build_parser().parse_args(cli_args)
    exit_code = namespace.handler(namespace)
    return int(exit_code)
|
||
|
||
|
||
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())