Files
skill-template/sohu-publisher/scripts/main.py
2026-04-04 10:35:02 +08:00

563 lines
21 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
sohu-publisher: automated publishing to Sohu Hao (搜狐号).

Subcommands:
    publish <account_id> <article_id>               Publish an article
    logs [--limit N] [--status s] [--account-id a]  List publish records
    log-get <log_id>                                Show one publish record (JSON)
    health | version
"""
from __future__ import annotations
import argparse
import asyncio
import io
import json
import os
import sqlite3
import subprocess
import sys
import time
from datetime import datetime
from typing import Any, Dict, List, Optional
import requests
from playwright.async_api import async_playwright
# On Windows, re-wrap both standard streams as UTF-8 and replace characters
# that cannot be encoded instead of raising.
if sys.platform == "win32":
    sys.stdout, sys.stderr = (
        io.TextIOWrapper(stream.buffer, encoding="utf-8", errors="replace")
        for stream in (sys.stdout, sys.stderr)
    )

SKILL_SLUG = "sohu-publisher"
SKILL_VERSION = "1.2.0"

# Directory containing this script; BASE_DIR is its parent and OPENCLAW_DIR
# is the parent of BASE_DIR (used as the default skills root).
_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
BASE_DIR = os.path.dirname(_SCRIPTS_DIR)
OPENCLAW_DIR = os.path.dirname(BASE_DIR)
def _now_unix() -> int:
    """Return the current time as whole seconds since the Unix epoch."""
    seconds = time.time()
    return int(seconds)
def _unix_to_iso(ts: Optional[int]) -> Optional[str]:
    """Format a Unix timestamp as a local-time ISO-8601 string (second precision).

    Returns ``None`` when ``ts`` is ``None`` or when the conversion raises
    ``ValueError``, ``OSError`` or ``OverflowError``.
    """
    if ts is not None:
        try:
            moment = datetime.fromtimestamp(int(ts))
            return moment.isoformat(timespec="seconds")
        except (ValueError, OSError, OverflowError):
            pass
    return None
def get_data_root() -> str:
    """Return the root directory under which skill data is stored.

    ``CLAW_DATA_ROOT`` is preferred, then ``JIANGCHANG_DATA_ROOT``. When the
    chosen value is empty after stripping, a platform default is used:
    ``D:\\claw-data`` on Windows, ``~/.claw-data`` elsewhere.
    """
    raw = os.getenv("CLAW_DATA_ROOT") or os.getenv("JIANGCHANG_DATA_ROOT") or ""
    configured = raw.strip()
    if configured:
        return configured
    if sys.platform != "win32":
        home = os.path.expanduser("~")
        return os.path.join(home, ".claw-data")
    return r"D:\claw-data"
def get_user_id() -> str:
    """Return the user id from ``CLAW_USER_ID`` / ``JIANGCHANG_USER_ID``, or ``"_anon"``."""
    raw = os.getenv("CLAW_USER_ID") or os.getenv("JIANGCHANG_USER_ID") or ""
    user_id = raw.strip()
    return user_id if user_id else "_anon"
def get_skills_root() -> str:
    """Return the directory that contains the skill folders.

    Taken from ``CLAW_SKILLS_ROOT`` or ``JIANGCHANG_SKILLS_ROOT`` when set to a
    non-blank value; otherwise ``OPENCLAW_DIR``.
    """
    raw = os.getenv("CLAW_SKILLS_ROOT") or os.getenv("JIANGCHANG_SKILLS_ROOT") or ""
    configured = raw.strip()
    return configured if configured else OPENCLAW_DIR
def get_skill_data_dir() -> str:
    """Return ``<data root>/<user id>/<skill slug>``, creating it if missing."""
    segments = (get_data_root(), get_user_id(), SKILL_SLUG)
    data_dir = os.path.join(*segments)
    os.makedirs(data_dir, exist_ok=True)
    return data_dir
def get_db_path() -> str:
    """Return the path of the SQLite file inside the skill data directory."""
    data_dir = get_skill_data_dir()
    return os.path.join(data_dir, "sohu-publisher.db")
def get_conn() -> sqlite3.Connection:
    """Open a new SQLite connection to the publish-log database.

    The caller is responsible for closing the returned connection.
    """
    db_file = get_db_path()
    return sqlite3.connect(db_file)
def init_db() -> None:
    """Create the ``publish_logs`` table if absent and add any missing columns."""
    create_sql = """
        CREATE TABLE IF NOT EXISTS publish_logs (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        account_id TEXT NOT NULL,
        article_id INTEGER NOT NULL,
        article_title TEXT,
        status TEXT NOT NULL,
        error_msg TEXT,
        created_at INTEGER NOT NULL,
        updated_at INTEGER NOT NULL
        )
        """
    # Columns added with ALTER TABLE when an existing table does not have them,
    # checked in this order.
    optional_columns = (
        ("article_title", "TEXT"),
        ("updated_at", "INTEGER"),
        ("created_at", "INTEGER"),
    )
    db = get_conn()
    try:
        cursor = db.cursor()
        cursor.execute(create_sql)
        cursor.execute("PRAGMA table_info(publish_logs)")
        # Index 1 of each PRAGMA table_info row is the column name.
        present = {info[1] for info in cursor.fetchall()}
        for column, sql_type in optional_columns:
            if column not in present:
                cursor.execute(f"ALTER TABLE publish_logs ADD COLUMN {column} {sql_type}")
        db.commit()
    finally:
        db.close()
def save_publish_log(account_id: str, article_id: int, article_title: str, status: str, error_msg: Optional[str] = None) -> int:
    """Insert one row into ``publish_logs`` and return its id.

    Args:
        account_id: Account the publish attempt was made with.
        article_id: Article id; coerced with ``int()``.
        article_title: Title to store; a falsy value is stored as ``""``.
        status: Status string stored as given.
        error_msg: Optional failure detail.

    Returns:
        The ``lastrowid`` of the inserted row.
    """
    init_db()
    stamp = _now_unix()
    # created_at and updated_at both receive the same timestamp.
    values = (account_id, int(article_id), article_title or "", status, error_msg, stamp, stamp)
    db = get_conn()
    try:
        cursor = db.cursor()
        cursor.execute(
            """
            INSERT INTO publish_logs (account_id, article_id, article_title, status, error_msg, created_at, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?)
            """,
            values,
        )
        inserted_id = int(cursor.lastrowid)
        db.commit()
    finally:
        db.close()
    return inserted_id
def check_entitlement(skill_slug: str) -> tuple[bool, str]:
    """Ask the entitlement service whether the current user may run a skill.

    The check is skipped and treated as allowed when
    ``JIANGCHANG_AUTH_BASE_URL`` is unset or blank.

    Args:
        skill_slug: Value sent to the service as ``skill_slug``.

    Returns:
        ``(True, "")`` when allowed, otherwise ``(False, reason)``.
    """
    auth_base = (os.getenv("JIANGCHANG_AUTH_BASE_URL") or "").strip().rstrip("/")
    if not auth_base:
        return True, ""
    user_id = (os.getenv("CLAW_USER_ID") or os.getenv("JIANGCHANG_USER_ID") or "").strip()
    if not user_id:
        return False, "鉴权失败缺少用户身份CLAW_USER_ID / JIANGCHANG_USER_ID"
    auth_api_key = (os.getenv("JIANGCHANG_AUTH_API_KEY") or "").strip()

    # A malformed or non-positive timeout must not crash the CLI with a
    # traceback; fall back to the 5-second default instead.
    default_timeout = 5
    try:
        timeout = int((os.getenv("JIANGCHANG_AUTH_TIMEOUT_SECONDS") or str(default_timeout)).strip())
    except ValueError:
        timeout = default_timeout
    if timeout <= 0:
        timeout = default_timeout

    headers = {"Content-Type": "application/json"}
    if auth_api_key:
        headers["Authorization"] = f"Bearer {auth_api_key}"
    payload = {
        "user_id": user_id,
        "skill_slug": skill_slug,
        "trace_id": (os.getenv("JIANGCHANG_TRACE_ID") or "").strip(),
        "context": {"entry": "main.py"},
    }
    try:
        res = requests.post(f"{auth_base}/api/entitlements/check", json=payload, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        return False, f"鉴权请求失败:{exc}"
    if res.status_code != 200:
        return False, f"鉴权服务异常HTTP {res.status_code}"
    try:
        body = res.json()
    except ValueError:
        return False, "鉴权服务异常:返回非 JSON"
    # Valid JSON that is not an object (list, string, number, null) cannot be
    # read with .get(); report it as an unusable response rather than crash.
    if not isinstance(body, dict):
        return False, "鉴权服务异常:返回非 JSON"
    code = body.get("code")
    data = body.get("data") or {}
    if not isinstance(data, dict):
        # An unexpected payload shape is treated as "not allowed" (fail closed).
        data = {}
    if code != 200:
        return False, str(body.get("msg") or "鉴权失败")
    if not data.get("allow", False):
        return False, str(data.get("reason") or "未购买或已过期")
    return True, ""
def _call_json_script(script_path: str, args: List[str]) -> Optional[Dict[str, Any]]:
    """Run a Python script and parse its stdout as a JSON object.

    Args:
        script_path: Script to run with the current interpreter.
        args: Command-line arguments passed to the script.

    Returns:
        The decoded JSON object, or ``None`` when stdout is empty, starts with
        ``"ERROR"``, is not valid JSON, or decodes to something other than a
        JSON object.
    """
    proc = subprocess.run(
        [sys.executable, script_path, *args],
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
    )
    raw = (proc.stdout or "").strip()
    if not raw or raw.startswith("ERROR"):
        return None
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        return None
    # Callers use the result as a mapping (.get / ["profile_dir"]); a list,
    # string or number would fail later with a confusing error, so reject it.
    return parsed if isinstance(parsed, dict) else None
def _normalize_sohu_title(raw_title: str, article: Dict[str, Any]) -> str:
    """Coerce a title into the 5-72 character range Sohu requires.

    - Longer than 72 characters: truncated.
    - Shorter than 5 characters: extended with the first non-empty line of the
      article body (``content``, else ``content_html``) with HTML tags removed
      and entities decoded, then with a fixed suffix if still too short.

    Args:
        raw_title: Title as stored on the article; may be empty or ``None``.
        article: Article record used as the source of filler text.

    Returns:
        The normalized title.
    """
    import html
    import re

    title = (raw_title or "").strip()
    if len(title) > 72:
        title = title[:72].rstrip()
    if len(title) >= 5:
        return title

    body = str(article.get("content") or article.get("content_html") or "").strip()
    first_line = ""
    for line in body.splitlines():
        # The body may be HTML. Strip tags (only "<" directly followed by a
        # letter or "/letter", so plain text like "1 < 2" is untouched) and
        # decode entities so markup never leaks into the published title.
        text = html.unescape(re.sub(r"</?[A-Za-z][^<>]*>", "", line)).strip()
        if text:
            first_line = text
            break
    seed = first_line or "搜狐发布稿件"
    seed = seed.replace("\r", " ").replace("\n", " ").strip()
    # Append the seed first; if still too short, add a fixed suffix so the
    # result reaches at least 5 characters.
    merged = (title + seed).strip()
    if len(merged) < 5:
        merged = (merged + "发布稿件标题").strip()
    if len(merged) > 72:
        merged = merged[:72].rstrip()
    return merged
def get_account(account_id: str) -> Optional[Dict[str, Any]]:
    """Look up an account by running account-manager's ``get`` subcommand.

    Returns whatever ``_call_json_script`` returns for that invocation.
    """
    manager_script = os.path.join(get_skills_root(), "account-manager", "scripts", "main.py")
    cli_args = ["get", str(account_id)]
    return _call_json_script(manager_script, cli_args)
def get_article(article_id: str) -> Optional[Dict[str, Any]]:
    """Look up an article by running content-manager's ``get`` subcommand.

    Returns whatever ``_call_json_script`` returns for that invocation.
    """
    manager_script = os.path.join(get_skills_root(), "content-manager", "scripts", "main.py")
    cli_args = ["get", str(article_id)]
    return _call_json_script(manager_script, cli_args)
async def publish(account: Dict[str, Any], article: Dict[str, Any], account_id: str) -> str:
    """Publish one article through the Sohu web editor in a persistent Chrome profile.

    Args:
        account: Account record; ``profile_dir`` is used as the browser
            user-data directory.
        article: Article record; reads ``title``, ``content_html`` and
            ``content``.
        account_id: Not used by this function; kept so existing callers work.

    Returns:
        ``"SUCCESS"`` when the page leaves the ``addarticle`` URL after
        submitting, ``"ERROR:REQUIRE_LOGIN"`` when the title input does not
        become visible within 10 s, otherwise ``"FAIL:<reason>"``.

    Raises:
        KeyError: If ``account`` has no ``profile_dir``.
    """
    profile_dir = account["profile_dir"]
    original_title = str(article.get("title") or "")
    title = _normalize_sohu_title(original_title, article)
    if title != original_title:
        print(f" 标题已自动修正(搜狐要求 5-72 字):\n 原标题:{original_title}\n 新标题:{title}")
    # Use `or` so a present-but-empty/None content_html still falls back to
    # content; passing None to the page would paste the literal text "null".
    content_html = article.get("content_html") or article.get("content") or ""

    async with async_playwright() as p:
        browser = await p.chromium.launch_persistent_context(
            user_data_dir=profile_dir,
            headless=False,
            channel="chrome",
            no_viewport=True,
            permissions=["clipboard-read", "clipboard-write"],
            args=["--start-maximized"],
        )
        # try/finally guarantees the browser context is closed on every exit
        # path, including when a locator or navigation step raises.
        try:
            page = browser.pages[0] if browser.pages else await browser.new_page()
            await page.goto("https://mp.sohu.com/mpfe/v4/contentManagement/news/addarticle?contentStatus=1")
            await page.wait_for_load_state("networkidle")
            try:
                title_input = page.locator(".publish-title input").first
                await title_input.wait_for(state="visible", timeout=10000)
            except Exception:
                # The editor's title field never appeared; treated as "not logged in".
                return "ERROR:REQUIRE_LOGIN"

            print("💡 页面加载且已确认登录,开始自动填入文字...")
            await title_input.click()
            await title_input.fill("")
            await page.keyboard.insert_text(title)
            await asyncio.sleep(1)

            editor = page.locator("#editor .ql-editor").first
            await editor.click()
            # Put the article on the clipboard as both HTML and plain text,
            # then paste it into the editor with the platform paste shortcut.
            await page.evaluate(
                """(html_str) => {
                    const blobHtml = new Blob([html_str], { type: 'text/html' });
                    const blobText = new Blob([html_str], { type: 'text/plain' });
                    const item = new window.ClipboardItem({
                        'text/html': blobHtml,
                        'text/plain': blobText
                    });
                    return navigator.clipboard.write([item]);
                }""",
                content_html,
            )
            modifier = "Meta" if sys.platform == "darwin" else "Control"
            await page.keyboard.press(f"{modifier}+v")
            await asyncio.sleep(3)

            await page.locator("li.publish-report-btn").first.click()
            print("⌛ 正在提交发布,进入高压视觉核验阶段...")
            publish_success = False
            error_text = "动作执行阻断:由于特殊元素拦截、频率限制或底层报错,未能成功发出。"
            try:
                # wait_for_url also succeeds when the page has ALREADY left the
                # editor. The previous expect_navigation was armed after the
                # click, so a fast redirect was missed and a successful
                # publish was reported as a failure.
                await page.wait_for_url(lambda u: "addarticle" not in u, timeout=8000)
                publish_success = True
            except Exception:
                # Still on the editor page: try to capture a visible reason.
                try:
                    limit_text = page.locator("text=/.*已达上限.*/").first
                    if await limit_text.is_visible(timeout=1500):
                        error_text = await limit_text.inner_text()
                    else:
                        # Look for a short text in a fixed/absolute element
                        # with z-index above 80 (toast/dialog style overlay).
                        error_msg = await page.evaluate(
                            """() => {
                                const els = Array.from(document.querySelectorAll('div, span, p'));
                                for (let el of els) {
                                    const style = window.getComputedStyle(el);
                                    if ((style.position === 'fixed' || style.position === 'absolute')
                                        && parseInt(style.zIndex || 0) > 80
                                        && el.innerText.trim().length > 3
                                        && el.innerText.trim().length < 80) {
                                        return el.innerText.trim();
                                    }
                                }
                                return null;
                            }"""
                        )
                        if error_msg:
                            error_text = f"抓取到报错原文: {error_msg}"
                except Exception as e:
                    error_text = f"抓取报错文案期间遭遇环境隔离: {e}"
            await asyncio.sleep(5)
        finally:
            await browser.close()

    if publish_success:
        return "SUCCESS"
    return f"FAIL:{error_text}"
def cmd_publish(account_id: str, article_id: str) -> int:
    """Validate inputs, publish one article, and record the outcome.

    Steps: entitlement check, ``article_id`` validation, account lookup
    (platform must be ``sohu`` and ``login_status`` must be 1), article lookup,
    browser publish, then a ``publish_logs`` row and, for success or failure, a
    ``feedback`` call to content-manager.

    Args:
        account_id: Account id known to account-manager.
        article_id: Numeric article id known to content-manager.

    Returns:
        0 when the article was published, 1 otherwise.
    """
    ok, reason = check_entitlement(SKILL_SLUG)
    if not ok:
        print(f"{reason}")
        return 1
    # isdecimal() matches what int() accepts; isdigit() also accepts characters
    # such as "²" that make the later int(article_id) raise ValueError.
    if not str(article_id).isdecimal():
        print("❌ article_id 必须是数字。请先执行 content-manager 的 list 查看 id。")
        return 1
    account = get_account(account_id)
    if not account:
        print(f"❌ 查无此配置账号:{account_id}")
        return 1
    platform = str(account.get("platform") or "").strip().lower()
    if platform != "sohu":
        platform_cn_map = {
            "doubao": "豆包",
            "deepseek": "DeepSeek",
            "qianwen": "通义千问",
            "kimi": "Kimi",
            "yiyan": "文心一言",
            "yuanbao": "腾讯元宝",
            "toutiao": "头条号",
            "zhihu": "知乎",
            "wechat": "微信公众号",
            "sohu": "搜狐号",
        }
        got_cn = platform_cn_map.get(platform, platform or "未知平台")
        print("❌ 账号平台不匹配:当前账号不是「搜狐号」。")
        print(f"当前 account_id={account_id} 对应平台:{got_cn}platform={platform or 'unknown'}")
        print("请换一个搜狐账号 id 后重试。")
        print("可先执行python account-manager/scripts/main.py list sohu")
        return 1
    # A non-numeric login_status is treated as "not logged in" instead of
    # aborting with a traceback.
    try:
        login_status = int(account.get("login_status") or 0)
    except (TypeError, ValueError):
        login_status = 0
    if login_status != 1:
        print("❌ 该搜狐账号当前未登录,暂不能发布。")
        print("请先手工登录,再执行发布:")
        print(f" python account-manager/scripts/main.py login {account_id}")
        print(f"登录完成后再执行python sohu-publisher/scripts/main.py publish {account_id} {article_id}")
        return 1
    article = get_article(article_id)
    if not article:
        print(f"❌ 查无此文章编号(库中无 ID: {article_id}")
        return 1

    # Any browser-automation error is converted into a FAIL result so the
    # attempt is still logged and reported to content-manager below.
    try:
        result = asyncio.run(publish(account, article, account_id))
    except Exception as exc:
        result = f"FAIL:{type(exc).__name__}: {exc}"

    content_script = os.path.join(get_skills_root(), "content-manager", "scripts", "main.py")
    # Normalize a missing/None title so messages never show "None".
    title = article.get("title") or ""
    if result == "ERROR:REQUIRE_LOGIN":
        save_publish_log(account_id, int(article_id), title, "require_login", "账号未登录或登录已失效")
        print(f"⚠️ 搜狐号 ({account_id}) 登录状态已失效,发布流程已中止。")
        print("请先手工完成登录,再重新发布:")
        print(f" python account-manager/scripts/main.py login {account_id}")
        print(f" python sohu-publisher/scripts/main.py publish {account_id} {article_id}")
        return 1
    if result == "SUCCESS":
        log_id = save_publish_log(account_id, int(article_id), title, "published", None)
        subprocess.run([sys.executable, content_script, "feedback", article_id, "published", account_id])
        print(f"🎉 发布成功:{title}")
        print(f"✅ 发布日志已记录log_id={log_id}")
        return 0
    if result.startswith("FAIL:"):
        error_msg = result[len("FAIL:"):]
        log_id = save_publish_log(account_id, int(article_id), title, "failed", error_msg)
        subprocess.run([sys.executable, content_script, "feedback", article_id, "failed", account_id, error_msg])
        print(f"❌ 发布失败:{error_msg}")
        print(f"✅ 失败日志已记录log_id={log_id}")
        return 1
    # Unrecognized result string: record it as a failure.
    save_publish_log(account_id, int(article_id), title, "failed", f"未知结果:{result}")
    return 1
def cmd_logs(limit: int = 10, status: Optional[str] = None, account_id: Optional[str] = None) -> int:
    """Print publish records, newest first, optionally filtered.

    Args:
        limit: Maximum number of rows; a non-positive value is replaced by 10.
        status: When truthy, only rows with exactly this status are listed.
        account_id: When truthy, only rows for this account are listed.

    Returns:
        Always 0.
    """
    init_db()
    max_rows = limit if limit > 0 else 10

    query_parts = [
        "SELECT id, account_id, article_id, article_title, status, error_msg, created_at, updated_at ",
        "FROM publish_logs WHERE 1=1 ",
    ]
    bind_values: List[Any] = []
    for clause, value in (("AND status = ? ", status), ("AND account_id = ? ", account_id)):
        if value:
            query_parts.append(clause)
            bind_values.append(value)
    query_parts.append("ORDER BY created_at DESC, id DESC LIMIT ?")
    bind_values.append(int(max_rows))

    db = get_conn()
    try:
        cursor = db.cursor()
        cursor.execute("".join(query_parts), tuple(bind_values))
        records = cursor.fetchall()
    finally:
        db.close()

    if not records:
        print("暂无发布记录")
        return 0

    divider = "_" * 39
    final_index = len(records) - 1
    for position, record in enumerate(records):
        rid, aid, arid, title, st, err, cat, uat = record
        output = [
            f"id{rid}",
            f"account_id{aid or ''}",
            f"article_id{arid}",
            f"article_title{title or ''}",
            f"status{st or ''}",
            f"error_msg{err or ''}",
            # Fall back to the raw stored value when it cannot be formatted.
            f"created_at{_unix_to_iso(cat) or str(cat or '')}",
            f"updated_at{_unix_to_iso(uat) or str(uat or '')}",
        ]
        if position != final_index:
            # Divider plus an empty line between records, not after the last.
            output.extend([divider, ""])
        print("\n".join(output))
    return 0
def cmd_log_get(log_id: str) -> int:
    """Print a single publish record as one line of JSON.

    Args:
        log_id: Record id; must consist of digits only.

    Returns:
        0 when the record was printed; 1 when ``log_id`` is not numeric or no
        such record exists.
    """
    if not str(log_id).isdigit():
        print("❌ log_id 必须是数字")
        return 1
    init_db()

    columns = (
        "id",
        "account_id",
        "article_id",
        "article_title",
        "status",
        "error_msg",
        "created_at",
        "updated_at",
    )
    query = "SELECT " + ", ".join(columns) + " FROM publish_logs WHERE id = ?"
    db = get_conn()
    try:
        cursor = db.cursor()
        cursor.execute(query, (int(log_id),))
        record = cursor.fetchone()
    finally:
        db.close()
    if not record:
        print("❌ 没有这条发布记录")
        return 1

    # Keys keep the column order; ids are coerced to int and timestamps are
    # rendered as ISO strings.
    payload = dict(zip(columns, record))
    payload["id"] = int(payload["id"])
    payload["article_id"] = int(payload["article_id"])
    payload["created_at"] = _unix_to_iso(payload["created_at"])
    payload["updated_at"] = _unix_to_iso(payload["updated_at"])
    print(json.dumps(payload, ensure_ascii=False))
    return 0
class ZhArgumentParser(argparse.ArgumentParser):
    """ArgumentParser that reports usage errors with a Chinese message."""

    def error(self, message: str) -> None:
        """Write the error and a help hint to stderr, then exit with status 2."""
        notice = f"参数错误:{message}\n请执行python main.py -h 查看帮助"
        print(notice, file=sys.stderr)
        self.exit(status=2)
def _print_full_usage() -> None:
    """Print the command overview shown when the script is run without arguments."""
    usage_lines = (
        "搜狐号发布main.py可用命令",
        " python main.py publish <account_id> <article_id> # 发布一篇",
        " python main.py logs [--limit N] [--status s] [--account-id a] # 查看发布记录",
        " python main.py log-get <log_id> # 查看单条日志(JSON)",
        " python main.py health",
        " python main.py version",
        "",
        "常见示例:",
        " python main.py publish sohu_account1 12",
        " python main.py logs",
        " python main.py logs --status failed --limit 20",
        " python main.py log-get 7",
        "",
        "说明也兼容旧写法python main.py <account_id> <article_id>",
    )
    for text in usage_lines:
        print(text)
def build_parser() -> ZhArgumentParser:
    """Build the command-line parser.

    Each subcommand stores a callable under ``handler``; it takes the parsed
    namespace and returns the process exit code.
    """

    def _run_publish(ns):
        return cmd_publish(ns.account_id, ns.article_id)

    def _run_logs(ns):
        return cmd_logs(limit=ns.limit, status=ns.status, account_id=ns.account_id)

    def _run_log_get(ns):
        return cmd_log_get(ns.log_id)

    def _run_health(_ns):
        # Exit code 0 only on Python 3.10 or newer.
        return 0 if sys.version_info >= (3, 10) else 1

    def _run_version(_ns):
        print(json.dumps({"version": SKILL_VERSION, "skill": SKILL_SLUG}, ensure_ascii=False))
        return 0

    examples = "\n".join(
        [
            "示例:",
            " python main.py publish sohu_account1 12",
            " python main.py logs",
            " python main.py logs --status failed --limit 20",
            " python main.py log-get 7",
            " python main.py health",
            " python main.py version",
        ]
    )
    parser = ZhArgumentParser(
        prog="main.py",
        description="搜狐号发布:发布文章、查看发布记录、查询单条日志。",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=examples,
    )
    commands = parser.add_subparsers(dest="cmd", required=True, parser_class=ZhArgumentParser)

    publish_cmd = commands.add_parser("publish", help="发布一篇文章到搜狐号")
    publish_cmd.add_argument("account_id", help="账号 id来自 account-manager list")
    publish_cmd.add_argument("article_id", help="文章 id来自 content-manager list")
    publish_cmd.set_defaults(handler=_run_publish)

    logs_cmd = commands.add_parser("logs", help="查看发布记录(默认最近 10 条)")
    logs_cmd.add_argument("--limit", type=int, default=10, help="最多显示条数(默认 10")
    logs_cmd.add_argument("--status", default=None, help="按状态筛选published/failed/require_login")
    logs_cmd.add_argument("--account-id", default=None, help="按账号 id 筛选")
    logs_cmd.set_defaults(handler=_run_logs)

    log_get_cmd = commands.add_parser("log-get", help="按 log_id 查看单条发布记录(JSON)")
    log_get_cmd.add_argument("log_id", help="日志 id整数")
    log_get_cmd.set_defaults(handler=_run_log_get)

    health_cmd = commands.add_parser("health", help="健康检查")
    health_cmd.set_defaults(handler=_run_health)

    version_cmd = commands.add_parser("version", help="版本信息(JSON)")
    version_cmd.set_defaults(handler=_run_version)
    return parser
def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point.

    Args:
        argv: Arguments without the program name; defaults to ``sys.argv[1:]``.

    Returns:
        The process exit code: 1 when called without arguments (usage is
        printed), otherwise the selected command's return value.
    """
    argv = argv if argv is not None else sys.argv[1:]
    if not argv:
        _print_full_usage()
        return 1
    reserved = {"publish", "logs", "log-get", "health", "version", "-h", "--help"}
    # Legacy form: python main.py <account_id> <article_id>.
    # Anything starting with "-" is an option, not an account id; without this
    # guard a call like `--limit 5` would be routed to cmd_publish instead of
    # producing a usage error from the parser.
    if len(argv) == 2 and argv[0] not in reserved and not argv[0].startswith("-"):
        return cmd_publish(argv[0], argv[1])
    parser = build_parser()
    args = parser.parse_args(argv)
    return int(args.handler(args))
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())