chore: rebuild CentOS7 release package
This commit is contained in:
Binary file not shown.
@@ -22,6 +22,8 @@ const RUNTIME_STREAM_DELTA_MAX_CHARS = readPositiveIntEnv('CC_WEB_CODEX_APP_STRE
|
||||
const RUNTIME_MAX_TOOL_CALLS = readPositiveIntEnv('CC_WEB_CODEX_APP_RUNTIME_MAX_TOOL_CALLS', 120, { min: 1, max: 1000 });
|
||||
const RUNTIME_TRUNCATED_HEAD = '[cc-web: 前文过长,已保留尾部以保护服务稳定性]\n';
|
||||
const RUNTIME_TRUNCATED_TAIL = '\n[cc-web: 内容过长,已截断以保护服务稳定性]';
|
||||
const CODEX_APP_PLAN_ITEM_TYPES = new Set(['plan', 'plan_list', 'planlist', 'todo', 'todo_list', 'todolist', 'task_list']);
|
||||
const CODEX_APP_PLAN_TOOL_NAMES = new Set(['update_plan', 'plan', 'plan_list', 'todo_list', 'updateplan', 'todolist']);
|
||||
|
||||
function createCodexAppRuntime(deps = {}) {
|
||||
const {
|
||||
@@ -131,6 +133,123 @@ function createCodexAppRuntime(deps = {}) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Reduce an arbitrary identifier to a canonical lowercase snake-like key.
 *
 * Falsy input becomes the empty string. Otherwise the value is stringified,
 * trimmed and lowercased, every run of characters outside `a-z0-9` collapses
 * into a single underscore, and leading/trailing underscores are removed.
 *
 * @param {*} value - Identifier-like value (type name, tool name, status, ...).
 * @returns {string} Canonical key, possibly empty.
 */
function normalizeIdentifier(value) {
  const raw = String(value || '');
  const lowered = raw.trim().toLowerCase();
  // Anything that is not a lowercase letter or digit acts as a separator.
  const underscored = lowered.replace(/[^a-z0-9]+/g, '_');
  return underscored.replace(/^_+|_+$/g, '');
}
|
||||
|
||||
/**
 * Decide whether a tool name refers to a plan/todo-list tool.
 *
 * The name is canonicalised with `normalizeIdentifier` and then accepted when
 * it is a member of `CODEX_APP_PLAN_TOOL_NAMES` or ends with `_update_plan`.
 *
 * @param {*} value - Raw tool name.
 * @returns {boolean} True when the name identifies a plan tool.
 */
function isPlanToolName(value) {
  const normalized = normalizeIdentifier(value);
  if (CODEX_APP_PLAN_TOOL_NAMES.has(normalized)) return true;
  return normalized.endsWith('_update_plan');
}
|
||||
|
||||
/**
 * Decide whether an item represents a plan / todo list.
 *
 * An item qualifies when its canonicalised `type` is in
 * `CODEX_APP_PLAN_ITEM_TYPES`, or when the first truthy value among `tool`,
 * `name`, `functionName` and `function.name` is a plan tool name.
 *
 * @param {*} item - Candidate item; non-objects never qualify.
 * @returns {boolean} True when the item looks like a plan.
 */
function isPlanLikeItem(item) {
  const isObject = Boolean(item) && typeof item === 'object';
  if (!isObject) return false;

  const typeKey = normalizeIdentifier(item.type);
  if (CODEX_APP_PLAN_ITEM_TYPES.has(typeKey)) return true;

  const toolName = item.tool || item.name || item.functionName || item.function?.name;
  return isPlanToolName(toolName);
}
|
||||
|
||||
/**
 * Parse a string as JSON when it looks like a JSON object or array.
 *
 * Non-string values are returned untouched. A string is only parsed when its
 * trimmed form starts with `{` or `[`; if parsing fails, the original
 * (untrimmed) string is returned instead of throwing.
 *
 * @param {*} value - Value that may be a JSON-encoded string.
 * @returns {*} The parsed object/array, or `value` unchanged.
 */
function parseMaybeJsonValue(value) {
  if (typeof value !== 'string') return value;

  const candidate = value.trim();
  const looksLikeJson = candidate.startsWith('{') || candidate.startsWith('[');
  if (!looksLikeJson) return value;

  try {
    return JSON.parse(candidate);
  } catch {
    // Not valid JSON after all: hand the caller back what it gave us.
    return value;
  }
}
|
||||
|
||||
/**
 * Locate the array of plan entries inside a loosely structured payload.
 *
 * Lookup order, after JSON-decoding string input via `parseMaybeJsonValue`:
 *   1. the value itself, when it is an array;
 *   2. the first of `plan`, `items`, `todos`, `tasks`, `steps` that is an array;
 *   3. a recursive search through `arguments`, `input`, `params`, `payload`,
 *      `structuredContent`, `result`;
 *   4. a recursive search through the `text` of each `contentItems` part.
 *
 * @param {*} value - Payload to inspect (object, array or JSON string).
 * @param {number} [depth=0] - Current recursion depth; the search gives up
 *   once it exceeds 3.
 * @returns {Array|null} The entries array, or null when none is found.
 */
function extractPlanEntries(value, depth = 0) {
  if (value == null || depth > 3) return null;

  const parsed = parseMaybeJsonValue(value);
  if (Array.isArray(parsed)) return parsed;
  if (typeof parsed !== 'object' || !parsed) return null;

  // Direct containers: the entries sit right on this object.
  for (const key of ['plan', 'items', 'todos', 'tasks', 'steps']) {
    const direct = parsed[key];
    if (Array.isArray(direct)) return direct;
  }

  // Wrapper properties: the entries may be one level further down.
  for (const wrapperKey of ['arguments', 'input', 'params', 'payload', 'structuredContent', 'result']) {
    const found = extractPlanEntries(parsed[wrapperKey], depth + 1);
    if (found) return found;
  }

  // Content parts may carry the plan as (JSON) text.
  const parts = Array.isArray(parsed.contentItems) ? parsed.contentItems : [];
  for (const part of parts) {
    const partText = typeof part?.text === 'string' ? part.text : '';
    const found = extractPlanEntries(partText, depth + 1);
    if (found) return found;
  }

  return null;
}
|
||||
|
||||
/**
 * Report whether a plan entry is marked as finished.
 *
 * An entry counts as completed when `completed` or `done` is strictly `true`,
 * or when its canonicalised `status` (falling back to `state`) is one of
 * `completed`, `complete`, `done`, `success`, `succeeded`.
 *
 * @param {*} entry - Plan entry; non-objects are never completed.
 * @returns {boolean} True when the entry is finished.
 */
function planEntryCompleted(entry) {
  if (typeof entry !== 'object' || !entry) return false;

  const flagged = entry.completed === true || entry.done === true;
  if (flagged) return true;

  switch (normalizeIdentifier(entry.status || entry.state)) {
    case 'completed':
    case 'complete':
    case 'done':
    case 'success':
    case 'succeeded':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Extract the display text of a plan entry.
 *
 * String entries are returned as-is. For object entries the fields `step`,
 * `text`, `title`, `name`, `description`, `task`, `item` and `content` are
 * checked in that order and the first usable one wins.
 *
 * The result is always a string. Previously a truthy non-string field (an
 * object, array or boolean) was returned verbatim and leaked into the text
 * truncation and the serialized todo list; such fields are now skipped so a
 * later textual field can be used. Truthy finite numbers are kept, stringified.
 *
 * @param {*} entry - Plan entry (string or object).
 * @returns {string} Entry text, or '' when none is available.
 */
function planEntryText(entry) {
  if (typeof entry === 'string') return entry;
  if (!entry || typeof entry !== 'object') return '';

  const fields = ['step', 'text', 'title', 'name', 'description', 'task', 'item', 'content'];
  for (const field of fields) {
    const value = entry[field];
    if (typeof value === 'string' && value) return value;
    // Zero stays skipped, matching the original truthiness-based lookup.
    if (typeof value === 'number' && Number.isFinite(value) && value !== 0) return String(value);
  }
  return '';
}
|
||||
|
||||
/**
 * Convert a plan-like item into a normalized `todo_list` structure.
 *
 * Returns null when the item is not plan-like or when no entries array can be
 * found. Entries are searched, in order, in `arguments`, `input`, `params`,
 * `payload`, `structuredContent`, `result.structuredContent`, `result` and
 * finally the item itself. Entries whose (truncated) text is empty are dropped.
 *
 * @param {*} item - Raw item emitted by the app runtime.
 * @returns {{id: *, type: 'todo_list', items: Array<{text: *, completed: boolean}>}|null}
 *   The normalized list; `id` falls back to `'codex-app-plan'`.
 */
function normalizeTodoListFromPlanItem(item) {
  if (!isPlanLikeItem(item)) return null;

  // Most specific containers first; the item itself is the last resort.
  const sources = [
    item.arguments,
    item.input,
    item.params,
    item.payload,
    item.structuredContent,
    item.result?.structuredContent,
    item.result,
    item,
  ];

  let entries = null;
  for (const source of sources) {
    const found = extractPlanEntries(source);
    if (found) {
      entries = found;
      break;
    }
  }
  if (!Array.isArray(entries)) return null;

  const items = [];
  for (const entry of entries) {
    const text = truncateEnd(planEntryText(entry), RUNTIME_TOOL_INPUT_MAX_CHARS);
    if (text) {
      items.push({ text, completed: planEntryCompleted(entry) });
    }
  }

  const id = item.id || item.itemId || item.planId || 'codex-app-plan';
  return { id, type: 'todo_list', items };
}
|
||||
|
||||
/**
 * Build a plan item from the params of a plan-update notification.
 *
 * The result is a shallow merge of `params` and `params.item` (item fields
 * win), with these keys then set explicitly:
 *   - `id`: `item.id`, else `params.itemId`, `params.id`, `params.planId`,
 *     else `'codex-app-plan'`;
 *   - `type`: item/params value, else `'planList'`;
 *   - `status`: item/params value, else `'inProgress'`;
 *   - `plan`, `items`, `todos`, `tasks`: item value, else params value.
 *
 * @param {object} [params={}] - Notification params, optionally with `item`.
 * @returns {object} Merged plan item.
 */
function planUpdateItemFromParams(params = {}) {
  const hasNestedItem = Boolean(params.item) && typeof params.item === 'object';
  const nested = hasNestedItem ? { ...params.item } : {};
  const preferNested = (key) => nested[key] || params[key];

  const merged = { ...params, ...nested };
  merged.id = nested.id || params.itemId || params.id || params.planId || 'codex-app-plan';
  merged.type = preferNested('type') || 'planList';
  merged.status = preferNested('status') || 'inProgress';
  merged.plan = preferNested('plan');
  merged.items = preferNested('items');
  merged.todos = preferNested('todos');
  merged.tasks = preferNested('tasks');
  return merged;
}
|
||||
|
||||
function codexAppErrorMessage(value) {
|
||||
if (!value) return '';
|
||||
if (typeof value === 'string') return value;
|
||||
@@ -178,6 +297,7 @@ function createCodexAppRuntime(deps = {}) {
|
||||
}
|
||||
|
||||
function itemKind(item) {
|
||||
if (normalizeTodoListFromPlanItem(item)) return 'todo_list';
|
||||
switch (item?.type) {
|
||||
case 'commandExecution':
|
||||
return 'command_execution';
|
||||
@@ -203,6 +323,7 @@ function createCodexAppRuntime(deps = {}) {
|
||||
}
|
||||
|
||||
function itemName(item) {
|
||||
if (normalizeTodoListFromPlanItem(item)) return 'PlanList';
|
||||
switch (item?.type) {
|
||||
case 'commandExecution':
|
||||
return 'CommandExecution';
|
||||
@@ -227,6 +348,8 @@ function createCodexAppRuntime(deps = {}) {
|
||||
|
||||
function itemInput(item) {
|
||||
if (!item) return null;
|
||||
const todoList = normalizeTodoListFromPlanItem(item);
|
||||
if (todoList) return todoList;
|
||||
switch (item.type) {
|
||||
case 'commandExecution':
|
||||
return { command: truncateEnd(item.command || '', RUNTIME_TOOL_INPUT_MAX_CHARS) };
|
||||
@@ -292,6 +415,14 @@ function createCodexAppRuntime(deps = {}) {
|
||||
|
||||
function itemMeta(item) {
|
||||
if (!item) return null;
|
||||
if (normalizeTodoListFromPlanItem(item)) {
|
||||
return {
|
||||
kind: 'todo_list',
|
||||
title: 'Plan List',
|
||||
subtitle: item.explanation || item.title || item.tool || '',
|
||||
status: item.status || null,
|
||||
};
|
||||
}
|
||||
switch (item.type) {
|
||||
case 'commandExecution':
|
||||
return {
|
||||
@@ -346,6 +477,8 @@ function createCodexAppRuntime(deps = {}) {
|
||||
|
||||
function itemResult(item) {
|
||||
if (!item) return '';
|
||||
const todoList = normalizeTodoListFromPlanItem(item);
|
||||
if (todoList) return JSON.stringify(todoList, null, 2);
|
||||
switch (item.type) {
|
||||
case 'commandExecution':
|
||||
return truncateEnd(item.aggregatedOutput || '', RUNTIME_TOOL_RESULT_MAX_CHARS);
|
||||
@@ -390,7 +523,7 @@ function createCodexAppRuntime(deps = {}) {
|
||||
toolCall.name = itemName(item);
|
||||
toolCall.kind = kind;
|
||||
toolCall.meta = itemMeta(item) || toolCall.meta || null;
|
||||
if (toolCall.input == null) toolCall.input = itemInput(item);
|
||||
if (toolCall.input == null || kind === 'todo_list') toolCall.input = itemInput(item);
|
||||
return toolCall;
|
||||
}
|
||||
|
||||
@@ -608,6 +741,22 @@ function createCodexAppRuntime(deps = {}) {
|
||||
return { done: false };
|
||||
}
|
||||
|
||||
case 'plan/updated':
|
||||
case 'turn/plan/updated':
|
||||
case 'item/plan/updated':
|
||||
case 'item/todoList/updated': {
|
||||
const item = planUpdateItemFromParams(params);
|
||||
const todoList = normalizeTodoListFromPlanItem(item);
|
||||
if (!todoList) return { done: false };
|
||||
updateToolResult(entry, sessionId, todoList.id, JSON.stringify(todoList, null, 2), false, {
|
||||
name: 'PlanList',
|
||||
kind: 'todo_list',
|
||||
input: todoList,
|
||||
meta: itemMeta(item),
|
||||
});
|
||||
return { done: false };
|
||||
}
|
||||
|
||||
case 'item/reasoning/summaryTextDelta':
|
||||
case 'item/reasoning/textDelta': {
|
||||
const itemId = params.itemId;
|
||||
|
||||
447
public/rag-for-pm-v2.html
Normal file
447
public/rag-for-pm-v2.html
Normal file
@@ -0,0 +1,447 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width,initial-scale=1">
|
||||
<title>RAG 入门:原理、流程与使用</title>
|
||||
<style>
|
||||
:root{
|
||||
--bg:#f6f8fb;
|
||||
--paper:#ffffff;
|
||||
--ink:#0a2540;
|
||||
--muted:#486176;
|
||||
--soft:#eef3f8;
|
||||
--line:#d9e2ec;
|
||||
--blue:#1d4ed8;
|
||||
--mint:#0f9f8f;
|
||||
--orange:#d97706;
|
||||
--rose:#e11d48;
|
||||
--shadow:0 14px 34px rgba(10,37,64,.10),0 2px 7px rgba(10,37,64,.05);
|
||||
}
|
||||
*{box-sizing:border-box}
|
||||
html,body{margin:0;width:100%;height:100%;overflow:hidden;background:var(--bg);color:var(--ink);font-family:-apple-system,BlinkMacSystemFont,"Segoe UI","Noto Sans SC","Microsoft YaHei",Arial,sans-serif}
|
||||
.deck{position:relative;width:100vw;height:100vh;overflow:hidden;background:linear-gradient(135deg,#f8fafc,#eef3f8)}
|
||||
.slide{position:absolute;inset:0;display:none;padding:72px 88px;background:radial-gradient(circle at 85% 12%,rgba(29,78,216,.08),transparent 28%),var(--paper);align-items:center;justify-content:center}
|
||||
.slide.active{display:flex}
|
||||
.slide-inner{width:100%;max-height:calc(100vh - 150px)}
|
||||
.slide::before{content:"";position:absolute;left:0;top:0;bottom:0;width:8px;background:linear-gradient(180deg,var(--blue),var(--mint))}
|
||||
.topline{position:absolute;left:88px;right:88px;top:28px;display:flex;justify-content:space-between;align-items:center;font-size:12px;letter-spacing:.14em;text-transform:uppercase;color:#8aa0b5}
|
||||
.footer{position:absolute;left:88px;right:88px;bottom:24px;display:flex;justify-content:space-between;color:#8aa0b5;font-size:12px}
|
||||
.progress{position:fixed;left:0;right:0;bottom:0;height:4px;background:#dbe4ee;z-index:20}
|
||||
.progress span{display:block;height:100%;width:0;background:linear-gradient(90deg,var(--blue),var(--mint));transition:width .2s ease}
|
||||
.nav-controls{position:fixed;right:28px;bottom:26px;z-index:30;display:flex;align-items:center;gap:8px;padding:8px;border:1px solid rgba(10,37,64,.12);border-radius:999px;background:rgba(255,255,255,.92);box-shadow:0 8px 26px rgba(10,37,64,.14);backdrop-filter:blur(10px)}
|
||||
.nav-btn{width:38px;height:38px;border:0;border-radius:50%;background:#0a2540;color:#fff;font-size:22px;line-height:1;display:flex;align-items:center;justify-content:center;cursor:pointer}
|
||||
.nav-btn:disabled{opacity:.32;cursor:not-allowed}
|
||||
.nav-index{min-width:58px;text-align:center;font-size:13px;font-weight:800;color:var(--muted)}
|
||||
.kicker{margin:0 0 12px;color:var(--blue);font-size:15px;font-weight:700;letter-spacing:.1em;text-transform:uppercase}
|
||||
h1,h2,h3,h4,p{margin:0}
|
||||
h1{font-size:68px;line-height:1.05;letter-spacing:0;font-weight:800;max-width:980px}
|
||||
h2{font-size:44px;line-height:1.15;letter-spacing:0;font-weight:760;max-width:1040px}
|
||||
h3{font-size:24px;line-height:1.25;font-weight:740}
|
||||
h4{font-size:18px;line-height:1.3;font-weight:730}
|
||||
.lead{font-size:23px;line-height:1.55;color:var(--muted);max-width:900px;margin-top:22px}
|
||||
.muted{color:var(--muted)}
|
||||
.accent{background:linear-gradient(135deg,var(--blue),var(--mint));-webkit-background-clip:text;background-clip:text;color:transparent}
|
||||
.row{display:flex;gap:16px;align-items:stretch}
|
||||
.grid2{display:grid;grid-template-columns:repeat(2,1fr);gap:18px}
|
||||
.grid3{display:grid;grid-template-columns:repeat(3,1fr);gap:18px}
|
||||
.grid4{display:grid;grid-template-columns:repeat(4,1fr);gap:16px}
|
||||
.mt{margin-top:30px}.mt-sm{margin-top:16px}.mt-lg{margin-top:42px}
|
||||
.card{background:#fff;border:1px solid var(--line);border-radius:10px;box-shadow:var(--shadow);padding:22px}
|
||||
.card.soft{background:var(--soft);box-shadow:none}
|
||||
.tag{display:inline-flex;align-items:center;justify-content:center;border-radius:999px;padding:5px 12px;background:#edf4ff;color:var(--blue);font-size:12px;font-weight:800}
|
||||
.tag.mint{background:#e8faf7;color:var(--mint)}
|
||||
.tag.orange{background:#fff4df;color:var(--orange)}
|
||||
.tag.rose{background:#fff0f4;color:var(--rose)}
|
||||
.num{width:40px;height:40px;border-radius:9px;background:var(--blue);color:#fff;display:flex;align-items:center;justify-content:center;font-weight:800;margin-bottom:14px}
|
||||
.num.mint{background:var(--mint)}.num.orange{background:var(--orange)}.num.rose{background:var(--rose)}
|
||||
.pillbar{display:flex;flex-wrap:wrap;gap:10px;margin-top:32px}
|
||||
.pill{border:1px solid var(--line);background:#fff;border-radius:999px;padding:8px 14px;font-size:14px;color:var(--muted);font-weight:650}
|
||||
.stage{display:grid;grid-template-columns:1fr 52px 1fr 52px 1fr;gap:10px;align-items:center;margin-top:38px}
|
||||
.stage .box{text-align:center;padding:28px 18px;border:1px solid var(--line);border-radius:12px;background:#fff;box-shadow:var(--shadow)}
|
||||
.stage .box.main{background:var(--blue);color:#fff}
|
||||
.stage .box.main p{color:#dbeafe}
|
||||
.arrow{font-size:34px;color:#93a4b5;text-align:center;font-weight:900}
|
||||
.compare{display:grid;grid-template-columns:1fr 88px 1fr;gap:22px;align-items:center;margin-top:34px}
|
||||
.compare .mid{text-align:center;font-size:42px;color:#9aacbd;font-weight:900}
|
||||
ul{margin:14px 0 0;padding-left:22px;color:var(--muted);line-height:1.85;font-size:18px}
|
||||
.small{font-size:14px;line-height:1.55;color:var(--muted)}
|
||||
.flow{display:grid;grid-template-columns:repeat(5,1fr);gap:12px;margin-top:30px}
|
||||
.flow .step{position:relative;background:#fff;border:1px solid var(--line);border-radius:10px;box-shadow:var(--shadow);padding:18px;min-height:150px}
|
||||
.flow .step h4{margin:8px 0 6px}.flow .step p{font-size:13px;line-height:1.5;color:var(--muted)}
|
||||
.flow .step.hot{border-color:rgba(29,78,216,.45);box-shadow:0 0 0 3px rgba(29,78,216,.10),var(--shadow)}
|
||||
.chunks{display:grid;grid-template-columns:repeat(3,1fr);gap:14px;margin-top:24px}
|
||||
.chunk{border:1px dashed #9fb0c2;background:#fff;border-radius:9px;padding:17px;min-height:122px}
|
||||
.chunk b{color:var(--blue);font-size:14px}.chunk p{font-size:13px;line-height:1.6;color:var(--muted);margin-top:8px}
|
||||
.vector{display:grid;grid-template-columns:repeat(10,1fr);gap:6px;margin-top:14px}
|
||||
.bar{height:44px;border-radius:5px;background:var(--blue);opacity:.28}.bar:nth-child(2n){opacity:.55}.bar:nth-child(3n){opacity:.82}.bar:nth-child(5n){opacity:.42}
|
||||
.search{display:grid;grid-template-columns:1.02fr .98fr;gap:20px;margin-top:26px}
|
||||
.result{display:flex;gap:12px;border:1px solid var(--line);border-radius:9px;background:#fff;padding:14px;margin-top:10px}
|
||||
.rank{width:34px;height:34px;border-radius:8px;background:#edf4ff;color:var(--blue);display:flex;align-items:center;justify-content:center;font-weight:900;flex-shrink:0}
|
||||
.prompt{background:#09213a;color:#e8f1ff;border-radius:12px;box-shadow:var(--shadow);padding:22px 24px;font-family:"SFMono-Regular",Consolas,"Liberation Mono",monospace;font-size:15px;line-height:1.85}
|
||||
.prompt .dim{color:#8fb1d6}.prompt .mark{color:#fde68a}
|
||||
.agent-map{display:grid;grid-template-columns:1fr 46px 1fr 46px 1fr;gap:10px;align-items:center;margin-top:28px}
|
||||
.agent{background:#fff;border:1px solid var(--line);border-radius:10px;box-shadow:var(--shadow);padding:20px;text-align:center;min-height:170px}
|
||||
.agent p{font-size:13px;line-height:1.55;color:var(--muted);margin-top:8px}
|
||||
.summary{display:grid;grid-template-columns:repeat(4,1fr);gap:16px;margin-top:34px}
|
||||
.summary .card{min-height:150px}
|
||||
.learning-map{display:grid;grid-template-columns:1fr 38px 1fr 38px 1fr 38px 1fr 38px 1fr;gap:10px;align-items:stretch;margin-top:34px}
|
||||
.map-card{background:#fff;border:1px solid var(--line);border-radius:12px;box-shadow:var(--shadow);padding:20px 16px;min-height:270px}
|
||||
.map-card .num{margin-bottom:16px}
|
||||
.map-card h3{font-size:21px}
|
||||
.map-list{margin:14px 0 0;padding-left:18px;color:var(--muted);font-size:14px;line-height:1.75}
|
||||
.map-arrow{display:flex;align-items:center;justify-content:center;font-size:28px;color:#9aacbd;font-weight:900}
|
||||
.mini-flow{display:grid;grid-template-columns:1fr 44px 1fr 44px 1fr;gap:10px;align-items:center;margin-top:32px}
|
||||
.mini-flow .node{background:#fff;border:1px solid var(--line);border-radius:10px;box-shadow:var(--shadow);padding:18px;text-align:center;min-height:126px}
|
||||
.mini-flow .node.main{background:var(--blue);color:#fff}
|
||||
.mini-flow .node.main p{color:#dbeafe}
|
||||
.mini-flow p{font-size:13px;line-height:1.5;color:var(--muted);margin-top:6px}
|
||||
.mcp-grid{display:grid;grid-template-columns:1fr 1fr 1fr;gap:16px;margin-top:30px}
|
||||
.mcp-card{background:#fff;border:1px solid var(--line);border-radius:10px;box-shadow:var(--shadow);padding:22px;min-height:205px}
|
||||
.tool-list{display:grid;grid-template-columns:repeat(5,1fr);gap:10px;margin-top:24px}
|
||||
.tool-list span{display:flex;align-items:center;justify-content:center;min-height:58px;border-radius:10px;background:var(--soft);border:1px solid var(--line);font-size:14px;font-weight:800;color:var(--muted)}
|
||||
.notes{display:none}
|
||||
@media (max-width:900px){
|
||||
.slide{padding:62px 28px 54px;overflow:auto;align-items:flex-start}
|
||||
.slide-inner{max-height:none}
|
||||
.topline,.footer{left:28px;right:28px}
|
||||
h1{font-size:42px}h2{font-size:32px}.lead{font-size:18px}
|
||||
.grid2,.grid3,.grid4,.flow,.chunks,.search,.summary,.learning-map,.mcp-grid,.tool-list{grid-template-columns:1fr}
|
||||
.stage,.compare,.agent-map,.mini-flow{grid-template-columns:1fr}
|
||||
.arrow,.map-arrow,.compare .mid{display:none}
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="deck" id="deck">
|
||||
|
||||
<section class="slide active" data-title="封面">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>RAG Primer</span><span>原理和使用</span></div>
|
||||
<p class="kicker">RAG · LLM · Retrieval · Prompt · Agent · MCP</p>
|
||||
<h1>RAG 入门:让 AI <span class="accent">先查资料</span>再回答</h1>
|
||||
<p class="lead">从 LLM 的对话限制出发,理解 RAG 的建库、检索、排序、提示词和 Agent 扩展。</p>
|
||||
<div class="pillbar">
|
||||
<span class="pill">RAG解释</span><span class="pill">LLM解释</span><span class="pill">上下文限制</span><span class="pill">幻觉</span><span class="pill">注意力顺序</span><span class="pill">切片</span><span class="pill">向量化</span><span class="pill">排序</span><span class="pill">语义检索</span><span class="pill">提示词工程</span><span class="pill">AGENTS</span><span class="pill">MCP</span>
|
||||
</div>
|
||||
<div class="footer"><span>RAG 原理和使用</span><span class="page"></span></div>
|
||||
<div class="notes">开场不用讲算法,先讲主线:大模型会说话,但它不是企业知识库。因为它有上下文、幻觉、注意力和顺序方面的限制,所以需要 RAG 这套“先查资料,再组织答案”的机制。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="目录">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>Roadmap</span><span>学习路径</span></div>
|
||||
<p class="kicker">由浅入深</p>
|
||||
<h2>从“会生成”到“会查资料、会调用工具”</h2>
|
||||
<div class="learning-map">
|
||||
<div class="map-card"><div class="num">1</div><h3>基础概念</h3><ul class="map-list"><li>RAG 是什么</li><li>LLM 是什么</li></ul></div>
|
||||
<div class="map-arrow">→</div>
|
||||
<div class="map-card"><div class="num orange">2</div><h3>LLM 限制</h3><ul class="map-list"><li>上下文</li><li>幻觉</li><li>注意力与顺序</li></ul></div>
|
||||
<div class="map-arrow">→</div>
|
||||
<div class="map-card"><div class="num mint">3</div><h3>离线建库</h3><ul class="map-list"><li>资料清洗</li><li>切片</li><li>向量化入库</li></ul></div>
|
||||
<div class="map-arrow">→</div>
|
||||
<div class="map-card"><div class="num">4</div><h3>在线问答</h3><ul class="map-list"><li>语义检索</li><li>排序 / 重排</li><li>提示词工程</li></ul></div>
|
||||
<div class="map-arrow">→</div>
|
||||
<div class="map-card"><div class="num rose">5</div><h3>能力扩展</h3><ul class="map-list"><li>Agents 分工</li><li>MCP 连接工具</li></ul></div>
|
||||
</div>
|
||||
<div class="footer"><span>目录</span><span class="page"></span></div>
|
||||
<div class="notes">这页把路线说清楚。后面每一页都围绕 LLM 限制到 RAG 方案,再到提示词工程和 Agent 的路径推进。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="RAG解释">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>RAG</span><span>检索增强生成</span></div>
|
||||
<p class="kicker">RAG 解释</p>
|
||||
<h2>RAG = 先检索资料,再增强上下文,最后生成答案</h2>
|
||||
<div class="mini-flow">
|
||||
<div class="node"><span class="tag">Retrieval</span><h3 class="mt-sm">检索</h3><p>从知识库找到相关资料。</p></div>
|
||||
<div class="arrow">→</div>
|
||||
<div class="node main"><span class="tag" style="background:rgba(255,255,255,.18);color:#fff">Augmented</span><h3 class="mt-sm">增强</h3><p>把资料放进上下文。</p></div>
|
||||
<div class="arrow">→</div>
|
||||
<div class="node"><span class="tag mint">Generation</span><h3 class="mt-sm">生成</h3><p>LLM 基于资料回答。</p></div>
|
||||
</div>
|
||||
<div class="grid2 mt">
|
||||
<div class="card"><h3>不是训练模型</h3><p class="lead" style="font-size:19px;margin-top:8px">知识不写进模型参数,而是每次回答前动态查资料。</p></div>
|
||||
<div class="card soft"><h3>不是全量塞资料</h3><p class="lead" style="font-size:19px;margin-top:8px">只取与问题相关的片段,降低上下文压力和噪声。</p></div>
|
||||
</div>
|
||||
<div class="footer"><span>RAG = Retrieval-Augmented Generation</span><span class="page"></span></div>
|
||||
<div class="notes">这页讲 RAG 的基本定义。先给出完整定义,再强调 RAG 不是训练模型,也不是把全部资料塞给模型。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="LLM 是什么">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>LLM</span><span>大语言模型</span></div>
|
||||
<p class="kicker">定义</p>
|
||||
<h2>LLM:理解上下文,生成自然语言</h2>
|
||||
<div class="compare">
|
||||
<div class="card">
|
||||
<span class="tag">能力</span>
|
||||
<h3 class="mt-sm">理解上下文,生成答案</h3>
|
||||
<ul>
|
||||
<li>读懂用户问题的大意</li>
|
||||
<li>把零散信息组织成自然语言</li>
|
||||
<li>按要求改写、总结、解释、翻译</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="mid">≠</div>
|
||||
<div class="card">
|
||||
<span class="tag rose">边界</span>
|
||||
<h3 class="mt-sm">企业资料库或事实系统</h3>
|
||||
<ul>
|
||||
<li>不会天然知道最新制度</li>
|
||||
<li>不知道内部文档和私有数据</li>
|
||||
<li>不能保证每句话都有来源</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card soft mt"><h3>定位</h3><p class="lead" style="font-size:20px;margin-top:8px">LLM 负责读懂和表达,事实依据来自外部资料。</p></div>
|
||||
<div class="footer"><span>LLM 不是知识库</span><span class="page"></span></div>
|
||||
<div class="notes">这里不要把 LLM 讲成搜索引擎。LLM 最强的是语言理解和生成,事实来源要靠外部知识、数据库或工具补充。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="LLM 的限制">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>Limits</span><span>对话限制</span></div>
|
||||
<p class="kicker">对话限制</p>
|
||||
<h2>LLM 的 4 个对话限制</h2>
|
||||
<div class="grid4 mt">
|
||||
<div class="card"><div class="num">1</div><h3>上下文有限</h3><p class="small mt-sm">输入窗口有限;资料过多会变慢、变贵、变乱。</p></div>
|
||||
<div class="card"><div class="num orange">2</div><h3>会有幻觉</h3><p class="small mt-sm">资料不足或指令不清时,会生成看似合理的错误内容。</p></div>
|
||||
<div class="card"><div class="num mint">3</div><h3>专注度下降</h3><p class="small mt-sm">长资料和噪声会稀释重点,关键信息可能被忽略。</p></div>
|
||||
<div class="card"><div class="num rose">4</div><h3>顺序不稳定</h3><p class="small mt-sm">位置、相似内容、前后冲突都会影响答案。</p></div>
|
||||
</div>
|
||||
<div class="card soft mt"><p class="lead" style="font-size:20px;margin-top:0">处理策略:每次只给最相关、最可信的少量资料。</p></div>
|
||||
<div class="footer"><span>上下文 · 幻觉 · 注意力 · 顺序</span><span class="page"></span></div>
|
||||
<div class="notes">这一页要明确回应用户大纲:上下文限制、幻觉、专注度、不会稳定关注内容顺序。它们共同解释了为什么不能简单粗暴地把所有文档塞进去。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="为什么需要 RAG">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>Why RAG</span><span>限制带来方案</span></div>
|
||||
<p class="kicker">从限制到方案</p>
|
||||
<h2>RAG 的核心:不是让模型记住全部知识,而是让它按需查资料</h2>
|
||||
<div class="stage">
|
||||
<div class="box"><span class="tag rose">全量输入</span><h3 class="mt-sm">全部塞给 LLM</h3><p class="small mt-sm">长、乱、贵,容易混入过期和无权限资料。</p></div>
|
||||
<div class="arrow">→</div>
|
||||
<div class="box main"><span class="tag" style="background:rgba(255,255,255,.18);color:#fff">RAG</span><h3 class="mt-sm">先查,再答</h3><p class="small mt-sm">每次只取跟问题最相关的资料片段。</p></div>
|
||||
<div class="arrow">→</div>
|
||||
<div class="box"><span class="tag mint">生成</span><h3 class="mt-sm">基于资料回答</h3><p class="small mt-sm">LLM 根据资料生成,必要时引用来源。</p></div>
|
||||
</div>
|
||||
<div class="card soft mt"><h3>RAG = 检索增强生成</h3><p class="lead" style="font-size:20px;margin-top:8px">检索相关资料 → 放入上下文 → LLM 生成答案。</p></div>
|
||||
<div class="footer"><span>先查资料,再生成</span><span class="page"></span></div>
|
||||
<div class="notes">这页把 RAG 的必要性讲出来:不是因为向量数据库酷,而是因为 LLM 的上下文和注意力有限,必须把资料筛小、筛准,再交给模型。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="后台建库">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>Offline Pipeline</span><span>后台整理资料</span></div>
|
||||
<p class="kicker">离线流程</p>
|
||||
<h2>资料整理成可检索的知识库</h2>
|
||||
<div class="flow">
|
||||
<div class="step"><span class="tag">01</span><h4>收集资料</h4><p>产品手册、FAQ、制度、接口文档、案例、流程说明。</p></div>
|
||||
<div class="step"><span class="tag orange">02</span><h4>清洗资料</h4><p>去掉重复、过期、广告、目录噪声和格式错误。</p></div>
|
||||
<div class="step hot"><span class="tag mint">03</span><h4>切片</h4><p>把长文档拆成能独立表达意思的小片段。</p></div>
|
||||
<div class="step"><span class="tag">04</span><h4>加元数据</h4><p>来源、版本、时间、部门、权限、适用范围。</p></div>
|
||||
<div class="step"><span class="tag">05</span><h4>向量化入库</h4><p>把每个片段变成语义向量,写入向量库。</p></div>
|
||||
</div>
|
||||
<div class="card soft mt"><p class="lead" style="font-size:20px;margin-top:0">资料质量决定检索质量。</p></div>
|
||||
<div class="footer"><span>资料准备 → 切片 → 向量库</span><span class="page"></span></div>
|
||||
<div class="notes">这里是“前期准备”。强调 RAG 不只是问答界面,后台知识库准备很关键。元数据也很重要,因为后面排序和权限过滤会用到。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="切片和向量库">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>Chunk & Embedding</span><span>切片和向量化</span></div>
|
||||
<p class="kicker">索引构建</p>
|
||||
<h2>切片 + 向量化:支持语义检索</h2>
|
||||
<div class="chunks">
|
||||
<div class="chunk"><b>切片 A:退费条件</b><p>购买后 7 天内,且未使用核心服务,可以申请全额退款。</p></div>
|
||||
<div class="chunk"><b>切片 B:不可退场景</b><p>已开具发票、已交付定制服务、超过合同期限,不支持自动退款。</p></div>
|
||||
<div class="chunk"><b>切片 C:审批路径</b><p>超过 5 万元的退款申请,需要客户成功经理和财务双审批。</p></div>
|
||||
</div>
|
||||
<div class="grid2 mt">
|
||||
<div class="card"><span class="tag">切片</span><h3 class="mt-sm">粒度适中</h3><p class="small mt-sm">过大噪声多,过小语义断;每片覆盖一个局部问题。</p></div>
|
||||
<div class="card"><span class="tag mint">向量库</span><h3 class="mt-sm">语义坐标</h3><p class="small mt-sm">文字转换成数字向量;语义相近,距离更近。</p><div class="vector"><i class="bar"></i><i class="bar"></i><i class="bar"></i><i class="bar"></i><i class="bar"></i><i class="bar"></i><i class="bar"></i><i class="bar"></i><i class="bar"></i><i class="bar"></i></div></div>
|
||||
</div>
|
||||
<div class="footer"><span>Chunking + Embedding</span><span class="page"></span></div>
|
||||
<div class="notes">用卡片和地图类比:切片是把书拆成卡片,向量化是给卡片标语义坐标。用户问法不一样,也能找到意思接近的片段。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="用户提问时怎么查">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>Online Retrieval</span><span>检索和排序</span></div>
|
||||
<p class="kicker">在线流程</p>
|
||||
<h2>用户提问后:检索候选,排序后交给 LLM</h2>
|
||||
<div class="flow">
|
||||
<div class="step"><span class="tag">01</span><h4>问题改写</h4><p>把口语问题改成更适合检索的查询。</p></div>
|
||||
<div class="step"><span class="tag">02</span><h4>问题向量化</h4><p>把用户问题也转成语义向量。</p></div>
|
||||
<div class="step"><span class="tag mint">03</span><h4>召回候选</h4><p>从向量库里找语义距离近的片段。</p></div>
|
||||
<div class="step hot"><span class="tag orange">04</span><h4>排序 / 重排</h4><p>按相关性、时效、权限、来源可信度重新排序。</p></div>
|
||||
<div class="step"><span class="tag">05</span><h4>拼上下文</h4><p>只把最有用的几段资料交给 LLM。</p></div>
|
||||
</div>
|
||||
<div class="search">
|
||||
<div class="card"><h3>排序后的候选资料</h3><div class="result"><div class="rank">1</div><div><h4>退款政策 v2026Q2</h4><p class="small">最相关,且版本最新。</p></div></div><div class="result"><div class="rank">2</div><div><h4>大客户审批流程</h4><p class="small">相关,但只在金额超过 5 万时使用。</p></div></div></div>
|
||||
<div class="card soft"><h3>排序作用</h3><ul><li>过期资料降权</li><li>无权限资料过滤</li><li>可信来源优先</li><li>减少噪声,保留重点</li></ul></div>
|
||||
</div>
|
||||
<div class="footer"><span>Retrieve + Rerank</span><span class="page"></span></div>
|
||||
<div class="notes">这里必须引出“排序”概念。召回只是先捞一批候选,排序/重排才决定哪些片段真正进入上下文。排序质量直接影响最终答案。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="提示词工程">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>Prompt Engineering</span><span>系统提示词</span></div>
|
||||
<p class="kicker">生成约束</p>
|
||||
<h2>系统提示词规定 LLM 的资料使用规则</h2>
|
||||
<div class="search">
|
||||
<div class="prompt">
|
||||
<div class="dim">SYSTEM:</div>
|
||||
<div>你是客服助手。只能基于 CONTEXT 回答;资料不足时说“不确定”,不要编造。</div>
|
||||
<br>
|
||||
<div class="dim">CONTEXT:</div>
|
||||
<div class="mark">[退款政策 v2026Q2] 购买后 7 天内且未使用核心服务,可全额退款。</div>
|
||||
<br>
|
||||
<div class="dim">USER:</div>
|
||||
<div>客户买了 3 天,还没使用,可以退吗?</div>
|
||||
</div>
|
||||
<div class="card">
|
||||
<span class="tag">提示词工程</span>
|
||||
<h3 class="mt-sm">规则</h3>
|
||||
<ul>
|
||||
<li>角色:你是谁</li>
|
||||
<li>边界:只能基于资料回答</li>
|
||||
<li>格式:分点、引用来源、给结论</li>
|
||||
<li>兜底:不知道就说不知道</li>
|
||||
<li>安全:不要泄露无权限信息</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card soft mt"><p class="lead" style="font-size:20px;margin-top:0">RAG 找资料;提示词规定资料的使用方式。</p></div>
|
||||
<div class="footer"><span>System Prompt + Context + User Question</span><span class="page"></span></div>
|
||||
<div class="notes">这一页承接用户大纲:给到 LLM 的时候需要系统提示词,由此引出提示词工程。提示词工程重点不是花哨话术,而是角色、边界、格式、引用和兜底。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="Agent">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>Agent</span><span>复杂任务的分工协作</span></div>
|
||||
<p class="kicker">复杂任务</p>
|
||||
<h2>复杂任务:多步骤、多角色、多 LLM 协作</h2>
|
||||
<div class="agent-map">
|
||||
<div class="agent"><span class="tag">规划者</span><h3 class="mt-sm">拆任务</h3><p>规划检索、工具调用和结果组织。</p></div>
|
||||
<div class="arrow">→</div>
|
||||
<div class="agent"><span class="tag mint">检索者</span><h3 class="mt-sm">查资料</h3><p>使用 RAG 从知识库里找依据,必要时多轮检索。</p></div>
|
||||
<div class="arrow">→</div>
|
||||
<div class="agent"><span class="tag orange">执行者</span><h3 class="mt-sm">调用工具</h3><p>查订单、建工单、读数据库、调用业务系统接口。</p></div>
|
||||
</div>
|
||||
<div class="grid2 mt">
|
||||
<div class="card"><h3>Agent</h3><p class="lead" style="font-size:19px;margin-top:8px">目标驱动流程:规划步骤、选择工具、读取结果、继续推进。</p></div>
|
||||
<div class="card soft"><h3>和 RAG 的关系</h3><p class="lead" style="font-size:19px;margin-top:8px">RAG 提供知识入口;Agent 负责任务编排。</p></div>
|
||||
</div>
|
||||
<div class="footer"><span>RAG 是知识入口,Agent 是任务编排</span><span class="page"></span></div>
|
||||
<div class="notes">不要把 Agent 讲玄。它就是更复杂任务里的规划和编排:可能一个 LLM 规划,一个 LLM 检索,一个 LLM 写答案,也可能调用外部工具。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="MCP">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>MCP</span><span>工具连接</span></div>
|
||||
<p class="kicker">MCP</p>
|
||||
<h2>MCP:让 Agent 稳定连接外部工具和业务系统</h2>
|
||||
<div class="mcp-grid">
|
||||
<div class="mcp-card"><span class="tag">统一接口</span><h3 class="mt-sm">工具接入规范</h3><p class="small mt-sm">把不同系统的能力包装成模型可调用的工具。</p></div>
|
||||
<div class="mcp-card"><span class="tag mint">上下文供给</span><h3 class="mt-sm">读取外部信息</h3><p class="small mt-sm">查数据库、读文件、取工单、访问知识系统。</p></div>
|
||||
<div class="mcp-card"><span class="tag orange">动作执行</span><h3 class="mt-sm">调用业务能力</h3><p class="small mt-sm">创建工单、查询订单、发送通知、写入结果。</p></div>
|
||||
</div>
|
||||
<div class="tool-list">
|
||||
<span>CRM</span><span>工单</span><span>数据库</span><span>搜索</span><span>文件</span>
|
||||
</div>
|
||||
<div class="card soft mt"><p class="lead" style="font-size:20px;margin-top:0">RAG 负责查知识;Agent 负责编排任务;MCP 负责连接工具。</p></div>
|
||||
<div class="footer"><span>RAG · Agent · MCP</span><span class="page"></span></div>
|
||||
<div class="notes">这页讲 MCP。它不需要展开协议细节,只要说明 MCP 是让模型/Agent 稳定连接外部工具和系统的接口层。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="总结">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>Takeaway</span><span>关键链路</span></div>
|
||||
<p class="kicker">核心链路</p>
|
||||
<h2>RAG → LLM 限制 → 切片 → 向量化 → 语义检索 → 排序 → 提示词 → Agent → MCP</h2>
|
||||
<div class="summary">
|
||||
<div class="card"><span class="tag rose">限制</span><h3 class="mt-sm">LLM 不能吃下全部知识</h3><p class="small mt-sm">上下文有限、会幻觉、专注度和顺序都不稳定。</p></div>
|
||||
<div class="card"><span class="tag mint">建库</span><h3 class="mt-sm">资料要先整理成片段</h3><p class="small mt-sm">清洗、切片、向量化、加元数据,再放入向量库。</p></div>
|
||||
<div class="card"><span class="tag orange">检索</span><h3 class="mt-sm">提问时先找资料</h3><p class="small mt-sm">召回候选,再排序过滤,把最相关内容放进上下文。</p></div>
|
||||
<div class="card"><span class="tag">扩展</span><h3 class="mt-sm">Agent + MCP</h3><p class="small mt-sm">Agent 编排多步骤任务;MCP 连接外部工具和系统。</p></div>
|
||||
</div>
|
||||
<div class="card soft mt"><p class="lead" style="font-size:21px;margin-top:0">RAG 不是替代 LLM,而是给 LLM 配一套“会查资料的工作台”。</p></div>
|
||||
<div class="footer"><span>End</span><span class="page"></span></div>
|
||||
<div class="notes">收尾不要再加新概念。重复主线:LLM 有限制,所以需要 RAG;RAG 后台建库,前台检索排序,再通过提示词交给 LLM;复杂任务用 Agent。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
</div>
|
||||
<div class="progress"><span id="progress"></span></div>
|
||||
<div class="nav-controls" aria-label="翻页导航">
|
||||
<button class="nav-btn" id="prevBtn" type="button" aria-label="上一页">‹</button>
|
||||
<span class="nav-index" id="navIndex">1 / 13</span>
|
||||
<button class="nav-btn" id="nextBtn" type="button" aria-label="下一页">›</button>
|
||||
</div>
|
||||
<script>
|
||||
/*
 * Slide deck controller.
 *
 * Shows exactly one `.slide` at a time and keeps the page counters, the
 * progress bar, the prev/next buttons and the URL hash (`#/<n>`, 1-based)
 * in sync with the current slide. Navigation is available through the
 * buttons, the hash, and the keyboard (arrows, PageUp/PageDown, Space,
 * Home, End; `f` requests fullscreen).
 */
(function(){
  const slides = Array.from(document.querySelectorAll('.slide'));
  const progress = document.getElementById('progress');
  const prevBtn = document.getElementById('prevBtn');
  const nextBtn = document.getElementById('nextBtn');
  const navIndex = document.getElementById('navIndex');

  // Nothing to control: avoid rendering "1 / 0" and dividing by zero.
  if (slides.length === 0) return;

  let idx = 0;

  // Keep any requested index inside [0, slides.length - 1].
  const clamp = (n) => Math.max(0, Math.min(slides.length - 1, n));

  // Push the current index into every piece of UI that reflects it.
  function render(){
    const label = (idx+1)+' / '+slides.length;
    slides.forEach((s,i)=>s.classList.toggle('active',i===idx));
    document.querySelectorAll('.page').forEach(el=>{el.textContent=label});
    navIndex.textContent = label;
    prevBtn.disabled = idx === 0;
    nextBtn.disabled = idx === slides.length - 1;
    progress.style.width = ((idx+1)/slides.length*100)+'%';
    // replaceState keeps the back button from stepping through every slide.
    const hash = '#/'+(idx+1);
    if(location.hash !== hash) history.replaceState(null,'',hash);
  }

  function go(n){idx=clamp(n);render()}

  // Adopt the slide number from the URL hash, if it carries one.
  function fromHash(){
    const m = location.hash.match(/#\/(\d+)/);
    if(m) idx = clamp(Number(m[1])-1);
    render();
  }

  window.addEventListener('hashchange',fromHash);
  prevBtn.addEventListener('click',()=>go(idx-1));
  nextBtn.addEventListener('click',()=>go(idx+1));

  document.addEventListener('keydown',e=>{
    // Leave browser/OS shortcuts (Ctrl+F, Cmd+Left, Alt+Left, ...) alone.
    if(e.ctrlKey || e.metaKey || e.altKey) return;

    switch(e.key){
      case ' ': {
        // Space activates a focused control; hijacking it would make Space on
        // the "previous" button move forward instead.
        const target = e.target;
        const onControl = target instanceof Element
          && target.closest('button, a, input, textarea, select, [contenteditable]') !== null;
        if(onControl) return;
        e.preventDefault();
        go(idx+1);
        break;
      }
      case 'ArrowRight':
      case 'PageDown':
        e.preventDefault();
        go(idx+1);
        break;
      case 'ArrowLeft':
      case 'PageUp':
        e.preventDefault();
        go(idx-1);
        break;
      case 'Home':
        e.preventDefault();
        go(0);
        break;
      case 'End':
        e.preventDefault();
        go(slides.length-1);
        break;
      case 'f':
      case 'F':
        // requestFullscreen() returns a promise that rejects when the request
        // is denied; that is harmless here, so keep it from surfacing as an
        // unhandled rejection.
        document.documentElement.requestFullscreen?.()?.catch?.(()=>{});
        break;
      default:
        break;
    }
  });

  fromHash();
})();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
447
public/rag-for-pm.html
Normal file
447
public/rag-for-pm.html
Normal file
@@ -0,0 +1,447 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width,initial-scale=1">
|
||||
<title>RAG 入门:原理、流程与使用</title>
|
||||
<style>
|
||||
:root{
|
||||
--bg:#f6f8fb;
|
||||
--paper:#ffffff;
|
||||
--ink:#0a2540;
|
||||
--muted:#486176;
|
||||
--soft:#eef3f8;
|
||||
--line:#d9e2ec;
|
||||
--blue:#1d4ed8;
|
||||
--mint:#0f9f8f;
|
||||
--orange:#d97706;
|
||||
--rose:#e11d48;
|
||||
--shadow:0 14px 34px rgba(10,37,64,.10),0 2px 7px rgba(10,37,64,.05);
|
||||
}
|
||||
*{box-sizing:border-box}
|
||||
html,body{margin:0;width:100%;height:100%;overflow:hidden;background:var(--bg);color:var(--ink);font-family:-apple-system,BlinkMacSystemFont,"Segoe UI","Noto Sans SC","Microsoft YaHei",Arial,sans-serif}
|
||||
.deck{position:relative;width:100vw;height:100vh;overflow:hidden;background:linear-gradient(135deg,#f8fafc,#eef3f8)}
|
||||
.slide{position:absolute;inset:0;display:none;padding:72px 88px;background:radial-gradient(circle at 85% 12%,rgba(29,78,216,.08),transparent 28%),var(--paper);align-items:center;justify-content:center}
|
||||
.slide.active{display:flex}
|
||||
.slide-inner{width:100%;max-height:calc(100vh - 150px)}
|
||||
.slide::before{content:"";position:absolute;left:0;top:0;bottom:0;width:8px;background:linear-gradient(180deg,var(--blue),var(--mint))}
|
||||
.topline{position:absolute;left:88px;right:88px;top:28px;display:flex;justify-content:space-between;align-items:center;font-size:12px;letter-spacing:.14em;text-transform:uppercase;color:#8aa0b5}
|
||||
.footer{position:absolute;left:88px;right:88px;bottom:24px;display:flex;justify-content:space-between;color:#8aa0b5;font-size:12px}
|
||||
.progress{position:fixed;left:0;right:0;bottom:0;height:4px;background:#dbe4ee;z-index:20}
|
||||
.progress span{display:block;height:100%;width:0;background:linear-gradient(90deg,var(--blue),var(--mint));transition:width .2s ease}
|
||||
.nav-controls{position:fixed;right:28px;bottom:26px;z-index:30;display:flex;align-items:center;gap:8px;padding:8px;border:1px solid rgba(10,37,64,.12);border-radius:999px;background:rgba(255,255,255,.92);box-shadow:0 8px 26px rgba(10,37,64,.14);backdrop-filter:blur(10px)}
|
||||
.nav-btn{width:38px;height:38px;border:0;border-radius:50%;background:#0a2540;color:#fff;font-size:22px;line-height:1;display:flex;align-items:center;justify-content:center;cursor:pointer}
|
||||
.nav-btn:disabled{opacity:.32;cursor:not-allowed}
|
||||
.nav-index{min-width:58px;text-align:center;font-size:13px;font-weight:800;color:var(--muted)}
|
||||
.kicker{margin:0 0 12px;color:var(--blue);font-size:15px;font-weight:700;letter-spacing:.1em;text-transform:uppercase}
|
||||
h1,h2,h3,h4,p{margin:0}
|
||||
h1{font-size:68px;line-height:1.05;letter-spacing:0;font-weight:800;max-width:980px}
|
||||
h2{font-size:44px;line-height:1.15;letter-spacing:0;font-weight:760;max-width:1040px}
|
||||
h3{font-size:24px;line-height:1.25;font-weight:740}
|
||||
h4{font-size:18px;line-height:1.3;font-weight:730}
|
||||
.lead{font-size:23px;line-height:1.55;color:var(--muted);max-width:900px;margin-top:22px}
|
||||
.muted{color:var(--muted)}
|
||||
.accent{background:linear-gradient(135deg,var(--blue),var(--mint));-webkit-background-clip:text;background-clip:text;color:transparent}
|
||||
.row{display:flex;gap:16px;align-items:stretch}
|
||||
.grid2{display:grid;grid-template-columns:repeat(2,1fr);gap:18px}
|
||||
.grid3{display:grid;grid-template-columns:repeat(3,1fr);gap:18px}
|
||||
.grid4{display:grid;grid-template-columns:repeat(4,1fr);gap:16px}
|
||||
.mt{margin-top:30px}.mt-sm{margin-top:16px}.mt-lg{margin-top:42px}
|
||||
.card{background:#fff;border:1px solid var(--line);border-radius:10px;box-shadow:var(--shadow);padding:22px}
|
||||
.card.soft{background:var(--soft);box-shadow:none}
|
||||
.tag{display:inline-flex;align-items:center;justify-content:center;border-radius:999px;padding:5px 12px;background:#edf4ff;color:var(--blue);font-size:12px;font-weight:800}
|
||||
.tag.mint{background:#e8faf7;color:var(--mint)}
|
||||
.tag.orange{background:#fff4df;color:var(--orange)}
|
||||
.tag.rose{background:#fff0f4;color:var(--rose)}
|
||||
.num{width:40px;height:40px;border-radius:9px;background:var(--blue);color:#fff;display:flex;align-items:center;justify-content:center;font-weight:800;margin-bottom:14px}
|
||||
.num.mint{background:var(--mint)}.num.orange{background:var(--orange)}.num.rose{background:var(--rose)}
|
||||
.pillbar{display:flex;flex-wrap:wrap;gap:10px;margin-top:32px}
|
||||
.pill{border:1px solid var(--line);background:#fff;border-radius:999px;padding:8px 14px;font-size:14px;color:var(--muted);font-weight:650}
|
||||
.stage{display:grid;grid-template-columns:1fr 52px 1fr 52px 1fr;gap:10px;align-items:center;margin-top:38px}
|
||||
.stage .box{text-align:center;padding:28px 18px;border:1px solid var(--line);border-radius:12px;background:#fff;box-shadow:var(--shadow)}
|
||||
.stage .box.main{background:var(--blue);color:#fff}
|
||||
.stage .box.main p{color:#dbeafe}
|
||||
.arrow{font-size:34px;color:#93a4b5;text-align:center;font-weight:900}
|
||||
.compare{display:grid;grid-template-columns:1fr 88px 1fr;gap:22px;align-items:center;margin-top:34px}
|
||||
.compare .mid{text-align:center;font-size:42px;color:#9aacbd;font-weight:900}
|
||||
ul{margin:14px 0 0;padding-left:22px;color:var(--muted);line-height:1.85;font-size:18px}
|
||||
.small{font-size:14px;line-height:1.55;color:var(--muted)}
|
||||
.flow{display:grid;grid-template-columns:repeat(5,1fr);gap:12px;margin-top:30px}
|
||||
.flow .step{position:relative;background:#fff;border:1px solid var(--line);border-radius:10px;box-shadow:var(--shadow);padding:18px;min-height:150px}
|
||||
.flow .step h4{margin:8px 0 6px}.flow .step p{font-size:13px;line-height:1.5;color:var(--muted)}
|
||||
.flow .step.hot{border-color:rgba(29,78,216,.45);box-shadow:0 0 0 3px rgba(29,78,216,.10),var(--shadow)}
|
||||
.chunks{display:grid;grid-template-columns:repeat(3,1fr);gap:14px;margin-top:24px}
|
||||
.chunk{border:1px dashed #9fb0c2;background:#fff;border-radius:9px;padding:17px;min-height:122px}
|
||||
.chunk b{color:var(--blue);font-size:14px}.chunk p{font-size:13px;line-height:1.6;color:var(--muted);margin-top:8px}
|
||||
.vector{display:grid;grid-template-columns:repeat(10,1fr);gap:6px;margin-top:14px}
|
||||
.bar{height:44px;border-radius:5px;background:var(--blue);opacity:.28}.bar:nth-child(2n){opacity:.55}.bar:nth-child(3n){opacity:.82}.bar:nth-child(5n){opacity:.42}
|
||||
.search{display:grid;grid-template-columns:1.02fr .98fr;gap:20px;margin-top:26px}
|
||||
.result{display:flex;gap:12px;border:1px solid var(--line);border-radius:9px;background:#fff;padding:14px;margin-top:10px}
|
||||
.rank{width:34px;height:34px;border-radius:8px;background:#edf4ff;color:var(--blue);display:flex;align-items:center;justify-content:center;font-weight:900;flex-shrink:0}
|
||||
.prompt{background:#09213a;color:#e8f1ff;border-radius:12px;box-shadow:var(--shadow);padding:22px 24px;font-family:"SFMono-Regular",Consolas,"Liberation Mono",monospace;font-size:15px;line-height:1.85}
|
||||
.prompt .dim{color:#8fb1d6}.prompt .mark{color:#fde68a}
|
||||
.agent-map{display:grid;grid-template-columns:1fr 46px 1fr 46px 1fr;gap:10px;align-items:center;margin-top:28px}
|
||||
.agent{background:#fff;border:1px solid var(--line);border-radius:10px;box-shadow:var(--shadow);padding:20px;text-align:center;min-height:170px}
|
||||
.agent p{font-size:13px;line-height:1.55;color:var(--muted);margin-top:8px}
|
||||
.summary{display:grid;grid-template-columns:repeat(4,1fr);gap:16px;margin-top:34px}
|
||||
.summary .card{min-height:150px}
|
||||
.learning-map{display:grid;grid-template-columns:1fr 38px 1fr 38px 1fr 38px 1fr 38px 1fr;gap:10px;align-items:stretch;margin-top:34px}
|
||||
.map-card{background:#fff;border:1px solid var(--line);border-radius:12px;box-shadow:var(--shadow);padding:20px 16px;min-height:270px}
|
||||
.map-card .num{margin-bottom:16px}
|
||||
.map-card h3{font-size:21px}
|
||||
.map-list{margin:14px 0 0;padding-left:18px;color:var(--muted);font-size:14px;line-height:1.75}
|
||||
.map-arrow{display:flex;align-items:center;justify-content:center;font-size:28px;color:#9aacbd;font-weight:900}
|
||||
.mini-flow{display:grid;grid-template-columns:1fr 44px 1fr 44px 1fr;gap:10px;align-items:center;margin-top:32px}
|
||||
.mini-flow .node{background:#fff;border:1px solid var(--line);border-radius:10px;box-shadow:var(--shadow);padding:18px;text-align:center;min-height:126px}
|
||||
.mini-flow .node.main{background:var(--blue);color:#fff}
|
||||
.mini-flow .node.main p{color:#dbeafe}
|
||||
.mini-flow p{font-size:13px;line-height:1.5;color:var(--muted);margin-top:6px}
|
||||
.mcp-grid{display:grid;grid-template-columns:1fr 1fr 1fr;gap:16px;margin-top:30px}
|
||||
.mcp-card{background:#fff;border:1px solid var(--line);border-radius:10px;box-shadow:var(--shadow);padding:22px;min-height:205px}
|
||||
.tool-list{display:grid;grid-template-columns:repeat(5,1fr);gap:10px;margin-top:24px}
|
||||
.tool-list span{display:flex;align-items:center;justify-content:center;min-height:58px;border-radius:10px;background:var(--soft);border:1px solid var(--line);font-size:14px;font-weight:800;color:var(--muted)}
|
||||
.notes{display:none}
|
||||
@media (max-width:900px){
|
||||
.slide{padding:62px 28px 54px;overflow:auto;align-items:flex-start}
|
||||
.slide-inner{max-height:none}
|
||||
.topline,.footer{left:28px;right:28px}
|
||||
h1{font-size:42px}h2{font-size:32px}.lead{font-size:18px}
|
||||
.grid2,.grid3,.grid4,.flow,.chunks,.search,.summary,.learning-map,.mcp-grid,.tool-list{grid-template-columns:1fr}
|
||||
.stage,.compare,.agent-map,.mini-flow{grid-template-columns:1fr}
|
||||
.arrow,.map-arrow,.compare .mid{display:none}
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="deck" id="deck">
|
||||
|
||||
<section class="slide active" data-title="封面">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>RAG Primer</span><span>原理和使用</span></div>
|
||||
<p class="kicker">RAG · LLM · Retrieval · Prompt · Agent · MCP</p>
|
||||
<h1>RAG 入门:让 AI <span class="accent">先查资料</span>再回答</h1>
|
||||
<p class="lead">从 LLM 的对话限制出发,理解 RAG 的建库、检索、排序、提示词和 Agent 扩展。</p>
|
||||
<div class="pillbar">
|
||||
<span class="pill">RAG解释</span><span class="pill">LLM解释</span><span class="pill">上下文限制</span><span class="pill">幻觉</span><span class="pill">注意力顺序</span><span class="pill">切片</span><span class="pill">向量化</span><span class="pill">排序</span><span class="pill">语义检索</span><span class="pill">提示词工程</span><span class="pill">AGENTS</span><span class="pill">MCP</span>
|
||||
</div>
|
||||
<div class="footer"><span>RAG 原理和使用</span><span class="page"></span></div>
|
||||
<div class="notes">开场不用讲算法,先讲主线:大模型会说话,但它不是企业知识库。因为它有上下文、幻觉、注意力和顺序方面的限制,所以需要 RAG 这套“先查资料,再组织答案”的机制。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="目录">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>Roadmap</span><span>学习路径</span></div>
|
||||
<p class="kicker">由浅入深</p>
|
||||
<h2>从“会生成”到“会查资料、会调用工具”</h2>
|
||||
<div class="learning-map">
|
||||
<div class="map-card"><div class="num">1</div><h3>基础概念</h3><ul class="map-list"><li>RAG 是什么</li><li>LLM 是什么</li></ul></div>
|
||||
<div class="map-arrow">→</div>
|
||||
<div class="map-card"><div class="num orange">2</div><h3>LLM 限制</h3><ul class="map-list"><li>上下文</li><li>幻觉</li><li>注意力与顺序</li></ul></div>
|
||||
<div class="map-arrow">→</div>
|
||||
<div class="map-card"><div class="num mint">3</div><h3>离线建库</h3><ul class="map-list"><li>资料清洗</li><li>切片</li><li>向量化入库</li></ul></div>
|
||||
<div class="map-arrow">→</div>
|
||||
<div class="map-card"><div class="num">4</div><h3>在线问答</h3><ul class="map-list"><li>语义检索</li><li>排序 / 重排</li><li>提示词工程</li></ul></div>
|
||||
<div class="map-arrow">→</div>
|
||||
<div class="map-card"><div class="num rose">5</div><h3>能力扩展</h3><ul class="map-list"><li>Agents 分工</li><li>MCP 连接工具</li></ul></div>
|
||||
</div>
|
||||
<div class="footer"><span>目录</span><span class="page"></span></div>
|
||||
<div class="notes">这页把路线说清楚。后面每一页都围绕 LLM 限制到 RAG 方案,再到提示词工程和 Agent 的路径推进。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="RAG解释">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>RAG</span><span>检索增强生成</span></div>
|
||||
<p class="kicker">RAG 解释</p>
|
||||
<h2>RAG = 先检索资料,再增强上下文,最后生成答案</h2>
|
||||
<div class="mini-flow">
|
||||
<div class="node"><span class="tag">Retrieval</span><h3 class="mt-sm">检索</h3><p>从知识库找到相关资料。</p></div>
|
||||
<div class="arrow">→</div>
|
||||
<div class="node main"><span class="tag" style="background:rgba(255,255,255,.18);color:#fff">Augmented</span><h3 class="mt-sm">增强</h3><p>把资料放进上下文。</p></div>
|
||||
<div class="arrow">→</div>
|
||||
<div class="node"><span class="tag mint">Generation</span><h3 class="mt-sm">生成</h3><p>LLM 基于资料回答。</p></div>
|
||||
</div>
|
||||
<div class="grid2 mt">
|
||||
<div class="card"><h3>不是训练模型</h3><p class="lead" style="font-size:19px;margin-top:8px">知识不写进模型参数,而是每次回答前动态查资料。</p></div>
|
||||
<div class="card soft"><h3>不是全量塞资料</h3><p class="lead" style="font-size:19px;margin-top:8px">只取与问题相关的片段,降低上下文压力和噪声。</p></div>
|
||||
</div>
|
||||
<div class="footer"><span>RAG = Retrieval-Augmented Generation</span><span class="page"></span></div>
|
||||
<div class="notes">这页讲 RAG 的基本定义。先给出完整定义,再强调 RAG 不是训练模型,也不是把全部资料塞给模型。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="LLM 是什么">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>LLM</span><span>大语言模型</span></div>
|
||||
<p class="kicker">定义</p>
|
||||
<h2>LLM:理解上下文,生成自然语言</h2>
|
||||
<div class="compare">
|
||||
<div class="card">
|
||||
<span class="tag">能力</span>
|
||||
<h3 class="mt-sm">理解上下文,生成答案</h3>
|
||||
<ul>
|
||||
<li>读懂用户问题的大意</li>
|
||||
<li>把零散信息组织成自然语言</li>
|
||||
<li>按要求改写、总结、解释、翻译</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="mid">≠</div>
|
||||
<div class="card">
|
||||
<span class="tag rose">边界</span>
|
||||
<h3 class="mt-sm">企业资料库或事实系统</h3>
|
||||
<ul>
|
||||
<li>不会天然知道最新制度</li>
|
||||
<li>不知道内部文档和私有数据</li>
|
||||
<li>不能保证每句话都有来源</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card soft mt"><h3>定位</h3><p class="lead" style="font-size:20px;margin-top:8px">LLM 负责读懂和表达,事实依据来自外部资料。</p></div>
|
||||
<div class="footer"><span>LLM 不是知识库</span><span class="page"></span></div>
|
||||
<div class="notes">这里不要把 LLM 讲成搜索引擎。LLM 最强的是语言理解和生成,事实来源要靠外部知识、数据库或工具补充。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="LLM 的限制">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>Limits</span><span>对话限制</span></div>
|
||||
<p class="kicker">对话限制</p>
|
||||
<h2>LLM 的 4 个对话限制</h2>
|
||||
<div class="grid4 mt">
|
||||
<div class="card"><div class="num">1</div><h3>上下文有限</h3><p class="small mt-sm">输入窗口有限;资料过多会变慢、变贵、变乱。</p></div>
|
||||
<div class="card"><div class="num orange">2</div><h3>会有幻觉</h3><p class="small mt-sm">资料不足或指令不清时,会生成看似合理的错误内容。</p></div>
|
||||
<div class="card"><div class="num mint">3</div><h3>专注度下降</h3><p class="small mt-sm">长资料和噪声会稀释重点,关键信息可能被忽略。</p></div>
|
||||
<div class="card"><div class="num rose">4</div><h3>顺序不稳定</h3><p class="small mt-sm">位置、相似内容、前后冲突都会影响答案。</p></div>
|
||||
</div>
|
||||
<div class="card soft mt"><p class="lead" style="font-size:20px;margin-top:0">处理策略:每次只给最相关、最可信的少量资料。</p></div>
|
||||
<div class="footer"><span>上下文 · 幻觉 · 注意力 · 顺序</span><span class="page"></span></div>
|
||||
<div class="notes">这一页要明确回应用户大纲:上下文限制、幻觉、专注度、不会稳定关注内容顺序。它们共同解释了为什么不能简单粗暴地把所有文档塞进去。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="为什么需要 RAG">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>Why RAG</span><span>限制带来方案</span></div>
|
||||
<p class="kicker">从限制到方案</p>
|
||||
<h2>RAG 的核心:不是让模型记住全部知识,而是让它按需查资料</h2>
|
||||
<div class="stage">
|
||||
<div class="box"><span class="tag rose">全量输入</span><h3 class="mt-sm">全部塞给 LLM</h3><p class="small mt-sm">长、乱、贵,容易混入过期和无权限资料。</p></div>
|
||||
<div class="arrow">→</div>
|
||||
<div class="box main"><span class="tag" style="background:rgba(255,255,255,.18);color:#fff">RAG</span><h3 class="mt-sm">先查,再答</h3><p class="small mt-sm">每次只取跟问题最相关的资料片段。</p></div>
|
||||
<div class="arrow">→</div>
|
||||
<div class="box"><span class="tag mint">生成</span><h3 class="mt-sm">基于资料回答</h3><p class="small mt-sm">LLM 根据资料生成,必要时引用来源。</p></div>
|
||||
</div>
|
||||
<div class="card soft mt"><h3>RAG = 检索增强生成</h3><p class="lead" style="font-size:20px;margin-top:8px">检索相关资料 → 放入上下文 → LLM 生成答案。</p></div>
|
||||
<div class="footer"><span>先查资料,再生成</span><span class="page"></span></div>
|
||||
<div class="notes">这页把 RAG 的必要性讲出来:不是因为向量数据库酷,而是因为 LLM 的上下文和注意力有限,必须把资料筛小、筛准,再交给模型。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="后台建库">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>Offline Pipeline</span><span>后台整理资料</span></div>
|
||||
<p class="kicker">离线流程</p>
|
||||
<h2>资料整理成可检索的知识库</h2>
|
||||
<div class="flow">
|
||||
<div class="step"><span class="tag">01</span><h4>收集资料</h4><p>产品手册、FAQ、制度、接口文档、案例、流程说明。</p></div>
|
||||
<div class="step"><span class="tag orange">02</span><h4>清洗资料</h4><p>去掉重复、过期、广告、目录噪声和格式错误。</p></div>
|
||||
<div class="step hot"><span class="tag mint">03</span><h4>切片</h4><p>把长文档拆成能独立表达意思的小片段。</p></div>
|
||||
<div class="step"><span class="tag">04</span><h4>加元数据</h4><p>来源、版本、时间、部门、权限、适用范围。</p></div>
|
||||
<div class="step"><span class="tag">05</span><h4>向量化入库</h4><p>把每个片段变成语义向量,写入向量库。</p></div>
|
||||
</div>
|
||||
<div class="card soft mt"><p class="lead" style="font-size:20px;margin-top:0">资料质量决定检索质量。</p></div>
|
||||
<div class="footer"><span>资料准备 → 切片 → 向量库</span><span class="page"></span></div>
|
||||
<div class="notes">这里是“前期准备”。强调 RAG 不只是问答界面,后台知识库准备很关键。元数据也很重要,因为后面排序和权限过滤会用到。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="切片和向量库">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>Chunk & Embedding</span><span>切片和向量化</span></div>
|
||||
<p class="kicker">索引构建</p>
|
||||
<h2>切片 + 向量化:支持语义检索</h2>
|
||||
<div class="chunks">
|
||||
<div class="chunk"><b>切片 A:退费条件</b><p>购买后 7 天内,且未使用核心服务,可以申请全额退款。</p></div>
|
||||
<div class="chunk"><b>切片 B:不可退场景</b><p>已开具发票、已交付定制服务、超过合同期限,不支持自动退款。</p></div>
|
||||
<div class="chunk"><b>切片 C:审批路径</b><p>超过 5 万元的退款申请,需要客户成功经理和财务双审批。</p></div>
|
||||
</div>
|
||||
<div class="grid2 mt">
|
||||
<div class="card"><span class="tag">切片</span><h3 class="mt-sm">粒度适中</h3><p class="small mt-sm">过大噪声多,过小语义断;每片覆盖一个局部问题。</p></div>
|
||||
<div class="card"><span class="tag mint">向量库</span><h3 class="mt-sm">语义坐标</h3><p class="small mt-sm">文字转换成数字向量;语义相近,距离更近。</p><div class="vector"><i class="bar"></i><i class="bar"></i><i class="bar"></i><i class="bar"></i><i class="bar"></i><i class="bar"></i><i class="bar"></i><i class="bar"></i><i class="bar"></i><i class="bar"></i></div></div>
|
||||
</div>
|
||||
<div class="footer"><span>Chunking + Embedding</span><span class="page"></span></div>
|
||||
<div class="notes">用卡片和地图类比:切片是把书拆成卡片,向量化是给卡片标语义坐标。用户问法不一样,也能找到意思接近的片段。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="用户提问时怎么查">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>Online Retrieval</span><span>检索和排序</span></div>
|
||||
<p class="kicker">在线流程</p>
|
||||
<h2>用户提问后:检索候选,排序后交给 LLM</h2>
|
||||
<div class="flow">
|
||||
<div class="step"><span class="tag">01</span><h4>问题改写</h4><p>把口语问题改成更适合检索的查询。</p></div>
|
||||
<div class="step"><span class="tag">02</span><h4>问题向量化</h4><p>把用户问题也转成语义向量。</p></div>
|
||||
<div class="step"><span class="tag mint">03</span><h4>召回候选</h4><p>从向量库里找语义距离近的片段。</p></div>
|
||||
<div class="step hot"><span class="tag orange">04</span><h4>排序 / 重排</h4><p>按相关性、时效、权限、来源可信度重新排序。</p></div>
|
||||
<div class="step"><span class="tag">05</span><h4>拼上下文</h4><p>只把最有用的几段资料交给 LLM。</p></div>
|
||||
</div>
|
||||
<div class="search">
|
||||
<div class="card"><h3>排序后的候选资料</h3><div class="result"><div class="rank">1</div><div><h4>退款政策 v2026Q2</h4><p class="small">最相关,且版本最新。</p></div></div><div class="result"><div class="rank">2</div><div><h4>大客户审批流程</h4><p class="small">相关,但只在金额超过 5 万时使用。</p></div></div></div>
|
||||
<div class="card soft"><h3>排序作用</h3><ul><li>过期资料降权</li><li>无权限资料过滤</li><li>可信来源优先</li><li>减少噪声,保留重点</li></ul></div>
|
||||
</div>
|
||||
<div class="footer"><span>Retrieve + Rerank</span><span class="page"></span></div>
|
||||
<div class="notes">这里必须引出“排序”概念。召回只是先捞一批候选,排序/重排才决定哪些片段真正进入上下文。排序质量直接影响最终答案。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="提示词工程">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>Prompt Engineering</span><span>系统提示词</span></div>
|
||||
<p class="kicker">生成约束</p>
|
||||
<h2>系统提示词规定 LLM 的资料使用规则</h2>
|
||||
<div class="search">
|
||||
<div class="prompt">
|
||||
<div class="dim">SYSTEM:</div>
|
||||
<div>你是客服助手。只能基于 CONTEXT 回答;资料不足时说“不确定”,不要编造。</div>
|
||||
<br>
|
||||
<div class="dim">CONTEXT:</div>
|
||||
<div class="mark">[退款政策 v2026Q2] 购买后 7 天内且未使用核心服务,可全额退款。</div>
|
||||
<br>
|
||||
<div class="dim">USER:</div>
|
||||
<div>客户买了 3 天,还没使用,可以退吗?</div>
|
||||
</div>
|
||||
<div class="card">
|
||||
<span class="tag">提示词工程</span>
|
||||
<h3 class="mt-sm">规则</h3>
|
||||
<ul>
|
||||
<li>角色:你是谁</li>
|
||||
<li>边界:只能基于资料回答</li>
|
||||
<li>格式:分点、引用来源、给结论</li>
|
||||
<li>兜底:不知道就说不知道</li>
|
||||
<li>安全:不要泄露无权限信息</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card soft mt"><p class="lead" style="font-size:20px;margin-top:0">RAG 找资料;提示词规定资料的使用方式。</p></div>
|
||||
<div class="footer"><span>System Prompt + Context + User Question</span><span class="page"></span></div>
|
||||
<div class="notes">这一页承接用户大纲:给到 LLM 的时候需要系统提示词,由此引出提示词工程。提示词工程重点不是花哨话术,而是角色、边界、格式、引用和兜底。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="Agent">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>Agent</span><span>复杂任务的分工协作</span></div>
|
||||
<p class="kicker">复杂任务</p>
|
||||
<h2>复杂任务:多步骤、多角色、多 LLM 协作</h2>
|
||||
<div class="agent-map">
|
||||
<div class="agent"><span class="tag">规划者</span><h3 class="mt-sm">拆任务</h3><p>规划检索、工具调用和结果组织。</p></div>
|
||||
<div class="arrow">→</div>
|
||||
<div class="agent"><span class="tag mint">检索者</span><h3 class="mt-sm">查资料</h3><p>使用 RAG 从知识库里找依据,必要时多轮检索。</p></div>
|
||||
<div class="arrow">→</div>
|
||||
<div class="agent"><span class="tag orange">执行者</span><h3 class="mt-sm">调用工具</h3><p>查订单、建工单、读数据库、调用业务系统接口。</p></div>
|
||||
</div>
|
||||
<div class="grid2 mt">
|
||||
<div class="card"><h3>Agent</h3><p class="lead" style="font-size:19px;margin-top:8px">目标驱动流程:规划步骤、选择工具、读取结果、继续推进。</p></div>
|
||||
<div class="card soft"><h3>和 RAG 的关系</h3><p class="lead" style="font-size:19px;margin-top:8px">RAG 提供知识入口;Agent 负责任务编排。</p></div>
|
||||
</div>
|
||||
<div class="footer"><span>RAG 是知识入口,Agent 是任务编排</span><span class="page"></span></div>
|
||||
<div class="notes">不要把 Agent 讲玄。它就是更复杂任务里的规划和编排:可能一个 LLM 规划,一个 LLM 检索,一个 LLM 写答案,也可能调用外部工具。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="MCP">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>MCP</span><span>工具连接</span></div>
|
||||
<p class="kicker">MCP</p>
|
||||
<h2>MCP:让 Agent 稳定连接外部工具和业务系统</h2>
|
||||
<div class="mcp-grid">
|
||||
<div class="mcp-card"><span class="tag">统一接口</span><h3 class="mt-sm">工具接入规范</h3><p class="small mt-sm">把不同系统的能力包装成模型可调用的工具。</p></div>
|
||||
<div class="mcp-card"><span class="tag mint">上下文供给</span><h3 class="mt-sm">读取外部信息</h3><p class="small mt-sm">查数据库、读文件、取工单、访问知识系统。</p></div>
|
||||
<div class="mcp-card"><span class="tag orange">动作执行</span><h3 class="mt-sm">调用业务能力</h3><p class="small mt-sm">创建工单、查询订单、发送通知、写入结果。</p></div>
|
||||
</div>
|
||||
<div class="tool-list">
|
||||
<span>CRM</span><span>工单</span><span>数据库</span><span>搜索</span><span>文件</span>
|
||||
</div>
|
||||
<div class="card soft mt"><p class="lead" style="font-size:20px;margin-top:0">RAG 负责查知识;Agent 负责编排任务;MCP 负责连接工具。</p></div>
|
||||
<div class="footer"><span>RAG · Agent · MCP</span><span class="page"></span></div>
|
||||
<div class="notes">这页讲 MCP。它不需要展开协议细节,只要说明 MCP 是让模型/Agent 稳定连接外部工具和系统的接口层。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="slide" data-title="总结">
|
||||
<div class="slide-inner">
|
||||
<div class="topline"><span>Takeaway</span><span>关键链路</span></div>
|
||||
<p class="kicker">核心链路</p>
|
||||
<h2>RAG → LLM 限制 → 切片 → 向量化 → 语义检索 → 排序 → 提示词 → Agent → MCP</h2>
|
||||
<div class="summary">
|
||||
<div class="card"><span class="tag rose">限制</span><h3 class="mt-sm">LLM 不能吃下全部知识</h3><p class="small mt-sm">上下文有限、会幻觉、专注度和顺序都不稳定。</p></div>
|
||||
<div class="card"><span class="tag mint">建库</span><h3 class="mt-sm">资料要先整理成片段</h3><p class="small mt-sm">清洗、切片、向量化、加元数据,再放入向量库。</p></div>
|
||||
<div class="card"><span class="tag orange">检索</span><h3 class="mt-sm">提问时先找资料</h3><p class="small mt-sm">召回候选,再排序过滤,把最相关内容放进上下文。</p></div>
|
||||
<div class="card"><span class="tag">扩展</span><h3 class="mt-sm">Agent + MCP</h3><p class="small mt-sm">Agent 编排多步骤任务;MCP 连接外部工具和系统。</p></div>
|
||||
</div>
|
||||
<div class="card soft mt"><p class="lead" style="font-size:21px;margin-top:0">RAG 不是替代 LLM,而是给 LLM 配一套“会查资料的工作台”。</p></div>
|
||||
<div class="footer"><span>End</span><span class="page"></span></div>
|
||||
<div class="notes">收尾不要再加新概念。重复主线:LLM 有限制,所以需要 RAG;RAG 后台建库,前台检索排序,再通过提示词交给 LLM;复杂任务用 Agent。</div>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
</div>
|
||||
<div class="progress"><span id="progress"></span></div>
|
||||
<div class="nav-controls" aria-label="翻页导航">
|
||||
<button class="nav-btn" id="prevBtn" type="button" aria-label="上一页">‹</button>
|
||||
<span class="nav-index" id="navIndex">1 / 13</span>
|
||||
<button class="nav-btn" id="nextBtn" type="button" aria-label="下一页">›</button>
|
||||
</div>
|
||||
<script>
|
||||
(function(){
  // Slide-deck controller: shows exactly one `.slide` at a time and keeps the
  // page labels, nav buttons, progress bar and URL hash (`#/<1-based index>`)
  // in sync with the current slide.
  const slides = Array.from(document.querySelectorAll('.slide'));
  const progress = document.getElementById('progress');
  const prevBtn = document.getElementById('prevBtn');
  const nextBtn = document.getElementById('nextBtn');
  const navIndex = document.getElementById('navIndex');
  let idx = 0;

  // Restricts a requested slide index to the valid range [0, slides.length - 1].
  function clampIndex(n){
    return Math.max(0,Math.min(slides.length-1,n));
  }

  // Repaints every piece of UI that depends on `idx`.
  function render(){
    const label = (idx+1)+' / '+slides.length;
    slides.forEach((s,i)=>s.classList.toggle('active',i===idx));
    document.querySelectorAll('.page').forEach(el=>{el.textContent=label});
    navIndex.textContent = label;
    prevBtn.disabled = idx === 0;
    nextBtn.disabled = idx === slides.length - 1;
    progress.style.width = ((idx+1)/slides.length*100)+'%';
    // replaceState (not pushState) so paging does not add history entries.
    if(location.hash !== '#/'+(idx+1)) history.replaceState(null,'','#/'+(idx+1));
  }

  // Jumps to slide `n` (0-based); out-of-range values are clamped.
  function go(n){
    idx = clampIndex(n);
    render();
  }

  // Reads a 1-based slide number from the URL hash, if present, then renders.
  function fromHash(){
    const m = location.hash.match(/#\/(\d+)/);
    if(m) idx = clampIndex(Number(m[1])-1);
    render();
  }

  window.addEventListener('hashchange',fromHash);
  prevBtn.addEventListener('click',()=>go(idx-1));
  nextBtn.addEventListener('click',()=>go(idx+1));

  document.addEventListener('keydown',e=>{
    // Leave modified key presses to the browser: without this guard Ctrl/Cmd+F
    // would request fullscreen and Alt+ArrowLeft (history back) would be
    // cancelled and turned into a slide change.
    if(e.ctrlKey||e.metaKey||e.altKey) return;
    if(['ArrowRight','PageDown',' '].includes(e.key)){e.preventDefault();go(idx+1)}
    if(['ArrowLeft','PageUp'].includes(e.key)){e.preventDefault();go(idx-1)}
    if(e.key==='Home'){e.preventDefault();go(0)}
    if(e.key==='End'){e.preventDefault();go(slides.length-1)}
    if(e.key==='f'||e.key==='F'){
      // Fullscreen is best-effort: the API may be missing or the request may be
      // rejected, and neither case should surface as an unhandled rejection.
      document.documentElement.requestFullscreen?.()?.catch(()=>{});
    }
  });

  fromHash();
})();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -24,6 +24,7 @@ if (args[0] !== 'app-server') {
|
||||
|
||||
const threads = new Map();
|
||||
const pendingServerRequests = new Map();
|
||||
const resumeMismatchThreads = new Set();
|
||||
let nextServerRequestId = 1;
|
||||
let mcpReloadCount = 0;
|
||||
|
||||
@@ -64,6 +65,12 @@ function tokenUsage(text) {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Derives the key under which retry attempts for a scenario are counted.
 *
 * When `text` contains `marker` (case-insensitive, matched literally), the
 * marker itself is the key, so differently worded prompts for the same
 * scenario share one counter. Otherwise the full text is the key.
 *
 * @param {*} text - Prompt text; falsy values are treated as ''.
 * @param {*} marker - Scenario marker; coerced to a string, null/undefined become ''.
 * @returns {string} The marker when it occurs in the text, else the text.
 */
function retryScenarioKey(text, marker) {
  const haystack = String(text || '');
  const needle = String(marker ?? '');
  // An empty marker would match every text and collapse all scenarios into a
  // single '' key, so fall back to the text itself.
  if (needle === '') return haystack;
  // Escape regex metacharacters so the marker is matched as plain text.
  const escaped = needle.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  return new RegExp(escaped, 'i').test(haystack) ? needle : haystack;
}
|
||||
|
||||
function collaborationSummary(params = {}) {
|
||||
const collaborationMode = params.collaborationMode;
|
||||
const settings = collaborationMode?.settings || {};
|
||||
@@ -623,8 +630,9 @@ function startTurn(params) {
|
||||
}
|
||||
|
||||
if (/codexapp capacity retry/i.test(text)) {
|
||||
const attempts = (thread.capacityRetryAttempts.get(text) || 0) + 1;
|
||||
thread.capacityRetryAttempts.set(text, attempts);
|
||||
const retryKey = retryScenarioKey(text, 'codexapp capacity retry');
|
||||
const attempts = (thread.capacityRetryAttempts.get(retryKey) || 0) + 1;
|
||||
thread.capacityRetryAttempts.set(retryKey, attempts);
|
||||
if (attempts <= 2) {
|
||||
if (attempts === 2) emitPartialCapacityOutput(thread, turnId);
|
||||
emitCapacityError(thread, turnId);
|
||||
@@ -633,8 +641,9 @@ function startTurn(params) {
|
||||
}
|
||||
|
||||
if (/codexapp reconnect retry/i.test(text)) {
|
||||
const attempts = (thread.reconnectRetryAttempts.get(text) || 0) + 1;
|
||||
thread.reconnectRetryAttempts.set(text, attempts);
|
||||
const retryKey = retryScenarioKey(text, 'codexapp reconnect retry');
|
||||
const attempts = (thread.reconnectRetryAttempts.get(retryKey) || 0) + 1;
|
||||
thread.reconnectRetryAttempts.set(retryKey, attempts);
|
||||
if (attempts === 1) {
|
||||
emitPartialCapacityOutput(thread, turnId);
|
||||
send({
|
||||
@@ -650,6 +659,17 @@ function startTurn(params) {
|
||||
}
|
||||
}
|
||||
|
||||
if (/codexapp retry thread mismatch/i.test(text)) {
|
||||
const retryKey = retryScenarioKey(text, 'codexapp retry thread mismatch');
|
||||
const attempts = (thread.capacityRetryAttempts.get(retryKey) || 0) + 1;
|
||||
thread.capacityRetryAttempts.set(retryKey, attempts);
|
||||
if (attempts === 1) {
|
||||
resumeMismatchThreads.add(thread.id);
|
||||
emitCapacityError(thread, turnId);
|
||||
return { turn: { id: turnId, status: 'running', items: [] } };
|
||||
}
|
||||
}
|
||||
|
||||
if (/collaboration/i.test(text)) {
|
||||
completeTurn(thread, turnId, `collaboration mode: ${collaborationSummary(params)}`);
|
||||
return { turn: { id: turnId, status: 'running', items: [] } };
|
||||
@@ -787,6 +807,11 @@ function handleRequest(message) {
|
||||
return;
|
||||
}
|
||||
if (method === 'thread/resume') {
|
||||
if (params.threadId && resumeMismatchThreads.delete(params.threadId)) {
|
||||
const thread = ensureThread(null, params);
|
||||
send({ id, result: { thread: threadPayload(thread), model: params.model || 'gpt-5.5', cwd: thread.cwd, modelProvider: 'mock', approvalPolicy: params.approvalPolicy || 'never', approvalsReviewer: 'user', sandbox: params.sandbox || 'danger-full-access' } });
|
||||
return;
|
||||
}
|
||||
const thread = ensureThread(params.threadId, params);
|
||||
send({ id, result: { thread: threadPayload(thread), model: params.model || 'gpt-5.5', cwd: thread.cwd, modelProvider: 'mock', approvalPolicy: params.approvalPolicy || 'never', approvalsReviewer: 'user', sandbox: params.sandbox || 'danger-full-access' } });
|
||||
return;
|
||||
|
||||
@@ -1311,17 +1311,21 @@ async function main() {
|
||||
/自动重试/.test(msg.message || '')
|
||||
), 10000);
|
||||
assert(/Codex 服务暂时繁忙/.test(codexAppCapacityRetryNotice.message || ''), 'Codex App transient capacity failure should announce automatic retry');
|
||||
assert(/第 1\/2 次/.test(codexAppCapacityRetryNotice.message || ''), 'Codex App transient retry should start at attempt 1');
|
||||
assert(/从中断处继续/.test(codexAppCapacityRetryNotice.message || ''), 'Codex App retry after a started turn should announce continuation mode');
|
||||
const codexAppPartialCapacityRetryNotice = await nextMessage(messages, ws, (msg) => (
|
||||
msg.type === 'system_message' &&
|
||||
msg.sessionId === codexAppSession.sessionId &&
|
||||
/自动重试/.test(msg.message || '')
|
||||
), 10000);
|
||||
assert(/第 2\/2 次/.test(codexAppPartialCapacityRetryNotice.message || ''), 'Codex App transient retry should continue after partial output');
|
||||
assert(/从中断处继续/.test(codexAppPartialCapacityRetryNotice.message || ''), 'Codex App partial-output retry should stay in continuation mode');
|
||||
await nextMessage(messages, ws, (msg) => msg.type === 'done' && msg.sessionId === codexAppSession.sessionId, 20000);
|
||||
const storedCodexAppAfterCapacityRetry = JSON.parse(fs.readFileSync(path.join(sessionsDir, `${codexAppSession.sessionId}.json`), 'utf8'));
|
||||
const codexAppCapacityRetryUsers = storedCodexAppAfterCapacityRetry.messages.filter((message) => message.role === 'user' && message.content === codexAppRetryText);
|
||||
assert(codexAppCapacityRetryUsers.length === 1, 'Codex App transient retry should not duplicate the user message');
|
||||
assert(storedCodexAppAfterCapacityRetry.messages.some((message) => message.role === 'assistant' && /codexapp capacity retry prompt/.test(String(message.content || ''))), 'Codex App transient retry should persist the successful assistant response');
|
||||
assert(storedCodexAppAfterCapacityRetry.messages.some((message) => message.role === 'assistant' && /继续上一轮/.test(String(message.content || ''))), 'Codex App transient retry should ask the model to continue instead of replaying the original prompt');
|
||||
|
||||
const codexAppReconnectRetryText = 'codexapp reconnect retry prompt';
|
||||
ws.send(JSON.stringify({ type: 'message', text: codexAppReconnectRetryText, sessionId: codexAppSession.sessionId, mode: 'yolo', agent: 'codexapp' }));
|
||||
@@ -1331,11 +1335,40 @@ async function main() {
|
||||
/自动重试/.test(msg.message || '')
|
||||
), 10000);
|
||||
assert(/Codex 服务暂时繁忙/.test(codexAppReconnectRetryNotice.message || ''), 'Codex App reconnect failure should announce automatic retry');
|
||||
assert(/第 1\/2 次/.test(codexAppReconnectRetryNotice.message || ''), 'Codex App retry counter should reset after the previous retry succeeds');
|
||||
assert(/从中断处继续/.test(codexAppReconnectRetryNotice.message || ''), 'Codex App reconnect retry after a started turn should announce continuation mode');
|
||||
await nextMessage(messages, ws, (msg) => msg.type === 'done' && msg.sessionId === codexAppSession.sessionId, 20000);
|
||||
const storedCodexAppAfterReconnectRetry = JSON.parse(fs.readFileSync(path.join(sessionsDir, `${codexAppSession.sessionId}.json`), 'utf8'));
|
||||
const codexAppReconnectRetryUsers = storedCodexAppAfterReconnectRetry.messages.filter((message) => message.role === 'user' && message.content === codexAppReconnectRetryText);
|
||||
assert(codexAppReconnectRetryUsers.length === 1, 'Codex App reconnect retry should not duplicate the user message');
|
||||
assert(storedCodexAppAfterReconnectRetry.messages.some((message) => message.role === 'assistant' && /codexapp reconnect retry prompt/.test(String(message.content || ''))), 'Codex App reconnect retry should persist the successful assistant response');
|
||||
assert(storedCodexAppAfterReconnectRetry.messages.some((message) => message.role === 'assistant' && /继续上一轮/.test(String(message.content || ''))), 'Codex App reconnect retry should continue the interrupted turn instead of replaying the original prompt');
|
||||
|
||||
const codexAppThreadBeforeMismatch = storedCodexAppAfterReconnectRetry.codexAppThreadId;
|
||||
assert(codexAppThreadBeforeMismatch, 'Codex App retry mismatch regression needs an existing app-server thread');
|
||||
const codexAppRetryMismatchText = 'codexapp retry thread mismatch prompt';
|
||||
ws.send(JSON.stringify({ type: 'message', text: codexAppRetryMismatchText, sessionId: codexAppSession.sessionId, mode: 'yolo', agent: 'codexapp' }));
|
||||
const codexAppRetryMismatchNotice = await nextMessage(messages, ws, (msg) => (
|
||||
msg.type === 'system_message' &&
|
||||
msg.sessionId === codexAppSession.sessionId &&
|
||||
/自动重试/.test(msg.message || '')
|
||||
), 10000);
|
||||
assert(/Codex 服务暂时繁忙/.test(codexAppRetryMismatchNotice.message || ''), 'Codex App thread mismatch retry should first announce automatic retry');
|
||||
assert(/第 1\/2 次/.test(codexAppRetryMismatchNotice.message || ''), 'Codex App retry counter should reset for the next independent retryable turn');
|
||||
assert(/从中断处继续/.test(codexAppRetryMismatchNotice.message || ''), 'Codex App thread mismatch retry should also be a continuation retry');
|
||||
const codexAppRetryMismatchError = await nextMessage(messages, ws, (msg) => (
|
||||
msg.type === 'error' &&
|
||||
msg.sessionId === codexAppSession.sessionId &&
|
||||
/不同线程/.test(msg.message || '') &&
|
||||
/上下文丢失/.test(msg.message || '')
|
||||
), 20000);
|
||||
assert(/已停止/.test(codexAppRetryMismatchError.message || ''), 'Codex App retry should stop when resume returns a different thread');
|
||||
await nextMessage(messages, ws, (msg) => msg.type === 'done' && msg.sessionId === codexAppSession.sessionId, 20000);
|
||||
const storedCodexAppAfterRetryMismatch = JSON.parse(fs.readFileSync(path.join(sessionsDir, `${codexAppSession.sessionId}.json`), 'utf8'));
|
||||
assert(storedCodexAppAfterRetryMismatch.codexAppThreadId === codexAppThreadBeforeMismatch, 'Codex App retry mismatch must not replace the persisted app-server thread id');
|
||||
const codexAppRetryMismatchUsers = storedCodexAppAfterRetryMismatch.messages.filter((message) => message.role === 'user' && message.content === codexAppRetryMismatchText);
|
||||
assert(codexAppRetryMismatchUsers.length === 1, 'Codex App retry mismatch should not duplicate the user message');
|
||||
assert(!storedCodexAppAfterRetryMismatch.messages.some((message) => message.role === 'assistant' && /codexapp retry thread mismatch prompt/.test(String(message.content || ''))), 'Codex App retry mismatch should not persist a successful assistant response on the wrong thread');
|
||||
|
||||
ws.send(JSON.stringify({ type: 'message', text: '/goal improve benchmark coverage', sessionId: codexAppSession.sessionId, mode: 'yolo', agent: 'codexapp' }));
|
||||
const codexAppGoalSet = await nextMessage(messages, ws, (msg) => msg.type === 'system_message' && msg.sessionId === codexAppSession.sessionId && /Goal active/.test(msg.message || '') && /improve benchmark coverage/.test(msg.message || ''));
|
||||
|
||||
172
server.js
172
server.js
@@ -4689,6 +4689,77 @@ function hasRuntimeOutput(entry) {
|
||||
return Array.isArray(entry.toolCalls) && entry.toolCalls.length > 0;
|
||||
}
|
||||
|
||||
/**
 * Trim `value` and, if it is longer than `maxChars`, keep only its tail
 * behind a marker saying that the head was dropped.
 *
 * The marker counts toward the budget: the kept tail is at most
 * `maxChars - marker.length` UTF-16 units (never negative). If the cut would
 * land between the two halves of a surrogate pair, it is moved forward by one
 * unit so the tail never starts with an orphaned low surrogate.
 *
 * @param {*} value - Text to shorten; falsy values are treated as ''.
 * @param {number} maxChars - Maximum length of the returned preview.
 * @returns {string} The trimmed text, or marker + tail when it was too long.
 */
function retryTailText(value, maxChars) {
  const text = String(value || '').trim();
  if (!text || text.length <= maxChars) return text;
  const marker = '[cc-web: 前文过长,下面仅保留尾部]\n';
  const tailBudget = Math.max(0, maxChars - marker.length);
  let start = Math.max(0, text.length - tailBudget);
  if (start > 0 && start < text.length) {
    const lead = text.charCodeAt(start - 1);
    const trail = text.charCodeAt(start);
    const splitsPair = lead >= 0xd800 && lead <= 0xdbff && trail >= 0xdc00 && trail <= 0xdfff;
    // Skip the dangling low surrogate instead of emitting a broken character.
    if (splitsPair) start += 1;
  }
  return `${marker}${text.slice(start)}`;
}
|
||||
|
||||
/**
 * Render an arbitrary value as a bounded text preview for a retry prompt.
 *
 * - null / undefined become ''.
 * - Strings are passed straight to `retryTailText`.
 * - Anything else is sanitized with `sanitizePersistValue`, pretty-printed as
 *   JSON and tail-truncated; if that throws, `String(value)` is used instead.
 *
 * @param {*} value - Value to preview.
 * @param {number} maxChars - Length budget passed to the truncation helpers.
 * @returns {string} The preview text.
 */
function retryPreviewValue(value, maxChars) {
  if (value == null) {
    return '';
  }
  if (typeof value === 'string') {
    return retryTailText(value, maxChars);
  }
  try {
    const sanitizeLimits = {
      maxString: maxChars,
      maxDepth: 4,
      maxArray: 20,
      maxKeys: 40,
    };
    const sanitized = sanitizePersistValue(value, sanitizeLimits);
    const serialized = JSON.stringify(sanitized, null, 2);
    return retryTailText(serialized, maxChars);
  } catch {
    // Serialization failed (e.g. cyclic data); fall back to plain coercion.
    return retryTailText(String(value), maxChars);
  }
}
|
||||
|
||||
/**
 * Summarize the most recent tool calls as a numbered plain-text list.
 *
 * Only the last 8 truthy entries are kept. Each entry shows its name
 * (falling back to kind, id, then `tool-N`), its status, and previews of its
 * input and result when those are present.
 *
 * @param {Array<object>} toolCalls - Tool-call records; non-arrays yield ''.
 * @returns {string} Newline-joined summary, or '' when there is nothing to show.
 */
function buildCodexRetryToolSummary(toolCalls) {
  if (!Array.isArray(toolCalls)) return '';
  const recentCalls = toolCalls.filter(Boolean).slice(-8);
  if (recentCalls.length === 0) return '';

  const sections = [];
  recentCalls.forEach((call, position) => {
    const ordinal = position + 1;
    const label = call.name || call.kind || call.id || `tool-${ordinal}`;
    let state = 'done';
    if (!call.done) {
      state = call.status || call.meta?.status || 'inProgress';
    }
    const detail = [`${ordinal}. ${label} (${state})`];
    if (call.input !== undefined && call.input !== null) {
      detail.push(` input: ${retryPreviewValue(call.input, 1200)}`);
    }
    if (call.result !== undefined && call.result !== null) {
      detail.push(` result: ${retryPreviewValue(call.result, 2400)}`);
    }
    sections.push(detail.join('\n'));
  });
  return sections.join('\n');
}
|
||||
|
||||
/**
 * Decide whether a retry should continue the interrupted turn instead of
 * replaying the original prompt.
 *
 * True only for `codexapp` entries that either have a turn id or for which
 * `hasRuntimeOutput` reports output.
 *
 * @param {object|null|undefined} entry - Runtime entry of the failed turn.
 * @returns {boolean} Whether to use a continuation retry.
 */
function shouldUseCodexAppContinuationRetry(entry) {
  const agent = entry?.agent || '';
  if (agent !== 'codexapp') {
    return false;
  }
  if (entry.turnId) {
    return true;
  }
  return Boolean(hasRuntimeOutput(entry));
}
|
||||
|
||||
/**
 * Build the prompt sent on a continuation retry of an interrupted turn.
 *
 * The prompt starts with two fixed instruction paragraphs, then adds a section
 * for each non-empty piece of observed state, and ends with a fixed closing
 * line. Sections are separated by blank lines. The optional sections are:
 * - the original request (preview capped at 5000),
 * - the assistant output seen before the interruption (capped at 7000),
 * - the tool-call summary from `buildCodexRetryToolSummary`,
 * - the interruption error (capped at 1200).
 *
 * @param {object} entry - Runtime entry; reads `fullText`, `toolCalls`, `lastError`.
 * @param {object} retryRequest - Retry request; the first non-empty of
 *   `originalRuntimeText`, `runtimeText`, `originalText`, `text` is used.
 * @param {*} rawError - Error that interrupted the turn; falls back to `entry.lastError`.
 * @returns {string} The assembled continuation prompt.
 */
function buildCodexAppContinuationRetryText(entry, retryRequest, rawError) {
  const requestSource = retryRequest.originalRuntimeText
    || retryRequest.runtimeText
    || retryRequest.originalText
    || retryRequest.text
    || '';
  const original = retryPreviewValue(requestSource, 5000);
  const partialText = retryPreviewValue(entry.fullText || '', 7000);
  const toolSummary = buildCodexRetryToolSummary(entry.toolCalls || []);
  const errorText = retryPreviewValue(rawError || entry.lastError || '', 1200);

  // Heading/body pairs; a pair is emitted only when its body is non-empty.
  const optionalSections = [
    ['原始用户请求(仅用于理解目标,不要当成新请求从头执行):', original],
    ['cc-web 已观察到的中断前助手输出(尾部):', partialText],
    ['cc-web 已观察到的工具/执行摘要:', toolSummary],
    ['中断原因:', errorText],
  ];

  const sections = [
    '继续上一轮被临时服务或网络错误中断的 Codex App 任务。',
    '不要从头重做,不要重复已经完成的命令、工具调用或文件修改;请基于现有线程上下文和下面 cc-web 已观察到的中断前状态继续执行。不要在回复中复述这段内部重试说明。',
  ];
  for (const [heading, body] of optionalSections) {
    if (body) {
      sections.push(`${heading}\n${body}`);
    }
  }
  sections.push('请从中断处继续完成剩余工作。');
  return sections.join('\n\n');
}
|
||||
|
||||
/**
 * Load the Codex configuration and return its `retry` section after it has
 * been passed through `normalizeCodexRetryConfig`.
 *
 * @returns {*} Whatever `normalizeCodexRetryConfig` returns for the loaded `retry` value.
 */
function getCodexRetryConfig() {
  const { retry } = loadCodexConfig();
  return normalizeCodexRetryConfig(retry);
}
|
||||
@@ -4723,7 +4794,7 @@ function scheduleCodexCapacityRetry(sessionId, entry, rawError) {
|
||||
cancelCodexCapacityRetry(sessionId);
|
||||
return false;
|
||||
}
|
||||
const attempts = (previous?.attempts || 0) + 1;
|
||||
const attempts = (previous?.attempts || entry.codexRetry?.attempt || 0) + 1;
|
||||
if (retryConfig.mode === 'limited' && attempts > retryConfig.maxAttempts) {
|
||||
cancelCodexCapacityRetry(sessionId);
|
||||
return false;
|
||||
@@ -4731,14 +4802,32 @@ function scheduleCodexCapacityRetry(sessionId, entry, rawError) {
|
||||
|
||||
const delayMs = codexTransientRetryDelayMs(retryConfig);
|
||||
if (previous?.timer) clearTimeout(previous.timer);
|
||||
const expectedThreadId = retryRequest.expectedThreadId
|
||||
|| entry.expectedThreadId
|
||||
|| entry.codexRetry?.expectedThreadId
|
||||
|| entry.threadId
|
||||
|| previous?.expectedThreadId
|
||||
|| null;
|
||||
const originalText = retryRequest.originalText || retryRequest.text || retryRequest.runtimeText || '';
|
||||
const originalRuntimeText = retryRequest.originalRuntimeText || retryRequest.runtimeText || retryRequest.text || '';
|
||||
const useContinuationRetry = shouldUseCodexAppContinuationRetry(entry);
|
||||
const continuationText = useContinuationRetry
|
||||
? buildCodexAppContinuationRetryText(entry, retryRequest, rawError)
|
||||
: '';
|
||||
const retryRuntimeText = continuationText || originalRuntimeText || originalText;
|
||||
const retryText = retryRuntimeText || originalText;
|
||||
|
||||
const retry = {
|
||||
text: retryRequest.text || retryRequest.runtimeText || '',
|
||||
runtimeText: retryRequest.runtimeText || retryRequest.text || '',
|
||||
text: retryText,
|
||||
runtimeText: retryRuntimeText,
|
||||
originalText,
|
||||
originalRuntimeText,
|
||||
mode: retryRequest.mode || 'yolo',
|
||||
agent: retryRequest.agent || entry.agent || 'codex',
|
||||
attachments: Array.isArray(retryRequest.attachments) ? retryRequest.attachments : [],
|
||||
attachments: useContinuationRetry ? [] : (Array.isArray(retryRequest.attachments) ? retryRequest.attachments : []),
|
||||
mcpContext: retryRequest.mcpContext || {},
|
||||
expectedThreadId,
|
||||
useContinuationRetry,
|
||||
attempts,
|
||||
retryMode: retryConfig.mode,
|
||||
timer: null,
|
||||
@@ -4756,17 +4845,22 @@ function scheduleCodexCapacityRetry(sessionId, entry, rawError) {
|
||||
return;
|
||||
}
|
||||
if (activeProcesses.has(sessionId) || activeCodexAppTurns.has(sessionId)) {
|
||||
pendingCodexCapacityRetries.delete(sessionId);
|
||||
plog('WARN', 'codex_capacity_retry_skipped_busy', {
|
||||
sessionId: sessionId.slice(0, 8),
|
||||
attempt: latest.attempts,
|
||||
});
|
||||
if (latest.ws && latest.ws.readyState === 1) sendSessionList(latest.ws);
|
||||
return;
|
||||
}
|
||||
|
||||
pendingCodexCapacityRetries.delete(sessionId);
|
||||
const ws = latest.ws && latest.ws.readyState === 1 ? latest.ws : null;
|
||||
plog('INFO', 'codex_capacity_retry_start', {
|
||||
sessionId: sessionId.slice(0, 8),
|
||||
attempt: latest.attempts,
|
||||
expectedThreadId: latest.expectedThreadId ? String(latest.expectedThreadId).slice(0, 24) : null,
|
||||
continuation: !!latest.useContinuationRetry,
|
||||
});
|
||||
handleMessage(ws, {
|
||||
type: 'message',
|
||||
@@ -4779,6 +4873,17 @@ function scheduleCodexCapacityRetry(sessionId, entry, rawError) {
|
||||
hideInHistory: true,
|
||||
runtimeText: latest.runtimeText,
|
||||
mcpContext: latest.mcpContext,
|
||||
codexRetry: latest.agent === 'codexapp'
|
||||
? {
|
||||
isAutoRetry: true,
|
||||
attempt: latest.attempts,
|
||||
retryMode: latest.retryMode,
|
||||
expectedThreadId: latest.expectedThreadId || null,
|
||||
originalText: latest.originalText || latest.text || '',
|
||||
originalRuntimeText: latest.originalRuntimeText || latest.runtimeText || '',
|
||||
useContinuationRetry: !!latest.useContinuationRetry,
|
||||
}
|
||||
: null,
|
||||
skipPendingCrossConversationFlush: true,
|
||||
});
|
||||
}, delayMs);
|
||||
@@ -4790,16 +4895,19 @@ function scheduleCodexCapacityRetry(sessionId, entry, rawError) {
|
||||
maxAttempts: retryConfig.mode === 'limited' ? retryConfig.maxAttempts : null,
|
||||
retryMode: retryConfig.mode,
|
||||
delayMs,
|
||||
expectedThreadId: expectedThreadId ? String(expectedThreadId).slice(0, 24) : null,
|
||||
continuation: useContinuationRetry,
|
||||
error: String(rawError || '').slice(0, 300),
|
||||
});
|
||||
if (entry.ws) {
|
||||
const attemptText = retryConfig.mode === 'forever'
|
||||
? `第 ${attempts} 次`
|
||||
: `第 ${attempts}/${retryConfig.maxAttempts} 次`;
|
||||
const continuationText = useContinuationRetry ? ',将从中断处继续' : '';
|
||||
wsSend(entry.ws, {
|
||||
type: 'system_message',
|
||||
sessionId,
|
||||
message: `Codex 服务暂时繁忙,${retryConfig.intervalSeconds} 秒后自动重试(${attemptText})。`,
|
||||
message: `Codex 服务暂时繁忙,${retryConfig.intervalSeconds} 秒后自动重试(${attemptText}${continuationText})。`,
|
||||
});
|
||||
}
|
||||
return true;
|
||||
@@ -6869,6 +6977,7 @@ function handleMessage(ws, msg, options = {}) {
|
||||
return handleCodexAppMessage(ws, session, runtimeTextValue, resolvedAttachments, {
|
||||
mcpContext: options.mcpContext || {},
|
||||
crossConversation: options.crossConversation || null,
|
||||
codexRetry: options.codexRetry || null,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -8236,6 +8345,19 @@ function handleCodexAppMessage(ws, session, runtimeTextValue, resolvedAttachment
|
||||
return { ok: false, code: 'empty_message', message: '消息内容不能为空。' };
|
||||
}
|
||||
|
||||
const codexRetry = options.codexRetry && typeof options.codexRetry === 'object'
|
||||
? {
|
||||
isAutoRetry: !!options.codexRetry.isAutoRetry,
|
||||
attempt: Number.isFinite(Number(options.codexRetry.attempt)) ? Number(options.codexRetry.attempt) : null,
|
||||
retryMode: options.codexRetry.retryMode || null,
|
||||
expectedThreadId: options.codexRetry.expectedThreadId || null,
|
||||
originalText: options.codexRetry.originalText || null,
|
||||
originalRuntimeText: options.codexRetry.originalRuntimeText || null,
|
||||
useContinuationRetry: !!options.codexRetry.useContinuationRetry,
|
||||
}
|
||||
: null;
|
||||
const currentThreadId = getRuntimeSessionId(session);
|
||||
const expectedThreadId = codexRetry?.expectedThreadId || currentThreadId || null;
|
||||
const retryAttachments = resolvedAttachments.map((attachment) => ({
|
||||
id: attachment.id,
|
||||
kind: 'image',
|
||||
@@ -8250,24 +8372,30 @@ function handleCodexAppMessage(ws, session, runtimeTextValue, resolvedAttachment
|
||||
ws,
|
||||
agent: 'codexapp',
|
||||
cwd: session.cwd || getDefaultSessionCwd(),
|
||||
threadId: getRuntimeSessionId(session),
|
||||
threadId: expectedThreadId,
|
||||
expectedThreadId,
|
||||
turnId: null,
|
||||
fullText: '',
|
||||
toolCalls: [],
|
||||
toolOutputDeltas: new Map(),
|
||||
agentMessageItems: new Map(),
|
||||
mcpContext: options.mcpContext || {},
|
||||
codexRetry,
|
||||
lastUsage: null,
|
||||
lastError: null,
|
||||
errorSent: false,
|
||||
crossConversationReplyRequestId: options.crossConversation?.replyRequestId || null,
|
||||
retryRequest: {
|
||||
text: runtimeTextValue,
|
||||
runtimeText: runtimeTextValue,
|
||||
text: codexRetry?.originalText || runtimeTextValue,
|
||||
runtimeText: codexRetry?.originalRuntimeText || runtimeTextValue,
|
||||
originalText: codexRetry?.originalText || runtimeTextValue,
|
||||
originalRuntimeText: codexRetry?.originalRuntimeText || runtimeTextValue,
|
||||
lastRetryText: runtimeTextValue,
|
||||
mode: session.permissionMode || 'yolo',
|
||||
agent: 'codexapp',
|
||||
attachments: retryAttachments,
|
||||
mcpContext: options.mcpContext || {},
|
||||
expectedThreadId,
|
||||
},
|
||||
clientUserMessageId: crypto.randomUUID(),
|
||||
startedAt: new Date().toISOString(),
|
||||
@@ -8293,11 +8421,33 @@ async function startCodexAppTurn(sessionId, input) {
|
||||
const client = clientResult.client;
|
||||
await client.start();
|
||||
|
||||
let threadId = getRuntimeSessionId(session);
|
||||
const currentThreadId = getRuntimeSessionId(session);
|
||||
const expectedThreadId = entry.expectedThreadId
|
||||
|| entry.codexRetry?.expectedThreadId
|
||||
|| entry.retryRequest?.expectedThreadId
|
||||
|| entry.threadId
|
||||
|| currentThreadId
|
||||
|| null;
|
||||
let threadId = expectedThreadId || currentThreadId;
|
||||
const threadParams = codexAppThreadParams(session, { mcpContext: entry.mcpContext || {} });
|
||||
if (threadId) {
|
||||
const resumed = await client.request('thread/resume', { ...threadParams, threadId }, 60000);
|
||||
threadId = resumed?.thread?.id || threadId;
|
||||
const requestedThreadId = threadId;
|
||||
const resumed = await client.request('thread/resume', { ...threadParams, threadId: requestedThreadId }, 60000);
|
||||
const resumedThreadId = resumed?.thread?.id || requestedThreadId;
|
||||
if (expectedThreadId && resumedThreadId !== expectedThreadId) {
|
||||
const expectedShort = String(expectedThreadId).slice(0, 24);
|
||||
const actualShort = String(resumedThreadId).slice(0, 24);
|
||||
plog('WARN', 'codex_app_thread_resume_mismatch', {
|
||||
sessionId: sessionId.slice(0, 8),
|
||||
expectedThreadId: expectedShort,
|
||||
actualThreadId: actualShort,
|
||||
autoRetry: !!entry.codexRetry?.isAutoRetry,
|
||||
retryAttempt: entry.codexRetry?.attempt || null,
|
||||
});
|
||||
const prefix = entry.codexRetry?.isAutoRetry ? 'Codex App 自动重试' : 'Codex App';
|
||||
throw new Error(`${prefix}恢复到不同线程,已停止以避免上下文丢失(期望 ${expectedShort},实际 ${actualShort})。`);
|
||||
}
|
||||
threadId = resumedThreadId;
|
||||
} else {
|
||||
const started = await client.request('thread/start', { ...threadParams, sessionStartSource: 'startup' }, 60000);
|
||||
threadId = started?.thread?.id || null;
|
||||
|
||||
Reference in New Issue
Block a user