feat: enhance codex app and cross-conversation messaging

This commit is contained in:
shiyue
2026-06-13 22:13:30 +08:00
parent 04e15c9c89
commit 4a1c988990
10 changed files with 3740 additions and 179 deletions

View File

@@ -11,6 +11,7 @@ const REPO_DIR = path.resolve(__dirname, '..');
// Path to server.js inside REPO_DIR (presumably the server this script launches — confirm against the spawn code outside this view).
const SERVER_PATH = path.join(REPO_DIR, 'server.js');
// Stand-in agent scripts under REPO_DIR/scripts. MOCK_CLAUDE is supplied as
// CLAUDE_PATH in the environment object built later in this script.
const MOCK_CLAUDE = path.join(REPO_DIR, 'scripts', 'mock-claude.js');
const MOCK_CODEX = path.join(REPO_DIR, 'scripts', 'mock-codex.js');
// Mock Codex app-server script; supplied as CODEX_PATH in the environment object built later in this script.
const MOCK_CODEX_APP_SERVER = path.join(REPO_DIR, 'scripts', 'mock-codex-app-server.js');
// True only when `sqlite3 -version` could be spawned and exited with status 0.
// Output is discarded (stdio: 'ignore'); a failed spawn yields a non-zero/null status, so this is false.
const HAS_SQLITE3 = spawnSync('sqlite3', ['-version'], { stdio: 'ignore' }).status === 0;
function mkdirp(dir) {
@@ -397,6 +398,29 @@ async function main() {
qqbot: { qmsgKey: '' },
}, null, 2));
const skillDir = path.join(homeDir, '.codex', 'skills', 'regression-skill');
mkdirp(skillDir);
fs.writeFileSync(path.join(skillDir, 'SKILL.md'), [
'---',
'name: regression-skill',
'description: Regression skill for composer suggestions.',
'---',
'',
'# Regression Skill',
'',
'Use this only in regression tests.',
].join('\n'));
fs.writeFileSync(path.join(configDir, 'prompts.json'), JSON.stringify({
prompts: [
{
name: 'shipit',
title: 'Ship It',
description: 'Regression prompt template.',
content: 'Regression prompt body from @shipit.',
},
],
}, null, 2));
createFakeClaudeHistory(homeDir);
createFakeCodexConfig(homeDir);
const codexFixture = createFakeCodexHistory(homeDir);
@@ -414,7 +438,7 @@ async function main() {
CC_WEB_LOGS_DIR: logsDir,
HOME: homeDir,
CLAUDE_PATH: MOCK_CLAUDE,
CODEX_PATH: MOCK_CODEX,
CODEX_PATH: MOCK_CODEX_APP_SERVER,
}, async () => {
await assertWsUpgradeRejected(port, '/not-ws');
@@ -467,11 +491,64 @@ async function main() {
const codexInitCwd = path.join(tempRoot, 'codex-space');
mkdirp(codexInitCwd);
fs.writeFileSync(path.join(codexInitCwd, 'context.txt'), 'Composer file context body.');
ws.send(JSON.stringify({ type: 'new_session', agent: 'codex', cwd: codexInitCwd, mode: 'plan' }));
const codexSession = await nextMessage(messages, ws, (msg) => msg.type === 'session_info' && msg.agent === 'codex' && msg.cwd === codexInitCwd);
assert(codexSession.mode === 'plan', 'Codex new_session should follow requested mode');
assert(codexSession.model === 'gpt-5.5(xhigh)', 'Codex new_session should read default model from ~/.codex/config.toml');
ws.send(JSON.stringify({ type: 'set_session_pinned', sessionId: codexSession.sessionId, pinned: true }));
const pinnedAck = await nextMessage(messages, ws, (msg) => msg.type === 'session_pinned' && msg.sessionId === codexSession.sessionId);
assert(pinnedAck.pinnedAt, 'Pinning a session should return pinnedAt');
const pinnedList = await nextMessage(messages, ws, (msg) => msg.type === 'session_list' && msg.sessions.some((s) => s.id === codexSession.sessionId && s.pinnedAt));
assert(pinnedList.sessions[0].id === codexSession.sessionId, 'Pinned session should sort before regular sessions');
let storedPinnedSession = JSON.parse(fs.readFileSync(path.join(sessionsDir, `${codexSession.sessionId}.json`), 'utf8'));
assert(storedPinnedSession.pinnedAt === pinnedAck.pinnedAt, 'Pinned state should persist to session JSON');
ws.send(JSON.stringify({ type: 'set_session_pinned', sessionId: codexSession.sessionId, pinned: false }));
const unpinnedAck = await nextMessage(messages, ws, (msg) => msg.type === 'session_pinned' && msg.sessionId === codexSession.sessionId && !msg.pinnedAt);
assert(unpinnedAck.pinnedAt === null, 'Unpinning a session should clear pinnedAt');
await nextMessage(messages, ws, (msg) => msg.type === 'session_list' && msg.sessions.some((s) => s.id === codexSession.sessionId && !s.pinnedAt));
storedPinnedSession = JSON.parse(fs.readFileSync(path.join(sessionsDir, `${codexSession.sessionId}.json`), 'utf8'));
assert(storedPinnedSession.pinnedAt === null, 'Unpinned state should persist to session JSON');
ws.send(JSON.stringify({ type: 'composer_suggestions', requestId: 'reg-slash', trigger: '/', query: 'mo', sessionId: codexSession.sessionId, agent: 'codex' }));
const slashComposer = await nextMessage(messages, ws, (msg) => msg.type === 'composer_suggestions' && msg.requestId === 'reg-slash');
assert(slashComposer.items.some((item) => item.kind === 'command' && item.name === '/model'), 'Composer slash suggestions should include /model');
ws.send(JSON.stringify({ type: 'composer_suggestions', requestId: 'reg-skill', trigger: '$', query: 'reg', sessionId: codexSession.sessionId, agent: 'codex' }));
const skillComposer = await nextMessage(messages, ws, (msg) => msg.type === 'composer_suggestions' && msg.requestId === 'reg-skill');
assert(skillComposer.items.some((item) => item.kind === 'skill' && item.name === 'regression-skill'), 'Composer skill suggestions should include local Codex skill');
ws.send(JSON.stringify({ type: 'composer_suggestions', requestId: 'reg-prompt', trigger: '@', query: 'ship', sessionId: codexSession.sessionId, agent: 'codex' }));
const promptComposer = await nextMessage(messages, ws, (msg) => msg.type === 'composer_suggestions' && msg.requestId === 'reg-prompt');
assert(promptComposer.items.some((item) => item.kind === 'prompt' && item.name === 'shipit'), 'Composer prompt suggestions should include configured prompt');
ws.send(JSON.stringify({ type: 'composer_suggestions', requestId: 'reg-file', trigger: '@', query: 'context', sessionId: codexSession.sessionId, agent: 'codex' }));
const fileComposer = await nextMessage(messages, ws, (msg) => msg.type === 'composer_suggestions' && msg.requestId === 'reg-file');
assert(fileComposer.items.some((item) => item.kind === 'file' && item.name === 'context.txt'), 'Composer file suggestions should include cwd file');
ws.send(JSON.stringify({
type: 'message',
text: '@shipit @context.txt $regression-skill run composer regression',
sessionId: codexSession.sessionId,
mode: 'plan',
agent: 'codex',
}));
const composerExpanded = await nextMessage(messages, ws, (msg) => (
msg.type === 'text_delta' &&
/BEGIN CC-WEB PROMPT: shipit/.test(msg.text || '') &&
/Composer file context body/.test(msg.text || '')
));
assert(/Regression prompt body from @shipit/.test(composerExpanded.text || ''), 'Composer runtime prompt should expand @prompt content');
await nextMessage(messages, ws, (msg) => msg.type === 'done' && msg.sessionId === codexSession.sessionId);
const storedComposerSession = JSON.parse(fs.readFileSync(path.join(sessionsDir, `${codexSession.sessionId}.json`), 'utf8'));
const storedComposerMessage = storedComposerSession.messages.find((message) => message.content === '@shipit @context.txt $regression-skill run composer regression');
assert(storedComposerMessage, 'Composer message should persist original user text');
assert(storedComposerMessage.composerMentions?.some((mention) => mention.kind === 'prompt' && mention.name === 'shipit'), 'Composer message should persist prompt mention metadata');
assert(storedComposerMessage.composerMentions?.some((mention) => mention.kind === 'file' && mention.name === 'context.txt'), 'Composer message should persist file mention metadata');
assert(storedComposerMessage.composerMentions?.some((mention) => mention.kind === 'skill' && mention.name === 'regression-skill'), 'Composer message should persist skill mention metadata');
const mcpList = await callInternalMcp(port, internalMcpToken, {
tool: 'ccweb_list_conversations',
sourceSessionId: codexSession.sessionId,
@@ -504,21 +581,76 @@ async function main() {
assert(crossUserBubble.message.crossConversation.hopCount === 1, 'Cross message should persist hop count');
await nextMessage(messages, ws, (msg) => msg.type === 'done' && msg.sessionId === crossTargetSession.sessionId);
const storedCrossTarget = JSON.parse(fs.readFileSync(path.join(sessionsDir, `${crossTargetSession.sessionId}.json`), 'utf8'));
const storedCrossSource = JSON.parse(fs.readFileSync(path.join(sessionsDir, `${codexSession.sessionId}.json`), 'utf8'));
const storedCrossMessage = storedCrossTarget.messages.find((message) => message.crossConversation?.messageId === crossSend.body.messageId);
assert(storedCrossMessage?.content === 'cross hello from mcp', 'Cross message should be persisted in target session');
assert(storedCrossMessage.crossConversation.sourceTitle === codexSession.title, 'Cross message should persist source title');
assert(storedCrossMessage.crossConversation.sourceTitle === storedCrossSource.title, 'Cross message should persist source title');
assert(storedCrossTarget.messages.some((message) => message.role === 'assistant' && /来自/.test(String(message.content || ''))), 'Cross message runtime prompt should include source context for the target agent');
const hopLimit = await callInternalMcp(port, internalMcpToken, {
const hopAllowed = await callInternalMcp(port, internalMcpToken, {
tool: 'ccweb_send_message',
sourceSessionId: crossTargetSession.sessionId,
sourceSessionId: codexSession.sessionId,
sourceHopCount: 1,
args: {
targetConversationId: codexSession.sessionId,
content: 'this should be blocked by hop limit',
targetConversationId: crossTargetSession.sessionId,
content: 'cross hop still allowed',
},
});
assert(hopLimit.status === 400 && hopLimit.body?.code === 'hop_limit_exceeded', 'MCP cross send should enforce hop limit');
assert(hopAllowed.status === 200 && hopAllowed.body?.ok, `MCP cross send should not enforce hop limit: ${JSON.stringify(hopAllowed.body)}`);
const hopAllowedBubble = await nextMessage(messages, ws, (msg) => (
msg.type === 'session_message' &&
msg.sessionId === crossTargetSession.sessionId &&
msg.message?.crossConversation?.messageId === hopAllowed.body.messageId &&
msg.message?.content === 'cross hop still allowed'
));
assert(hopAllowedBubble.message.crossConversation.hopCount === 2, 'Cross message should keep incrementing hop count without blocking');
await nextMessage(messages, ws, (msg) => msg.type === 'done' && msg.sessionId === crossTargetSession.sessionId);
const crossReplyTargetCwd = path.join(tempRoot, 'codex-mcp-cross-reply-target');
mkdirp(crossReplyTargetCwd);
ws.send(JSON.stringify({ type: 'new_session', agent: 'codex', cwd: crossReplyTargetCwd, mode: 'yolo' }));
const crossReplyTargetSession = await nextMessage(messages, ws, (msg) => msg.type === 'session_info' && msg.agent === 'codex' && msg.cwd === crossReplyTargetCwd);
const requestReply = await callInternalMcp(port, internalMcpToken, {
tool: 'ccweb_request_reply',
sourceSessionId: codexSession.sessionId,
sourceHopCount: 0,
args: {
targetConversationId: crossReplyTargetSession.sessionId,
content: 'cross reply requested',
},
});
assert(requestReply.status === 200 && requestReply.body?.ok, `MCP request reply should succeed: ${JSON.stringify(requestReply.body)}`);
assert(requestReply.body.requestId && requestReply.body.status === 'waiting', 'MCP request reply should return a waiting request id');
const requestReplyTargetBubble = await nextMessage(messages, ws, (msg) => (
msg.type === 'session_message' &&
msg.sessionId === crossReplyTargetSession.sessionId &&
msg.message?.crossConversation?.replyRequestId === requestReply.body.requestId &&
msg.message?.crossConversation?.expectsReply === true &&
msg.message?.content === 'cross reply requested'
));
assert(requestReplyTargetBubble.message.crossConversation.hopCount === 1, 'Request reply target message should persist hop count');
await nextMessage(messages, ws, (msg) => msg.type === 'done' && msg.sessionId === crossReplyTargetSession.sessionId);
await nextMessage(messages, ws, (msg) => (
(msg.type === 'done' || msg.type === 'background_done') &&
msg.sessionId === codexSession.sessionId
));
const storedReplyTarget = JSON.parse(fs.readFileSync(path.join(sessionsDir, `${crossReplyTargetSession.sessionId}.json`), 'utf8'));
const storedReplySource = JSON.parse(fs.readFileSync(path.join(sessionsDir, `${codexSession.sessionId}.json`), 'utf8'));
const storedReplyRequestMessage = storedReplyTarget.messages.find((message) => message.crossConversation?.replyRequestId === requestReply.body.requestId);
assert(storedReplyRequestMessage?.crossConversation?.expectsReply === true, 'Request reply target message should persist waiting metadata');
assert(storedReplyTarget.messages.some((message) => message.role === 'assistant' && /cross reply requested/.test(String(message.content || ''))), 'Request reply target should produce an assistant reply');
const storedReplyMessageIndex = storedReplySource.messages.findIndex((message) => message.crossConversation?.replyToRequestId === requestReply.body.requestId);
assert(storedReplyMessageIndex >= 0, 'Request reply should send the target reply back to source session');
const storedReplyMessage = storedReplySource.messages[storedReplyMessageIndex];
assert(storedReplyMessage.crossConversation.reply === true, 'Returned cross message should be marked as a reply');
assert(/线程「/.test(storedReplyMessage.content || '') && /已返回消息/.test(storedReplyMessage.content || ''), 'Returned cross message should include reply heading');
assert(/Codex mock handled/.test(storedReplyMessage.content || ''), 'Returned cross message should include target assistant output');
assert(storedReplySource.messages.slice(storedReplyMessageIndex + 1).some((message) => (
message.role === 'assistant' &&
/Codex mock handled/.test(String(message.content || '')) &&
/已返回消息/.test(String(message.content || ''))
)), 'Returned cross message should trigger the source session to run again');
const processLogAfterMcp = fs.readFileSync(path.join(logsDir, 'process.log'), 'utf8');
const mcpSpawnLine = processLogAfterMcp
@@ -634,6 +766,100 @@ async function main() {
assert(/trigger codex context limit/.test(autoCompactRetry.text || ''), 'Codex auto /compact should replay the failed prompt after compact');
}
const codexAppCwd = path.join(tempRoot, 'codexapp-space');
mkdirp(codexAppCwd);
ws.send(JSON.stringify({ type: 'new_session', agent: 'codexapp', cwd: codexAppCwd, mode: 'yolo' }));
const codexAppSession = await nextMessage(messages, ws, (msg) => msg.type === 'session_info' && msg.agent === 'codexapp' && msg.cwd === codexAppCwd);
assert(codexAppSession.model === 'gpt-5.5(xhigh)', 'Codex App new_session should read default Codex model');
ws.send(JSON.stringify({ type: 'composer_suggestions', requestId: 'reg-codexapp-skill', trigger: '$', query: 'reg', sessionId: codexAppSession.sessionId, agent: 'codexapp' }));
const codexAppSkillComposer = await nextMessage(messages, ws, (msg) => msg.type === 'composer_suggestions' && msg.requestId === 'reg-codexapp-skill');
assert(codexAppSkillComposer.items.some((item) => item.kind === 'skill' && item.name === 'regression-skill'), 'Codex App composer skill suggestions should include local Codex skill');
ws.send(JSON.stringify({ type: 'message', text: 'codexapp collaboration default probe', sessionId: codexAppSession.sessionId, mode: 'yolo', agent: 'codexapp' }));
const codexAppDefaultCollab = await nextMessage(messages, ws, (msg) => msg.type === 'text_delta' && msg.sessionId === codexAppSession.sessionId && /collaboration mode:/.test(msg.text || ''));
assert(/"mode":"default"/.test(codexAppDefaultCollab.text || ''), 'Codex App YOLO mode should pass default collaboration mode');
assert(/"hasModel":true/.test(codexAppDefaultCollab.text || ''), 'Codex App collaboration settings should include model');
assert(/"hasDeveloperInstructions":true/.test(codexAppDefaultCollab.text || ''), 'Codex App collaboration settings should include sub-agent developer instructions');
assert(/"hasTopLevelModel":false/.test(codexAppDefaultCollab.text || ''), 'Codex App collaboration turn should not duplicate model at top level');
assert(/"hasTopLevelEffort":false/.test(codexAppDefaultCollab.text || ''), 'Codex App collaboration turn should not duplicate effort at top level');
await nextMessage(messages, ws, (msg) => msg.type === 'done' && msg.sessionId === codexAppSession.sessionId);
ws.send(JSON.stringify({ type: 'message', text: 'codexapp empty reasoning prompt', sessionId: codexAppSession.sessionId, mode: 'yolo', agent: 'codexapp' }));
await nextMessage(messages, ws, (msg) => msg.type === 'done' && msg.sessionId === codexAppSession.sessionId);
const storedCodexAppAfterReasoning = JSON.parse(fs.readFileSync(path.join(sessionsDir, `${codexAppSession.sessionId}.json`), 'utf8'));
const hasEmptyReasoningTool = storedCodexAppAfterReasoning.messages
.flatMap((message) => Array.isArray(message.toolCalls) ? message.toolCalls : [])
.some((tool) => (tool.kind === 'reasoning' || tool.meta?.kind === 'reasoning') && !String(tool.result || '').trim());
assert(!hasEmptyReasoningTool, 'Codex App should not persist empty reasoning tool calls');
ws.send(JSON.stringify({ type: 'message', text: 'codexapp tool prompt', sessionId: codexAppSession.sessionId, mode: 'yolo', agent: 'codexapp' }));
await nextMessage(messages, ws, (msg) => msg.type === 'session_list' && msg.sessions.some((s) => s.id === codexAppSession.sessionId && s.isRunning));
const codexAppTool = await nextMessage(messages, ws, (msg) => msg.type === 'tool_end' && msg.sessionId === codexAppSession.sessionId && msg.toolUseId === 'tool-cmd');
assert(/codexapp/.test(codexAppTool.result || ''), 'Codex App should stream app-server tool results');
await nextMessage(messages, ws, (msg) => msg.type === 'done' && msg.sessionId === codexAppSession.sessionId);
let storedCodexApp = JSON.parse(fs.readFileSync(path.join(sessionsDir, `${codexAppSession.sessionId}.json`), 'utf8'));
const codexAppThreadId = storedCodexApp.codexAppThreadId;
assert(codexAppThreadId, 'Codex App thread id should be persisted');
assert(storedCodexApp.messages.some((message) => message.role === 'assistant' && /codexapp tool prompt/.test(String(message.content || ''))), 'Codex App assistant response should be persisted');
assert((storedCodexApp.totalUsage?.inputTokens || 0) > 0, 'Codex App token usage should be persisted');
ws.send(JSON.stringify({ type: 'message', text: 'codexapp dynamic prompt', sessionId: codexAppSession.sessionId, mode: 'yolo', agent: 'codexapp' }));
const codexAppDynamicTool = await nextMessage(messages, ws, (msg) => msg.type === 'tool_end' && msg.sessionId === codexAppSession.sessionId && msg.toolUseId === 'mcp-ccweb-list');
assert(codexAppDynamicTool.kind === 'mcp_tool_call', 'Codex App should surface ccweb MCP tool calls');
assert(/currentConversationId/.test(codexAppDynamicTool.result || ''), 'Codex App MCP tool should return ccweb conversation data');
assert(/"hasCcwebMcpConfig": true/.test(codexAppDynamicTool.result || ''), 'Codex App thread/start should pass ccweb MCP config');
await nextMessage(messages, ws, (msg) => msg.type === 'done' && msg.sessionId === codexAppSession.sessionId);
ws.send(JSON.stringify({ type: 'message', text: 'codexapp collaboration plan probe', sessionId: codexAppSession.sessionId, mode: 'plan', agent: 'codexapp' }));
const codexAppPlanCollab = await nextMessage(messages, ws, (msg) => msg.type === 'text_delta' && msg.sessionId === codexAppSession.sessionId && /collaboration mode:/.test(msg.text || ''));
assert(/"mode":"plan"/.test(codexAppPlanCollab.text || ''), 'Codex App Plan mode should pass plan collaboration mode');
assert(/"hasDeveloperInstructions":true/.test(codexAppPlanCollab.text || ''), 'Codex App Plan collaboration settings should keep sub-agent developer instructions');
assert(/"hasTopLevelModel":false/.test(codexAppPlanCollab.text || ''), 'Codex App Plan collaboration turn should not duplicate model at top level');
assert(/"hasTopLevelEffort":false/.test(codexAppPlanCollab.text || ''), 'Codex App Plan collaboration turn should not duplicate effort at top level');
await nextMessage(messages, ws, (msg) => msg.type === 'done' && msg.sessionId === codexAppSession.sessionId);
ws.send(JSON.stringify({ type: 'message', text: 'codexapp guided prompt', sessionId: codexAppSession.sessionId, mode: 'plan', agent: 'codexapp' }));
const guidedRequest = await nextMessage(messages, ws, (msg) => msg.type === 'codex_app_user_input_request' && msg.sessionId === codexAppSession.sessionId);
assert(guidedRequest.questions?.[0]?.id === 'choice', 'Codex App should forward request_user_input questions');
ws.send(JSON.stringify({
type: 'codex_app_user_input_response',
sessionId: codexAppSession.sessionId,
requestId: guidedRequest.requestId,
answers: { choice: { answers: ['A'] } },
}));
const guidedDelta = await nextMessage(messages, ws, (msg) => msg.type === 'text_delta' && msg.sessionId === codexAppSession.sessionId && /guided answer: A/.test(msg.text || ''));
assert(/guided answer: A/.test(guidedDelta.text || ''), 'Codex App should continue after guided input response');
await nextMessage(messages, ws, (msg) => msg.type === 'done' && msg.sessionId === codexAppSession.sessionId);
ws.send(JSON.stringify({ type: 'message', text: 'slow codexapp prompt', sessionId: codexAppSession.sessionId, mode: 'yolo', agent: 'codexapp' }));
await nextMessage(messages, ws, (msg) => msg.type === 'session_list' && msg.sessions.some((s) => s.id === codexAppSession.sessionId && s.isRunning));
await sleep(150);
ws.send(JSON.stringify({ type: 'message', text: 'runtime steer insert', sessionId: codexAppSession.sessionId, mode: 'yolo', agent: 'codexapp' }));
const steerDelta = await nextMessage(messages, ws, (msg) => msg.type === 'text_delta' && msg.sessionId === codexAppSession.sessionId && /steer accepted: runtime steer insert/.test(msg.text || ''));
assert(/runtime steer insert/.test(steerDelta.text || ''), 'Codex App running message should use turn/steer');
await nextMessage(messages, ws, (msg) => msg.type === 'done' && msg.sessionId === codexAppSession.sessionId);
storedCodexApp = JSON.parse(fs.readFileSync(path.join(sessionsDir, `${codexAppSession.sessionId}.json`), 'utf8'));
assert(storedCodexApp.codexAppThreadId === codexAppThreadId, 'Codex App follow-up should resume the same app-server thread');
assert(storedCodexApp.messages.some((message) => message.role === 'user' && message.content === 'runtime steer insert'), 'Codex App steer message should be persisted as user history');
assert(storedCodexApp.messages.some((message) => message.role === 'assistant' && /runtime steer insert/.test(String(message.content || ''))), 'Codex App steered assistant output should be persisted');
ws.send(JSON.stringify({ type: 'message', text: 'slow codexapp abort prompt', sessionId: codexAppSession.sessionId, mode: 'yolo', agent: 'codexapp' }));
await nextMessage(messages, ws, (msg) => msg.type === 'session_list' && msg.sessions.some((s) => s.id === codexAppSession.sessionId && s.isRunning));
const codexAppRunningMcp = await callInternalMcp(port, internalMcpToken, {
tool: 'ccweb_send_message',
sourceSessionId: codexSession.sessionId,
sourceHopCount: 0,
args: {
targetConversationId: codexAppSession.sessionId,
content: 'running codexapp target should reject this',
},
});
assert(codexAppRunningMcp.status === 400 && codexAppRunningMcp.body?.code === 'target_running', 'MCP cross send should reject running Codex App targets');
await sleep(150);
ws.send(JSON.stringify({ type: 'abort' }));
await nextMessage(messages, ws, (msg) => msg.type === 'done' && msg.sessionId === codexAppSession.sessionId);
const claudeAttachment = await uploadAttachment(port, token, {
filename: 'claude-test.png',
mime: 'image/png',