From 85661a5ba951bbf7c90e0beb7b66165c07908290 Mon Sep 17 00:00:00 2001 From: SpecialX <47072643+wangxiner55@users.noreply.github.com> Date: Wed, 24 Jun 2026 14:53:05 +0800 Subject: [PATCH] fix(exams): auto-detect composite sub-questions from text patterns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: When users paste reading comprehension content into a composite question block without manually marking each sub-question as a questionBlock, the sub-questions were treated as plain text and merged into the question stem. This caused: 1. Sub-questions not showing in the preview's sub-question area 2. Text from different paragraphs being concatenated without newlines 3. A stray "A." appearing because parseOptions misidentified list items Fix: 1. extractText now appends a newline after each paragraph/listItem, preserving text structure so line-based pattern detection can work 2. Added detectSubQuestionsFromText: for composite questions without explicit questionBlock children, auto-detect sub-questions from text patterns: - Numbered lines start a sub-question: "1.xxx", "2.xxx", "(1)xxx", "①xxx"; nothing is detected unless at least one numbered line exists - A score marker such as "xxx(3分)" inside a numbered line sets that sub-question's score - Un-numbered lines directly above the first numbered line are also treated as sub-questions when they are shorter than 100 characters and carry a score marker (e.g., a first sub-question that lacks a number); the upward scan stops at the first line that does not qualify - Un-numbered lines that follow a numbered line without a score marker are appended to that sub-question's text 3. extractMaterialText removes the first line of each detected sub-question from the question stem, keeping only the reading material/passage This allows users to paste reading comprehension content directly into a composite question block and have sub-questions automatically detected, without needing to manually mark each one. 
--- .../exams/editor/editor-to-structure.ts | 134 ++++++++++++++++-- 1 file changed, 124 insertions(+), 10 deletions(-) diff --git a/src/modules/exams/editor/editor-to-structure.ts b/src/modules/exams/editor/editor-to-structure.ts index 64610ab..7ca19d8 100644 --- a/src/modules/exams/editor/editor-to-structure.ts +++ b/src/modules/exams/editor/editor-to-structure.ts @@ -11,7 +11,19 @@ import type { const extractText = (node: JSONContent | undefined): string => { if (!node) return "" if (node.type === "text") return node.text ?? "" - if (Array.isArray(node.content)) return node.content.map(extractText).join("") + if (Array.isArray(node.content)) { + // 段落之间插入换行符,避免不同段落文本被直接连接 + return node.content + .map((child) => { + const text = extractText(child) + // 段落/列表项后加换行,保持文本结构 + if (child.type === "paragraph" || child.type === "listItem") { + return text + "\n" + } + return text + }) + .join("") + } return "" } @@ -106,15 +118,14 @@ const buildQuestion = (qb: JSONContent): EditorQuestion => { return true }) - const text = extractText({ - type: "doc", - content: nonQuestionBlocks.filter( - (n) => - n.type !== "orderedList" && - n.type !== "bulletList" && - n.type !== "image" - ), - }) + // 提取题干文本(过滤掉列表和图片,它们单独处理) + const textNodes = nonQuestionBlocks.filter( + (n) => + n.type !== "orderedList" && + n.type !== "bulletList" && + n.type !== "image" + ) + const text = extractText({ type: "doc", content: textNodes }) const options = parseOptions(nonQuestionBlocks) const blanks = collectBlanks(nonQuestionBlocks) const images = collectImages(nonQuestionBlocks) @@ -123,9 +134,112 @@ const buildQuestion = (qb: JSONContent): EditorQuestion => { if (blanks.length > 0) content.blanks = blanks if (images.length > 0) content.images = images if (subQuestions.length > 0) content.subQuestions = subQuestions + + // 复合题:如果没有显式子题,尝试从文本模式识别子题 + // (如 "1.xxx", "2.xxx", "(1)xxx", "①xxx" 等) + if (type === "composite" && subQuestions.length === 0 && content.text) { + const detected = 
detectSubQuestionsFromText(content.text) + if (detected.length > 0) { + content.subQuestions = detected + // 移除被识别为子题的文本,保留选段/材料部分 + const materialText = extractMaterialText(content.text, detected) + content.text = materialText + } + } + return { id, type, score, content } } +/** + * 从文本中检测子题(如 "1.xxx", "2.xxx", "(1)xxx", "①xxx" 等) + * 返回检测到的子题列表(不含原文中的材料部分) + * + * 检测策略: + * 1. 优先识别带编号的行(1.xxx, (1)xxx, ①xxx 等) + * 2. 如果检测到编号子题,且其前一行带分值(如"xxx(3分)"), + * 则把前一行也作为子题1 + */ +const detectSubQuestionsFromText = ( + text: string +): Array<{ id: string; text: string; score?: number }> => { + const lines = text.split("\n").map((l) => l.trim()).filter(Boolean) + const subQuestionPattern = /^(?:\(?(\d+)\)?|①|②|③|④|⑤|⑥|⑦|⑧|⑨|⑩)[.、))]?\s*(.+)/ + const scorePattern = /[((](\d+)\s*分[))]/ + const subs: Array<{ id: string; text: string; score?: number }> = [] + + // 先找所有带编号的子题 + const numberedIndices: number[] = [] + for (let i = 0; i < lines.length; i++) { + if (lines[i].match(subQuestionPattern)) { + numberedIndices.push(i) + } + } + + if (numberedIndices.length === 0) return [] + + // 如果第一个编号子题前面有带分值的行,把它们也作为子题 + const firstNumberedIdx = numberedIndices[0] + if (firstNumberedIdx > 0) { + // 检查前一行是否带分值(可能是未编号的子题1) + for (let i = firstNumberedIdx - 1; i >= 0; i--) { + const line = lines[i] + const scoreMatch = line.match(scorePattern) + if (scoreMatch && line.length < 100) { + // 短行 + 带分值 = 可能是子题 + subs.unshift({ + id: createId(), + text: line, + score: Number(scoreMatch[1]), + }) + } else { + break + } + } + } + + // 处理带编号的子题 + let currentSub: { id: string; text: string } | null = null + for (let i = firstNumberedIdx; i < lines.length; i++) { + const line = lines[i] + const match = line.match(subQuestionPattern) + if (match) { + if (currentSub) subs.push(currentSub) + const content = match[2] || "" + const scoreMatch = content.match(scorePattern) + const score = scoreMatch ? 
Number(scoreMatch[1]) : undefined + currentSub = { id: createId(), text: content } + if (score !== undefined) { + subs.push({ ...currentSub, score }) + currentSub = null + } + } else if (currentSub) { + currentSub.text += "\n" + line + } + } + if (currentSub) subs.push(currentSub) + + return subs +} + +/** + * 从原文中提取材料文本(移除被识别为子题的部分) + */ +const extractMaterialText = ( + fullText: string, + subs: Array<{ id: string; text: string }> +): string => { + let material = fullText + for (const sub of subs) { + // 移除子题文本(取第一行作为匹配依据) + const firstLine = sub.text.split("\n")[0] + if (firstLine) { + material = material.replace(firstLine, "") + } + } + // 清理多余的空行 + return material.replace(/\n{3,}/g, "\n\n").trim() +} + /** 统计结构节点的题目数和总分(递归) */ const computeStats = (node: EditorStructureNode): { count: number; score: number } => { if (node.type === "question") {