From 85661a5ba951bbf7c90e0beb7b66165c07908290 Mon Sep 17 00:00:00 2001
From: SpecialX <47072643+wangxiner55@users.noreply.github.com>
Date: Wed, 24 Jun 2026 14:53:05 +0800
Subject: [PATCH] fix(exams): auto-detect composite sub-questions from text
 patterns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Root cause: When users paste reading comprehension content into a
composite question block without manually marking each sub-question
as a questionBlock, the sub-questions were treated as plain text and
merged into the question stem. This caused:
1. Sub-questions not showing in the preview's sub-question area
2. Text from different paragraphs being concatenated without newlines
3. A stray "A." appearing because parseOptions misidentified list items

Fix:
1. extractText now inserts newlines between paragraphs/listItems,
   preserving text structure so pattern detection can work
2. Added detectSubQuestionsFromText: for composite questions without
   explicit questionBlock children, auto-detect sub-questions from
   text patterns:
   - Numbered lines: "1.xxx", "2.xxx", "(1)xxx", "①xxx"
   - Lines with score markers: "xxx（3分）"
   - If numbered sub-questions are found, check preceding lines for
     un-numbered sub-questions (e.g., the first sub-question that
     lacks a number but has a score marker)
3. extractMaterialText removes detected sub-question text from the
   question stem, keeping only the reading material/passage

This allows users to paste reading comprehension content directly
into a composite question block and have sub-questions automatically
detected, without needing to manually mark each one.
---
 .../exams/editor/editor-to-structure.ts       | 134 ++++++++++++++++--
 1 file changed, 124 insertions(+), 10 deletions(-)

diff --git a/src/modules/exams/editor/editor-to-structure.ts b/src/modules/exams/editor/editor-to-structure.ts
index 64610ab..7ca19d8 100644
--- a/src/modules/exams/editor/editor-to-structure.ts
+++ b/src/modules/exams/editor/editor-to-structure.ts
@@ -11,7 +11,19 @@ import type {
 const extractText = (node: JSONContent | undefined): string => {
   if (!node) return ""
   if (node.type === "text") return node.text ?? ""
-  if (Array.isArray(node.content)) return node.content.map(extractText).join("")
+  if (Array.isArray(node.content)) {
+    // Concatenate the children's text, ending every paragraph/listItem with a
+    // single "\n" so that text from separate blocks is not glued together.
+    return node.content
+      .map((child) => {
+        const text = extractText(child)
+        const isBlock = child.type === "paragraph" || child.type === "listItem"
+        // A listItem normally wraps a paragraph whose text already ends with
+        // "\n"; skip the second newline instead of emitting a blank line.
+        return isBlock && !text.endsWith("\n") ? text + "\n" : text
+      })
+      .join("")
+  }
   return ""
 }
 
@@ -106,15 +118,14 @@ const buildQuestion = (qb: JSONContent): EditorQuestion => {
     return true
   })
 
-  const text = extractText({
-    type: "doc",
-    content: nonQuestionBlocks.filter(
-      (n) =>
-        n.type !== "orderedList" &&
-        n.type !== "bulletList" &&
-        n.type !== "image"
-    ),
-  })
+  // 提取题干文本(过滤掉列表和图片,它们单独处理)
+  const textNodes = nonQuestionBlocks.filter(
+    (n) =>
+      n.type !== "orderedList" &&
+      n.type !== "bulletList" &&
+      n.type !== "image"
+  )
+  const text = extractText({ type: "doc", content: textNodes })
   const options = parseOptions(nonQuestionBlocks)
   const blanks = collectBlanks(nonQuestionBlocks)
   const images = collectImages(nonQuestionBlocks)
@@ -123,9 +134,112 @@ const buildQuestion = (qb: JSONContent): EditorQuestion => {
   if (blanks.length > 0) content.blanks = blanks
   if (images.length > 0) content.images = images
   if (subQuestions.length > 0) content.subQuestions = subQuestions
+
+  // 复合题:如果没有显式子题,尝试从文本模式识别子题
+  // (如 "1.xxx", "2.xxx", "(1)xxx", "①xxx" 等)
+  if (type === "composite" && subQuestions.length === 0 && content.text) {
+    const detected = detectSubQuestionsFromText(content.text)
+    if (detected.length > 0) {
+      content.subQuestions = detected
+      // 移除被识别为子题的文本,保留选段/材料部分
+      const materialText = extractMaterialText(content.text, detected)
+      content.text = materialText
+    }
+  }
+
   return { id, type, score, content }
 }
 
+/**
+ * Detects sub-questions inside the plain text of a composite question.
+ *
+ * A trimmed, non-empty line starts a sub-question when it begins with
+ * "1." "1、" "1)" "(1)" "（1）" or a circled digit "①".."⑩". A bare number
+ * must be followed by a delimiter, so passage lines such as "2023年…" are
+ * not mistaken for sub-questions.
+ *
+ * Short lines (< 100 chars) directly above the first numbered line that
+ * carry a score marker such as "（3分）" become un-numbered leading
+ * sub-questions. Un-numbered lines after a numbered line are appended to
+ * that sub-question, whether or not it carries a score.
+ * The score marker is parsed into `score` but is left in `text`.
+ *
+ * @param text Stem text, one logical line per "\n".
+ * @returns Sub-questions in document order, or `[]` when the text has no
+ *   numbered line.
+ */
+const detectSubQuestionsFromText = (
+  text: string
+): Array<{ id: string; text: string; score?: number }> => {
+  type DetectedSub = { id: string; text: string; score?: number }
+
+  // Blank lines carry no information for detection; drop them up front.
+  const lines = text.split("\n").map((l) => l.trim()).filter(Boolean)
+  // Alternatives: "(1)" / "（1）" with an optional trailing delimiter,
+  // "1." / "1、" / "1)" where the delimiter is required, and
+  // "①".."⑩" with an optional delimiter. Group 1 is the sub-question text.
+  const subQuestionPattern =
+    /^(?:[(（]\d+[)）][.．、]?|\d+[.．、)）]|[①②③④⑤⑥⑦⑧⑨⑩][.．、)）]?)\s*(.+)/
+  const scorePattern = /[（(](\d+)\s*分[）)]/
+  // Points announced by a "（3分）"-style marker, or undefined when absent.
+  const scoreOf = (s: string): number | undefined => {
+    const m = scorePattern.exec(s)
+    return m ? Number(m[1]) : undefined
+  }
+
+  const firstNumberedIdx = lines.findIndex((l) => subQuestionPattern.test(l))
+  if (firstNumberedIdx === -1) return []
+
+  // Walk upwards from the first numbered line: contiguous short lines with
+  // a score marker are un-numbered sub-questions (typically sub-question 1).
+  const subs: DetectedSub[] = []
+  for (let i = firstNumberedIdx - 1; i >= 0; i--) {
+    const line = lines[i] ?? ""
+    const score = scoreOf(line)
+    if (score === undefined || line.length >= 100) break
+    subs.unshift({ id: createId(), text: line, score })
+  }
+
+  // Numbered sub-questions; lines without a number continue the current one.
+  let currentSub: DetectedSub | null = null
+  for (const line of lines.slice(firstNumberedIdx)) {
+    const match = subQuestionPattern.exec(line)
+    if (!match) {
+      if (currentSub) currentSub.text += "\n" + line
+      continue
+    }
+    if (currentSub) subs.push(currentSub)
+    const content = match[1] ?? ""
+    const score = scoreOf(content)
+    currentSub = { id: createId(), text: content }
+    // The sub-question stays open even when it is scored, so that its
+    // continuation lines are collected instead of being lost.
+    if (score !== undefined) currentSub.score = score
+  }
+  if (currentSub) subs.push(currentSub)
+
+  return subs
+}
+
+/**
+ * Returns `fullText` without sub-question lines: each line of a `sub.text`
+ * removes its last matching source line, leading marker ("1.", "①") included.
+ */
+const extractMaterialText = (
+  fullText: string,
+  subs: Array<{ id: string; text: string }>
+): string => {
+  const marker = /^(?:(?:[(（]?\d+[)）]?|[①②③④⑤⑥⑦⑧⑨⑩])[.．、)）]?)?\s*$/
+  const lines = fullText.split("\n")
+  for (const needle of subs.flatMap((s) => s.text.split("\n")).filter(Boolean)) {
+    const isHit = (l: string): boolean =>
+      l.trim().endsWith(needle) && marker.test(l.trim().slice(0, -needle.length))
+    const hit = lines.map(isHit).lastIndexOf(true)
+    if (hit !== -1) lines.splice(hit, 1)
+  }
+  return lines.join("\n").replace(/\n{3,}/g, "\n\n").trim()
+}
+
 /** 统计结构节点的题目数和总分(递归) */
 const computeStats = (node: EditorStructureNode): { count: number; score: number } => {
   if (node.type === "question") {