fix(exams): auto-detect composite sub-questions from text patterns
Root cause: When users paste reading comprehension content into a
composite question block without manually marking each sub-question
as a questionBlock, the sub-questions were treated as plain text and
merged into the question stem. This caused:
1. Sub-questions not showing in the preview's sub-question area
2. Text from different paragraphs being concatenated without newlines
3. A stray "A." appearing in the result because parseOptions misidentified list items
Fix:
1. extractText now inserts newlines between paragraphs/listItems,
preserving text structure so pattern detection can work
2. Added detectSubQuestionsFromText: for composite questions without
explicit questionBlock children, auto-detect sub-questions from
text patterns:
- Numbered lines: "1.xxx", "2.xxx", "(1)xxx", "①xxx"
- Lines with score markers: "xxx(3分)"
- If numbered sub-questions are found, check preceding lines for
un-numbered sub-questions (e.g., the first sub-question that
lacks a number but has a score marker)
3. extractMaterialText removes detected sub-question text from the
question stem, keeping only the reading material/passage
This allows users to paste reading comprehension content directly
into a composite question block and have sub-questions automatically
detected, without needing to manually mark each one.
This commit is contained in:
@@ -11,7 +11,19 @@ import type {
|
|||||||
/**
 * Recursively flattens a rich-text node into plain text.
 *
 * - A missing node yields an empty string.
 * - A `text` node yields its `text` value (empty string when absent).
 * - A node with a `content` array yields the concatenated text of its
 *   children; every direct child of type `paragraph` or `listItem` is
 *   followed by a newline so separate paragraphs are not glued together.
 * - Any other node yields an empty string.
 */
const extractText = (node: JSONContent | undefined): string => {
  if (!node) return ""
  if (node.type === "text") return node.text ?? ""
  if (!Array.isArray(node.content)) return ""

  return node.content.reduce((collected, child) => {
    const childText = extractText(child)
    // Paragraphs and list items end their own line to keep the text structure.
    const endsLine = child.type === "paragraph" || child.type === "listItem"
    return collected + (endsLine ? childText + "\n" : childText)
  }, "")
}
|
||||||
|
|
||||||
@@ -106,15 +118,14 @@ const buildQuestion = (qb: JSONContent): EditorQuestion => {
|
|||||||
return true
|
return true
|
||||||
})
|
})
|
||||||
|
|
||||||
const text = extractText({
|
// 提取题干文本(过滤掉列表和图片,它们单独处理)
|
||||||
type: "doc",
|
const textNodes = nonQuestionBlocks.filter(
|
||||||
content: nonQuestionBlocks.filter(
|
(n) =>
|
||||||
(n) =>
|
n.type !== "orderedList" &&
|
||||||
n.type !== "orderedList" &&
|
n.type !== "bulletList" &&
|
||||||
n.type !== "bulletList" &&
|
n.type !== "image"
|
||||||
n.type !== "image"
|
)
|
||||||
),
|
const text = extractText({ type: "doc", content: textNodes })
|
||||||
})
|
|
||||||
const options = parseOptions(nonQuestionBlocks)
|
const options = parseOptions(nonQuestionBlocks)
|
||||||
const blanks = collectBlanks(nonQuestionBlocks)
|
const blanks = collectBlanks(nonQuestionBlocks)
|
||||||
const images = collectImages(nonQuestionBlocks)
|
const images = collectImages(nonQuestionBlocks)
|
||||||
@@ -123,9 +134,112 @@ const buildQuestion = (qb: JSONContent): EditorQuestion => {
|
|||||||
if (blanks.length > 0) content.blanks = blanks
|
if (blanks.length > 0) content.blanks = blanks
|
||||||
if (images.length > 0) content.images = images
|
if (images.length > 0) content.images = images
|
||||||
if (subQuestions.length > 0) content.subQuestions = subQuestions
|
if (subQuestions.length > 0) content.subQuestions = subQuestions
|
||||||
|
|
||||||
|
// 复合题:如果没有显式子题,尝试从文本模式识别子题
|
||||||
|
// (如 "1.xxx", "2.xxx", "(1)xxx", "①xxx" 等)
|
||||||
|
if (type === "composite" && subQuestions.length === 0 && content.text) {
|
||||||
|
const detected = detectSubQuestionsFromText(content.text)
|
||||||
|
if (detected.length > 0) {
|
||||||
|
content.subQuestions = detected
|
||||||
|
// 移除被识别为子题的文本,保留选段/材料部分
|
||||||
|
const materialText = extractMaterialText(content.text, detected)
|
||||||
|
content.text = materialText
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return { id, type, score, content }
|
return { id, type, score, content }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Detects sub-questions in the plain text of a composite question.
 *
 * The text is split into trimmed, non-empty lines and scanned as follows:
 * 1. A line opens a numbered sub-question when it starts with "(1)",
 *    "1." / "1、" / "1)" or a circled digit ①–⑩ (half-width and full-width
 *    punctuation are both accepted) and has content after the prefix.
 *    A bare number must be followed by a delimiter, so passage lines such
 *    as "2023年…" are not mistaken for sub-questions.
 * 2. Lines directly above the first numbered line that are shorter than
 *    100 characters and carry a score marker such as "(3分)" are treated
 *    as un-numbered sub-questions.
 * 3. Un-numbered lines following a numbered line are appended to that
 *    sub-question until a score marker is seen, which closes it. Lines
 *    after a closed sub-question and before the next numbered line are
 *    not part of any sub-question.
 *
 * @param text Plain text of the composite question, one paragraph per line.
 * @returns Detected sub-questions in document order, with the numbering
 *   prefix stripped from numbered ones; `score` is set only when a score
 *   marker was found. Empty when no numbered line exists.
 */
const detectSubQuestionsFromText = (
  text: string
): Array<{ id: string; text: string; score?: number }> => {
  // \uFF08 / \uFF09 are the full-width parentheses, \uFF0E the full-width dot.
  // NOTE: "3.5 ..." at the start of a line still counts as item 3; that is
  // kept so questions like "1.2019年…" continue to be recognised.
  const subQuestionPattern =
    /^(?:[(\uFF08]\d+[)\uFF09][.、\uFF0E]?|\d+[.、)\uFF09\uFF0E]|[①-⑩][.、\uFF0E]?)\s*(.+)/
  const scorePattern = /[(\uFF08](\d+)\s*分[)\uFF09]/
  // Longer scored lines are assumed to be passage text, not a question.
  const maxUnnumberedLength = 100

  const lines = text
    .split("\n")
    .map((l) => l.trim())
    .filter((l) => l.length > 0)

  const firstNumberedIdx = lines.findIndex((l) => subQuestionPattern.test(l))
  if (firstNumberedIdx === -1) return []

  const subs: Array<{ id: string; text: string; score?: number }> = []

  // Walk upwards from the first numbered line and collect contiguous short
  // lines that carry a score marker (e.g. an un-numbered first sub-question).
  for (let i = firstNumberedIdx - 1; i >= 0; i--) {
    const line = lines[i] ?? ""
    const scoreMatch = scorePattern.exec(line)
    if (scoreMatch === null || line.length >= maxUnnumberedLength) break
    subs.unshift({ id: createId(), text: line, score: Number(scoreMatch[1]) })
  }

  // Numbered sub-questions; `pending` is the one still accepting lines.
  let pending: { id: string; text: string } | null = null
  for (const line of lines.slice(firstNumberedIdx)) {
    const numbered = subQuestionPattern.exec(line)
    let added: string
    if (numbered !== null) {
      if (pending !== null) subs.push(pending)
      added = numbered[1] ?? ""
      pending = { id: createId(), text: added }
    } else if (pending !== null) {
      added = line
      pending.text += "\n" + line
    } else {
      // Un-numbered line after a closed sub-question: not part of any sub.
      continue
    }

    // A score marker ends the sub-question, whether it sits on the numbered
    // line itself or on a wrapped continuation line.
    const scoreMatch = scorePattern.exec(added)
    if (scoreMatch !== null) {
      subs.push({ ...pending, score: Number(scoreMatch[1]) })
      pending = null
    }
  }
  if (pending !== null) subs.push(pending)

  return subs
}
|
||||||
|
|
||||||
|
/**
 * Returns the material/passage part of a composite question by removing
 * the lines that were recognised as sub-questions.
 *
 * Every line of every sub-question's text is looked up among the lines of
 * `fullText`, in document order. A line is removed only when, after
 * trimming, it equals the sub-question line exactly or equals it preceded
 * by nothing but a numbering prefix ("1.", "(2)", "③", ...). Matching
 * whole lines keeps passage sentences that merely contain the same words
 * intact, and removes the numbering together with the question.
 *
 * @param fullText Full text of the question stem, one paragraph per line.
 * @param subs Sub-questions whose text should be taken out of the stem.
 * @returns The remaining text with runs of three or more newlines collapsed
 *   to a single blank line and surrounding whitespace trimmed.
 */
const extractMaterialText = (
  fullText: string,
  subs: Array<{ id: string; text: string }>
): string => {
  // Matches a string consisting solely of a numbering prefix.
  // \uFF08 / \uFF09 are the full-width parentheses, \uFF0E the full-width dot.
  const numberingOnly = /^(?:[(\uFF08]?\d+[)\uFF09]?|[①-⑩])[.、)\uFF09\uFF0E]?\s*$/

  const isSubQuestionLine = (line: string, subLine: string): boolean => {
    const trimmed = line.trim()
    if (trimmed === subLine) return true
    if (!trimmed.endsWith(subLine)) return false
    return numberingOnly.test(trimmed.slice(0, trimmed.length - subLine.length))
  }

  const lines = fullText.split("\n")
  const removed = new Set<number>()
  // Sub-questions arrive in document order, so only search forward.
  let cursor = 0

  for (const sub of subs) {
    for (const rawSubLine of sub.text.split("\n")) {
      const subLine = rawSubLine.trim()
      if (subLine === "") continue
      const offset = lines
        .slice(cursor)
        .findIndex((line) => isSubQuestionLine(line, subLine))
      if (offset === -1) continue
      removed.add(cursor + offset)
      cursor += offset + 1
    }
  }

  return lines
    .filter((_, index) => !removed.has(index))
    .join("\n")
    // Collapse the blank runs left behind by removed lines.
    .replace(/\n{3,}/g, "\n\n")
    .trim()
}
|
||||||
|
|
||||||
/** 统计结构节点的题目数和总分(递归) */
|
/** 统计结构节点的题目数和总分(递归) */
|
||||||
const computeStats = (node: EditorStructureNode): { count: number; score: number } => {
|
const computeStats = (node: EditorStructureNode): { count: number; score: number } => {
|
||||||
if (node.type === "question") {
|
if (node.type === "question") {
|
||||||
|
|||||||
Reference in New Issue
Block a user