Finishddd

This commit is contained in:
SpecialX
2025-06-24 19:05:13 +08:00
parent 0ee411bf50
commit f9ff57ff72
15 changed files with 385 additions and 1173 deletions

View File

@@ -1,18 +1,24 @@
using Entities.DTO;
using System;
using System.Collections.Generic;
using System.Linq;
using Entities.Contracts; // 假设这些实体合约仍然是必需的
using System.Text.RegularExpressions;
using System.Text;
namespace TechHelper.Client.Exam
{
/// <summary>
/// Categories used to classify a <see cref="ParseError"/> recorded while parsing exam text.
/// </summary>
public enum ParseErrorType
{
Validation = 1, // Input validation failed.
DataParsing = 2, // Data could not be parsed (for example a number conversion).
Structural = 3, // Structural problem (for example an option with no owning question).
RegexMatchIssue = 4, // A regular-expression match did not have the expected shape.
UnexpectedError = 5 // Generic error that was not anticipated.
}
public class ParseError
{
public ParseErrorType Type { get; }
public string Message { get; }
public int? Index { get; }
public string MatchedText { get; }
public int? Index { get; }
public string MatchedText { get; }
public Exception InnerException { get; }
public ParseError(ParseErrorType type, string message, int? index = null, string matchedText = null, Exception innerException = null)
@@ -26,7 +32,7 @@ namespace TechHelper.Client.Exam
public override string ToString()
{
var sb = new System.Text.StringBuilder();
var sb = new StringBuilder();
sb.Append($"[{Type}] {Message}");
if (Index.HasValue) sb.Append($" (Index: {Index.Value})");
if (!string.IsNullOrEmpty(MatchedText)) sb.Append($" (MatchedText: '{MatchedText}')");
@@ -35,47 +41,32 @@ namespace TechHelper.Client.Exam
}
}
public enum ParseErrorType
public class AssignmentEx
{
Validation = 1, // 输入验证失败
DataParsing = 2, // 数据解析失败(如数字转换)
Structural = 3, // 结构性问题(如选项没有对应的问题)
RegexMatchIssue = 4, // 正则表达式匹配结果不符合预期
UnexpectedError = 5 // 未预料到的通用错误
}
public class ExamPaper
{
public string AssignmentTitle { get; set; } = "未识别试卷标题";
public string Description { get; set; } = "未识别试卷描述";
public string SubjectArea { get; set; } = "试卷类别";
public List<MajorQuestionGroup> QuestionGroups { get; set; } = new List<MajorQuestionGroup>();
public List<PaperQuestion> TopLevelQuestions { get; set; } = new List<PaperQuestion>();
public string Title { get; set; } = "Title";
public string Description { get; set; } = "Description";
public SubjectAreaEnum SubjectArea { get; set; } = SubjectAreaEnum.Unknown;
public AssignmentQuestionEx ExamStruct { get; set; } = new AssignmentQuestionEx();
public List<ParseError> Errors { get; set; } = new List<ParseError>();
}
public class MajorQuestionGroup
public class AssignmentQuestionEx
{
public string Title { get; set; } = string.Empty;
public string Descript { get; set; } = string.Empty;
public string Description { get; set; } = string.Empty;
public byte Index { get; set; } = 0;
public float Score { get; set; }
public List<MajorQuestionGroup> SubQuestionGroups { get; set; } = new List<MajorQuestionGroup>();
public List<PaperQuestion> SubQuestions { get; set; } = new List<PaperQuestion>();
public QuestionEx? Question { get; set; }
public AssignmentStructType Type { get; set; }
public List<AssignmentQuestionEx> ChildrenAssignmentQuestion { get; set; } = new List<AssignmentQuestionEx>();
public int Priority { get; set; }
public bool bGroup { get; set; } = true;
}
public class PaperQuestion
public class QuestionEx
{
public string Number { get; set; } = string.Empty;
public string Stem { get; set; } = string.Empty;
public float Score { get; set; }
public string Title { get; set; } = string.Empty;
public string Answer { get; set; } = string.Empty;
public List<Option> Options { get; set; } = new List<Option>();
public List<PaperQuestion> SubQuestions { get; set; } = new List<PaperQuestion>();
public string SampleAnswer { get; set; } = string.Empty;
public string QuestionType { get; set; } = string.Empty;
public int Priority { get; set; }
}
public class Option
@@ -90,155 +81,116 @@ namespace TechHelper.Client.Exam
/// </summary>
public class RegexPatternConfig
{
public string Pattern { get; set; } // 正则表达式字符串
public int Priority { get; set; } // 优先级,数字越小优先级越高
public Regex Regex { get; private set; } // 编译后的Regex对象用于性能优化
public string Pattern { get; set; }
public int Priority { get; set; }
public AssignmentStructType Type { get; set; }
public Regex Regex { get; private set; }
public RegexPatternConfig(string pattern, int priority)
public RegexPatternConfig(string pattern, int priority, AssignmentStructType type = AssignmentStructType.Question)
{
Pattern = pattern;
Priority = priority;
Regex = new Regex(pattern, RegexOptions.Multiline | RegexOptions.Compiled); // 多行模式,编译以提高性能
Type = type;
Regex = new Regex(pattern, RegexOptions.Multiline | RegexOptions.Compiled);
}
}
/// <summary>
/// Names the categories of regex pattern lists. The member names mirror the
/// pattern-list properties declared on ExamParserConfig.
/// </summary>
// NOTE(review): no usage of this enum is visible in this file section — confirm whether it is still needed.
public enum ExamParserEnum
{
MajorQuestionGroupPatterns = 0,
QuestionPatterns,
OptionPatterns
}
/// <summary>
/// 试卷解析的配置类,包含所有正则表达式
/// </summary>
public class ExamParserConfig
{
public List<RegexPatternConfig> MajorQuestionGroupPatterns { get; set; } = new List<RegexPatternConfig>();
public List<RegexPatternConfig> QuestionPatterns { get; set; } = new List<RegexPatternConfig>();
public List<RegexPatternConfig> OptionPatterns { get; set; } = new List<RegexPatternConfig>();
public Regex ScoreRegex { get; private set; } // 独立的得分正则表达式
public ExamParserConfig()
{
MajorQuestionGroupPatterns.Add(new RegexPatternConfig(@"^([一二三四五六七八九十]+)[、\.]\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 1));
QuestionPatterns.Add(new RegexPatternConfig(@"^\(([一二三四五六七八九十]{1,2}|十[一二三四五六七八九])\)\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 1));
// 题目/题组模式:只匹配行开头,并按优先级区分
// Group 1: 编号部分
// Group 2: 题目/题组标题内容
// 例如:一. 这是大题一
QuestionPatterns.Add(new RegexPatternConfig(@"^([一二三四五六七八九十]+)[.\、]\s*(.+)", 1, AssignmentStructType.Struct));
// 例如:(一) 这是第一子题组
QuestionPatterns.Add(new RegexPatternConfig(@"^\(([一二三四五六七八九十]{1,2}|十[一二三四五六七八九])\)\s*(.+)", 2, AssignmentStructType.Composite));
// 例如1. 这是第一道题目 或 1 这是第一道题目
QuestionPatterns.Add(new RegexPatternConfig(@"^(\d+)\.?\s*(.+)", 3, AssignmentStructType.Question));
// 例如:(1). 这是小问一 或 (1) 这是小问一
QuestionPatterns.Add(new RegexPatternConfig(@"^\((\d+)\)\.?\s*(.+)", 4, AssignmentStructType.Question));
// 例如:① 这是另一种小问 或 ①. 这是另一种小问 (如果 ① 后面会跟点,这个更通用)
// 如果 ① 后面通常没有点,但您希望它也能匹配,则保留原样或根据实际情况调整
QuestionPatterns.Add(new RegexPatternConfig(@"^[①②③④⑤⑥⑦⑧⑨⑩]+\.?\s*(.+)", 5, AssignmentStructType.Question));
// 模式 1: "1. 这是一个题目 (5分)" 或 "1. 这是一个题目"
QuestionPatterns.Add(new RegexPatternConfig(@"^(\d+)\.\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 2));
// 模式 2: "(1) 这是一个子题目 (3分)" 或 "(1) 这是一个子题目"
QuestionPatterns.Add(new RegexPatternConfig(@"^\((\d+)\)\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 3));
// 模式 3: "① 这是一个更深层次的子题目 (2分)" 或 "① 这是一个更深层次的子题目"
QuestionPatterns.Add(new RegexPatternConfig(@"^[①②③④⑤⑥⑦⑧⑨⑩]+\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 4));
OptionPatterns.Add(new RegexPatternConfig(@"([A-Z]\.)\s*(.*?)(?=[A-Z]\.|$)", 1)); // 大写字母选项
OptionPatterns.Add(new RegexPatternConfig(@"([a-z]\.)\s*(.*?)(?=[a-z]\.|$)", 1)); // 小写字母选项
// 选项模式 (保持不变,使用 AssignmentStructType.Option 区分)
OptionPatterns.Add(new RegexPatternConfig(@"([A-Z]\.)\s*(.*?)(?=[A-Z]\.|$)", 1, AssignmentStructType.Option));
OptionPatterns.Add(new RegexPatternConfig(@"([a-z]\.)\s*(.*?)(?=[a-z]\.|$)", 2, AssignmentStructType.Option));
// 独立的得分正则表达式:匹配行末尾的 "(X分)" 格式
// Group 1: 捕获分数(如 "10" 或 "0.5"
ScoreRegex = new Regex(@"(?:\s*\(((\d+(?:\.\d+)?))\s*分\)\s*$)", RegexOptions.Multiline | RegexOptions.Compiled);
}
}
public class PotentialMatch
{
public int StartIndex { get; set; }
public int EndIndex { get; set; } // 匹配到的结构在原始文本中的结束位置
public string MatchedText { get; set; } // 匹配到的完整行或段落
public Match RegexMatch { get; set; } // 原始的Regex.Match对象方便获取捕获组
public RegexPatternConfig PatternConfig { get; set; } // 匹配到的模式配置
public MatchType Type { get; set; } // 枚举MajorQuestionGroup, Question, Option, etc.
public int EndIndex { get; set; }
public string MatchedText { get; set; }
public Match RegexMatch { get; set; }
public RegexPatternConfig PatternConfig { get; set; }
}
/// <summary>
/// Kind of structure a scanned match represents.
/// </summary>
public enum MatchType
{
MajorQuestionGroup,
Question,
Option,
Other // Any other type that may need to be recognized.
}
/// <summary>
/// 负责扫描原始文本,收集所有潜在的匹配项(题组、题目、选项)。
/// 它只进行匹配,不进行结构化归属。
/// </summary>
public class ExamDocumentScanner
{
private readonly ExamParserConfig _config;
public ExamDocumentScanner(ExamParserConfig config)
{
_config = config ?? throw new ArgumentNullException(nameof(config)); // 确保配置不为空
_config = config ?? throw new ArgumentNullException(nameof(config));
}
/// <summary>
/// 扫描给定的文本,返回所有潜在的匹配项,并按起始位置排序。
/// </summary>
/// <param name="text">要扫描的文本</param>
/// <returns>所有匹配到的 PotentialMatch 列表</returns>
public List<PotentialMatch> Scan(string text)
public List<PotentialMatch> Scan(string text, List<ParseError> errors)
{
if (string.IsNullOrEmpty(text))
{
return new List<PotentialMatch>(); // 对于空文本,直接返回空列表
return new List<PotentialMatch>();
}
var allPotentialMatches = new List<PotentialMatch>();
var allPatternConfigs = new List<RegexPatternConfig>();
allPatternConfigs.AddRange(_config.QuestionPatterns);
allPatternConfigs.AddRange(_config.OptionPatterns);
// 扫描所有题组模式
foreach (var patternConfig in _config.MajorQuestionGroupPatterns)
foreach (var patternConfig in allPatternConfigs)
{
foreach (Match match in patternConfig.Regex.Matches(text))
try
{
allPotentialMatches.Add(new PotentialMatch
foreach (Match match in patternConfig.Regex.Matches(text))
{
StartIndex = match.Index,
EndIndex = match.Index + match.Length,
MatchedText = match.Value,
RegexMatch = match,
PatternConfig = patternConfig,
Type = MatchType.MajorQuestionGroup
});
allPotentialMatches.Add(new PotentialMatch
{
StartIndex = match.Index,
EndIndex = match.Index + match.Length,
MatchedText = match.Value,
RegexMatch = match,
PatternConfig = patternConfig,
});
}
}
catch (Exception ex)
{
errors.Add(new ParseError(ParseErrorType.UnexpectedError,
$"An error occurred during regex matching for pattern: '{patternConfig.Pattern}'.",
innerException: ex));
}
}
// 扫描所有题目模式
foreach (var patternConfig in _config.QuestionPatterns)
{
foreach (Match match in patternConfig.Regex.Matches(text))
{
allPotentialMatches.Add(new PotentialMatch
{
StartIndex = match.Index,
EndIndex = match.Index + match.Length,
MatchedText = match.Value,
RegexMatch = match,
PatternConfig = patternConfig,
Type = MatchType.Question
});
}
}
// 扫描所有选项模式
foreach (var patternConfig in _config.OptionPatterns)
{
foreach (Match match in patternConfig.Regex.Matches(text))
{
allPotentialMatches.Add(new PotentialMatch
{
StartIndex = match.Index,
EndIndex = match.Index + match.Length,
MatchedText = match.Value,
RegexMatch = match,
PatternConfig = patternConfig,
Type = MatchType.Option
});
}
}
// 统一按起始位置排序
return allPotentialMatches.OrderBy(pm => pm.StartIndex).ToList();
}
}
@@ -252,35 +204,8 @@ namespace TechHelper.Client.Exam
_config = config ?? throw new ArgumentNullException(nameof(config), "ExamParserConfig cannot be null.");
}
///
/// 一.基础
/// 1.听写
/// 2.阅读
/// 二.提升
/// 1.阅读
/// (1).选择
/// (2).填空
/// 三.写
/// (一)课文
///
///
///
/// <summary>
/// Builds the ExamPaper structure from raw text and potential matches.
/// Collects and returns parsing errors encountered during the process.
/// </summary>
/// <param name="fullExamText">The complete text of the exam paper.</param>
/// <param name="allPotentialMatches">A list of all identified potential matches.</param>
/// <returns>An ExamPaper object containing the parsed structure and a list of errors.</returns>
/// <exception cref="ArgumentException">Thrown if fullExamText is null or empty.</exception>
/// <exception cref="ArgumentNullException">Thrown if allPotentialMatches is null.</exception>
public ExamPaper BuildExam(string fullExamText, List<PotentialMatch> allPotentialMatches)
public AssignmentEx BuildExam(string fullExamText, List<PotentialMatch> allPotentialMatches)
{
// 核心输入验证仍然是必要的,因为这些错误是无法恢复的
if (string.IsNullOrWhiteSpace(fullExamText))
{
throw new ArgumentException("Full exam text cannot be null or empty.", nameof(fullExamText));
@@ -290,270 +215,79 @@ namespace TechHelper.Client.Exam
throw new ArgumentNullException(nameof(allPotentialMatches), "Potential matches list cannot be null.");
}
var examPaper = new ExamPaper(); // ExamPaper 现在包含一个 Errors 列表
// 尝试获取试卷标题
var assignment = new AssignmentEx();
try
{
examPaper.AssignmentTitle = GetExamTitle(fullExamText);
assignment.Title = GetExamTitle(fullExamText);
}
catch (Exception ex)
{
// 如果获取标题失败,记录错误而不是抛出致命异常
examPaper.Errors.Add(new ParseError(ParseErrorType.UnexpectedError, "Failed to extract exam title.", innerException: ex));
examPaper.AssignmentTitle = "未识别试卷标题"; // 提供默认值
assignment.Errors.Add(new ParseError(ParseErrorType.UnexpectedError, "Failed to extract exam title.", innerException: ex));
assignment.Title = "未识别试卷标题";
}
var majorQGStack = new Stack<MajorQuestionGroup>();
MajorQuestionGroup currentMajorQG = null;
var questionStack = new Stack<PaperQuestion>();
PaperQuestion currentQuestion = null;
var assignmentQuestionStack = new Stack<AssignmentQuestionEx>();
var rootAssignmentQuestion = new AssignmentQuestionEx { Type = AssignmentStructType.Struct, Priority = 0, Title = "Root Exam Structure" };
assignmentQuestionStack.Push(rootAssignmentQuestion);
assignment.ExamStruct = rootAssignmentQuestion;
int currentContentStart = 0;
// 处理试卷开头的描述性文本
if (allPotentialMatches.Any() && allPotentialMatches[0].StartIndex > 0)
{
string introText = fullExamText.Substring(0, allPotentialMatches[0].StartIndex).Trim();
if (!string.IsNullOrWhiteSpace(introText))
{
examPaper.Description += (string.IsNullOrWhiteSpace(examPaper.Description) ? "" : "\n") + introText;
assignment.Description += (string.IsNullOrWhiteSpace(assignment.Description) ? "" : "\n") + introText;
}
}
currentContentStart = allPotentialMatches[0].StartIndex;
currentContentStart = allPotentialMatches.Any() ? allPotentialMatches[0].StartIndex : 0;
for (int i = 0; i < allPotentialMatches.Count; i++)
{
var pm = allPotentialMatches[i];
try
{
// **数据验证:不再抛出,而是记录错误**
if (pm.StartIndex < currentContentStart || pm.EndIndex > fullExamText.Length || pm.StartIndex > pm.EndIndex)
if (!IsValidPotentialMatch(pm, i, fullExamText.Length, currentContentStart, assignment.Errors))
{
examPaper.Errors.Add(new ParseError(ParseErrorType.Validation,
$"PotentialMatch at index {i} has invalid start/end indices. Start: {pm.StartIndex}, End: {pm.EndIndex}, CurrentContentStart: {currentContentStart}, FullTextLength: {fullExamText.Length}",
index: i, matchedText: pm.MatchedText));
currentContentStart = Math.Max(currentContentStart, pm.EndIndex); // 尝试跳过这个损坏的匹配项
continue; // 跳过当前循环迭代,处理下一个匹配项
}
if (pm.RegexMatch == null || pm.PatternConfig == null)
{
examPaper.Errors.Add(new ParseError(ParseErrorType.Validation,
$"PotentialMatch at index {i} is missing RegexMatch or PatternConfig.",
index: i, matchedText: pm.MatchedText));
currentContentStart = Math.Max(currentContentStart, pm.EndIndex); // 尝试跳过这个损坏的匹配项
continue; // 跳过当前循环迭代,处理下一个匹配项
currentContentStart = Math.Max(currentContentStart, pm.EndIndex);
continue;
}
string precedingText = fullExamText.Substring(currentContentStart, pm.StartIndex - currentContentStart).Trim();
if (!string.IsNullOrWhiteSpace(precedingText))
{
if (currentQuestion != null)
if (assignmentQuestionStack.Peek().Question != null)
{
// 将 examPaper.Errors 传递给 ProcessQuestionContent 收集错误
ProcessQuestionContent(currentQuestion, precedingText,
GetSubMatchesForRange(allPotentialMatches, currentContentStart, pm.StartIndex, examPaper.Errors),
examPaper.Errors);
}
else if (currentMajorQG != null)
{
currentMajorQG.Descript += (string.IsNullOrWhiteSpace(currentMajorQG.Descript) ? "" : "\n") + precedingText;
ProcessQuestionContent(assignmentQuestionStack.Peek(), precedingText, assignment.Errors);
}
else
{
examPaper.Description += (string.IsNullOrWhiteSpace(examPaper.Description) ? "" : "\n") + precedingText;
assignment.Description += (string.IsNullOrWhiteSpace(assignment.Description) ? "" : "\n") + precedingText;
}
}
if (pm.Type == MatchType.MajorQuestionGroup)
if (pm.PatternConfig.Type == AssignmentStructType.Option)
{
// 对 MajorQuestionGroup 的处理
try
{
while (majorQGStack.Any() && pm.PatternConfig.Priority <= majorQGStack.Peek().Priority)
{
majorQGStack.Pop();
}
// RegexMatch Groups 验证:不再抛出,记录错误
if (pm.RegexMatch.Groups.Count < 2 || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[1].Value))
{
examPaper.Errors.Add(new ParseError(ParseErrorType.RegexMatchIssue,
$"MajorQuestionGroup match at index {i} does not have enough regex groups or a valid title group (Group 1). Skipping this group.",
index: i, matchedText: pm.MatchedText));
currentContentStart = pm.EndIndex; // 继续,尝试跳过此项
continue;
}
float score = 0;
// 使用 float.TryParse 避免异常
if (pm.RegexMatch.Groups.Count > 3 && pm.RegexMatch.Groups[4].Success) // 假设纯数字分数是 Group 4
{
if (!float.TryParse(pm.RegexMatch.Groups[4].Value, out score))
{
examPaper.Errors.Add(new ParseError(ParseErrorType.DataParsing,
$"Failed to parse score '{pm.RegexMatch.Groups[4].Value}' for MajorQuestionGroup at index {i}. Defaulting to 0.",
index: i, matchedText: pm.MatchedText));
}
}
MajorQuestionGroup newMajorQG = new MajorQuestionGroup
{
Title = pm.RegexMatch.Groups[2].Value.Trim(), // 标题是 Group 2
Score = score,
Priority = pm.PatternConfig.Priority,
bGroup = true
};
if (majorQGStack.Any())
{
majorQGStack.Peek().SubQuestionGroups.Add(newMajorQG);
}
else
{
examPaper.QuestionGroups.Add(newMajorQG);
}
currentContentStart = pm.EndIndex;
majorQGStack.Push(newMajorQG);
currentMajorQG = newMajorQG;
questionStack.Clear();
currentQuestion = null;
}
catch (Exception ex)
{
examPaper.Errors.Add(new ParseError(ParseErrorType.UnexpectedError,
$"An unexpected error occurred during processing MajorQuestionGroup at index {i}.",
index: i, matchedText: pm.MatchedText, innerException: ex));
currentContentStart = pm.EndIndex; // 尝试跳过此项
continue;
}
HandleOptionMatch(pm, i, assignmentQuestionStack.Peek(), assignment.Errors);
}
else if (pm.Type == MatchType.Question)
else
{
// 对 Question 的处理
try
{
while (questionStack.Any() && pm.PatternConfig.Priority <= questionStack.Peek().Priority)
{
questionStack.Pop();
}
// RegexMatch Groups 验证
if (pm.RegexMatch.Groups.Count < 3 || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[1].Value) || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[2].Value))
{
examPaper.Errors.Add(new ParseError(ParseErrorType.RegexMatchIssue,
$"Question match at index {i} does not have enough regex groups or valid number/text groups (Group 1/2). Skipping this question.",
index: i, matchedText: pm.MatchedText));
currentContentStart = pm.EndIndex; // 尝试跳过此项
continue;
}
float score = 0;
// 使用 float.TryParse 避免异常
if (pm.RegexMatch.Groups.Count > 4 && pm.RegexMatch.Groups[4].Success) // 假设纯数字分数是 Group 4
{
if (!float.TryParse(pm.RegexMatch.Groups[4].Value, out score))
{
examPaper.Errors.Add(new ParseError(ParseErrorType.DataParsing,
$"Failed to parse score '{pm.RegexMatch.Groups[4].Value}' for Question at index {i}. Defaulting to 0.",
index: i, matchedText: pm.MatchedText));
}
}
PaperQuestion newQuestion = new PaperQuestion
{
Number = pm.RegexMatch.Groups[1].Value.Trim(),
Stem = pm.RegexMatch.Groups[2].Value.Trim(),
Priority = pm.PatternConfig.Priority,
Score = score // 赋值解析到的分数
};
if (questionStack.Any())
{
questionStack.Peek().SubQuestions.Add(newQuestion);
}
else if (currentMajorQG != null)
{
currentMajorQG.SubQuestions.Add(newQuestion);
}
else
{
examPaper.TopLevelQuestions.Add(newQuestion);
}
currentContentStart = pm.EndIndex;
questionStack.Push(newQuestion);
currentQuestion = newQuestion;
}
catch (Exception ex)
{
examPaper.Errors.Add(new ParseError(ParseErrorType.UnexpectedError,
$"An unexpected error occurred during processing Question at index {i}.",
index: i, matchedText: pm.MatchedText, innerException: ex));
currentContentStart = pm.EndIndex; // 尝试跳过此项
continue;
}
}
else if (pm.Type == MatchType.Option)
{
// 对 Option 的处理
try
{
if (currentQuestion != null)
{
// RegexMatch Groups 验证
if (pm.RegexMatch.Groups.Count < 3 || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[1].Value) || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[2].Value))
{
examPaper.Errors.Add(new ParseError(ParseErrorType.RegexMatchIssue,
$"Option match at index {i} does not have enough regex groups or valid label/text groups (Group 1/2). Skipping this option.",
index: i, matchedText: pm.MatchedText));
currentContentStart = pm.EndIndex; // 尝试跳过此项
continue;
}
Option newOption = new Option
{
Label = pm.RegexMatch.Groups[1].Value.Trim(),
Text = pm.RegexMatch.Groups[2].Value.Trim()
};
currentQuestion.Options.Add(newOption);
}
else
{
// 结构性问题:找到孤立的选项,记录错误但继续
examPaper.Errors.Add(new ParseError(ParseErrorType.Structural,
$"Found isolated Option at index {i}. Options must belong to a question. Ignoring this option.",
index: i, matchedText: pm.MatchedText));
}
}
catch (Exception ex)
{
examPaper.Errors.Add(new ParseError(ParseErrorType.UnexpectedError,
$"An unexpected error occurred during processing Option at index {i}.",
index: i, matchedText: pm.MatchedText, innerException: ex));
// 这里不需要 `continue`,因为即使出错也可能只是该选项的问题,不影响后续处理
}
HandleQuestionGroupMatch(pm, i, assignmentQuestionStack, assignment.Errors);
}
currentContentStart = pm.EndIndex; // 更新当前内容起点
currentContentStart = pm.EndIndex;
}
catch (Exception ex)
{
// 捕获任何在处理单个 PotentialMatch 过程中未被更具体 catch 块捕获的意外错误
examPaper.Errors.Add(new ParseError(ParseErrorType.UnexpectedError,
assignment.Errors.Add(new ParseError(ParseErrorType.UnexpectedError,
$"An unexpected error occurred during main loop processing of PotentialMatch at index {i}.",
index: i, matchedText: pm.MatchedText, innerException: ex));
currentContentStart = Math.Max(currentContentStart, pm.EndIndex); // 尝试跳过当前匹配项,继续下一项
// 这里不 `continue` 是因为外层循环会推进 `i`,但确保 `currentContentStart` 更新以避免无限循环
currentContentStart = Math.Max(currentContentStart, pm.EndIndex);
}
}
// --- 处理所有匹配项之后的剩余内容 ---
if (currentContentStart < fullExamText.Length)
{
try
@@ -561,171 +295,177 @@ namespace TechHelper.Client.Exam
string remainingText = fullExamText.Substring(currentContentStart).Trim();
if (!string.IsNullOrWhiteSpace(remainingText))
{
if (currentQuestion != null)
if (assignmentQuestionStack.Peek().Question != null)
{
ProcessQuestionContent(currentQuestion, remainingText,
GetSubMatchesForRange(allPotentialMatches, currentContentStart, fullExamText.Length, examPaper.Errors),
examPaper.Errors);
}
else if (currentMajorQG != null)
{
currentMajorQG.Descript += (string.IsNullOrWhiteSpace(currentMajorQG.Descript) ? "" : "\n") + remainingText;
ProcessQuestionContent(assignmentQuestionStack.Peek(), remainingText, assignment.Errors);
}
else
{
examPaper.Description += (string.IsNullOrWhiteSpace(examPaper.Description) ? "" : "\n") + remainingText;
assignment.Description += (string.IsNullOrWhiteSpace(assignment.Description) ? "" : "\n") + remainingText;
}
}
}
catch (Exception ex)
{
examPaper.Errors.Add(new ParseError(ParseErrorType.UnexpectedError,
assignment.Errors.Add(new ParseError(ParseErrorType.UnexpectedError,
"An unexpected error occurred while processing remaining text after all potential matches.",
innerException: ex));
}
}
return examPaper;
return assignment;
}
/// <summary>
/// Checks that a potential match has a sane text range and carries its regex data.
/// Any problem is appended to <paramref name="errors"/> as a Validation error.
/// </summary>
/// <param name="pm">The match to inspect.</param>
/// <param name="index">Position of the match in the match list; used only for reporting.</param>
/// <param name="fullTextLength">Length of the complete exam text.</param>
/// <param name="currentContentStart">Offset up to which the text has already been consumed.</param>
/// <param name="errors">Collector that receives the validation error, if any.</param>
/// <returns><c>true</c> when the match can be processed; otherwise <c>false</c>.</returns>
private bool IsValidPotentialMatch(PotentialMatch pm, int index, int fullTextLength, int currentContentStart, List<ParseError> errors)
{
    bool startsBeforeCursor = pm.StartIndex < currentContentStart;
    bool endsPastText = pm.EndIndex > fullTextLength;
    bool rangeInverted = pm.StartIndex > pm.EndIndex;

    if (startsBeforeCursor || endsPastText || rangeInverted)
    {
        var rangeError = new ParseError(
            ParseErrorType.Validation,
            $"PotentialMatch at index {index} has invalid start/end indices. Start: {pm.StartIndex}, End: {pm.EndIndex}, CurrentContentStart: {currentContentStart}, FullTextLength: {fullTextLength}",
            index: index,
            matchedText: pm.MatchedText);
        errors.Add(rangeError);
        return false;
    }

    // Both the raw regex match and the pattern that produced it are needed downstream.
    if (pm.RegexMatch != null && pm.PatternConfig != null)
    {
        return true;
    }

    var missingDataError = new ParseError(
        ParseErrorType.Validation,
        $"PotentialMatch at index {index} is missing RegexMatch or PatternConfig.",
        index: index,
        matchedText: pm.MatchedText);
    errors.Add(missingDataError);
    return false;
}
/// <summary>
/// Converts a non-option match (group heading or question) into a new
/// <see cref="AssignmentQuestionEx"/> node, attaches it to the correct parent on the
/// stack and pushes it so that following matches can nest beneath it.
/// </summary>
/// <param name="pm">The match being processed. Its MatchedText is shortened when a trailing score is found.</param>
/// <param name="index">Position of the match in the match list; used only for error reporting.</param>
/// <param name="assignmentQuestionStack">Current nesting stack; the bottom (root) element is never popped.</param>
/// <param name="errors">Collector for problems; this method records errors instead of throwing.</param>
private void HandleQuestionGroupMatch(PotentialMatch pm, int index, Stack<AssignmentQuestionEx> assignmentQuestionStack, List<ParseError> errors)
{
    try
    {
        // Unwind until the top of the stack has a strictly smaller priority number
        // (i.e. is a higher-level structure) than the incoming match.
        while (assignmentQuestionStack.Count > 1 && pm.PatternConfig.Priority <= assignmentQuestionStack.Peek().Priority)
        {
            assignmentQuestionStack.Pop();
        }

        // Most patterns capture the number as Group 1 and the title as Group 2.
        // A pattern with a single capture (such as the circled-number pattern) exposes
        // only the title, as Group 1; previously every such match was rejected.
        GroupCollection groups = pm.RegexMatch.Groups;
        bool hasNumberAndTitle = groups.Count >= 3;
        bool hasTitleOnly = groups.Count == 2;
        string rawTitle = hasNumberAndTitle
            ? groups[2].Value
            : (hasTitleOnly ? groups[1].Value : string.Empty);
        bool numberCaptured = !hasNumberAndTitle || groups[1].Success;

        if (!numberCaptured || string.IsNullOrWhiteSpace(rawTitle))
        {
            errors.Add(new ParseError(ParseErrorType.RegexMatchIssue,
                $"Question/Group match at index {index} does not have enough regex groups (expected number and title, or a title-only capture) or a valid title group. Skipping this group.",
                index: index, matchedText: pm.MatchedText));
            return;
        }

        float score = 0;

        // Look for a trailing "(N分)" score marker at the end of the matched text.
        Match scoreMatch = _config.ScoreRegex.Match(pm.MatchedText);
        if (scoreMatch.Success && scoreMatch.Groups.Count > 1 && scoreMatch.Groups[1].Success)
        {
            // The score regex only admits digits with '.' as the decimal separator, so parse
            // with the invariant culture; the current culture may expect ',' instead.
            if (!float.TryParse(scoreMatch.Groups[1].Value,
                    System.Globalization.NumberStyles.Float,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out score))
            {
                errors.Add(new ParseError(ParseErrorType.DataParsing,
                    $"Failed to parse score '{scoreMatch.Groups[1].Value}' for match at index {index}. Defaulting to 0.",
                    index: index, matchedText: pm.MatchedText));
            }

            // Drop the score suffix from this match's text. Only the PotentialMatch
            // object changes; the original exam text is untouched.
            pm.MatchedText = pm.MatchedText.Substring(0, scoreMatch.Index).Trim();
        }

        // The title capture runs to the end of the line, so it still carries the score
        // suffix; strip it here so the stored title is the bare title.
        string title = rawTitle;
        Match titleScoreMatch = _config.ScoreRegex.Match(title);
        if (titleScoreMatch.Success)
        {
            title = title.Substring(0, titleScoreMatch.Index);
        }
        title = title.Trim();

        var newAssignmentQuestion = new AssignmentQuestionEx
        {
            Score = score,
            Priority = pm.PatternConfig.Priority,
            Type = pm.PatternConfig.Type
        };

        if (pm.PatternConfig.Type == AssignmentStructType.Struct)
        {
            // Pure structure node: the title lives on the node itself.
            newAssignmentQuestion.Title = title;
        }
        else
        {
            // Every other type carries its text in an attached question.
            newAssignmentQuestion.Question = new QuestionEx
            {
                Title = title,
            };
        }

        assignmentQuestionStack.Peek().ChildrenAssignmentQuestion.Add(newAssignmentQuestion);
        assignmentQuestionStack.Push(newAssignmentQuestion);
    }
    catch (Exception ex)
    {
        // Null-conditional access keeps the error report itself from throwing.
        errors.Add(new ParseError(ParseErrorType.UnexpectedError,
            $"An unexpected error occurred during processing a non-option match (type: {pm?.PatternConfig?.Type}) at index {index}.",
            index: index, matchedText: pm?.MatchedText, innerException: ex));
    }
}
/// <summary>
/// Attaches an option match to the question of the current node.
/// </summary>
/// <param name="pm">The option match; Group 1 is read as the label and Group 2 as the text.</param>
/// <param name="index">Position of the match in the match list; used only for error reporting.</param>
/// <param name="currentAssignmentQuestion">Node that should own the option.</param>
/// <param name="errors">Collector for problems; this method records errors instead of throwing.</param>
private void HandleOptionMatch(PotentialMatch pm, int index, AssignmentQuestionEx currentAssignmentQuestion, List<ParseError> errors)
{
    try
    {
        var owner = currentAssignmentQuestion.Question;
        if (owner == null)
        {
            // An option without a question to belong to is reported and dropped.
            var orphanError = new ParseError(
                ParseErrorType.Structural,
                $"Found isolated Option at index {index}. Options must belong to a 'Question' type structure. Ignoring this option.",
                index: index,
                matchedText: pm.MatchedText);
            errors.Add(orphanError);
            return;
        }

        GroupCollection captures = pm.RegexMatch.Groups;
        bool usable = captures.Count >= 3
            && captures[1].Success
            && !string.IsNullOrWhiteSpace(captures[2].Value);
        if (!usable)
        {
            var shapeError = new ParseError(
                ParseErrorType.RegexMatchIssue,
                $"Option match at index {index} does not have enough regex groups or valid label/text groups (Group 1/2). Skipping this option.",
                index: index,
                matchedText: pm.MatchedText);
            errors.Add(shapeError);
            return;
        }

        string label = captures[1].Value.Trim();
        string text = captures[2].Value.Trim();
        owner.Options.Add(new Option { Label = label, Text = text });
    }
    catch (Exception ex)
    {
        errors.Add(new ParseError(ParseErrorType.UnexpectedError,
            $"An unexpected error occurred during processing Option at index {index}.",
            index: index, matchedText: pm.MatchedText, innerException: ex));
    }
}
/// <summary>
/// Appends free text to the title of the question attached to <paramref name="question"/>.
/// When the node has no attached question, a Structural error is recorded instead.
/// </summary>
/// <param name="question">Node whose attached question receives the text.</param>
/// <param name="contentText">Text to append; blank text is ignored.</param>
/// <param name="errors">Collector that receives the error when no question is attached.</param>
private void ProcessQuestionContent(AssignmentQuestionEx question, string contentText, List<ParseError> errors)
{
    var target = question?.Question;
    if (target == null)
    {
        var notAQuestion = new ParseError(
            ParseErrorType.Structural,
            $"Attempted to process content for a non-question type AssignmentQuestionEx (Type: {question?.Type}). Content: '{contentText}'",
            matchedText: contentText);
        errors.Add(notAQuestion);
        return;
    }

    if (string.IsNullOrWhiteSpace(contentText))
    {
        return;
    }

    // Separate from existing title text with a newline; a blank title gets no separator.
    string separator = string.IsNullOrWhiteSpace(target.Title) ? "" : "\n";
    target.Title = target.Title + separator + contentText;
}
/// <summary>
/// Returns the first line of the exam text that is not blank, to be used as the title.
/// The line is returned as-is (not trimmed). This method does no error handling of its
/// own; the caller is expected to wrap it in a try/catch.
/// </summary>
/// <param name="examPaperText">Complete exam text.</param>
/// <returns>The first non-blank line, or a placeholder title when every line is blank.</returns>
private string GetExamTitle(string examPaperText)
{
    char[] lineBreaks = { '\r', '\n' };
    string[] lines = examPaperText.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);

    foreach (string candidate in lines)
    {
        if (!string.IsNullOrWhiteSpace(candidate))
        {
            return candidate;
        }
    }

    return "未识别试卷标题";
}
/// <summary>
/// Selects the matches whose start offset lies in the half-open range
/// [<paramref name="start"/>, <paramref name="end"/>).
/// Problems are recorded in <paramref name="errors"/> rather than thrown.
/// </summary>
/// <param name="allMatches">Matches to filter; <c>null</c> yields an empty result.</param>
/// <param name="start">Inclusive lower bound for a match's start offset.</param>
/// <param name="end">Exclusive upper bound for a match's start offset.</param>
/// <param name="errors">Collector for validation or unexpected errors.</param>
/// <returns>A new list with the matches in range; empty when the range is invalid or on failure.</returns>
private List<PotentialMatch> GetSubMatchesForRange(List<PotentialMatch> allMatches, int start, int end, List<ParseError> errors)
{
    // A bad range is reported and answered with an empty list.
    bool rangeIsInvalid = start < 0 || end < start;
    if (rangeIsInvalid)
    {
        // The start offset doubles as an approximate index for the error.
        var rangeError = new ParseError(
            ParseErrorType.Validation,
            $"Invalid range provided to GetSubMatchesForRange. Start: {start}, End: {end}.",
            index: start);
        errors.Add(rangeError);
        return new List<PotentialMatch>();
    }

    // Defensive check so the method is safe on its own, whatever the caller validated.
    if (allMatches == null)
    {
        return new List<PotentialMatch>();
    }

    try
    {
        var inRange = new List<PotentialMatch>();
        foreach (var candidate in allMatches)
        {
            if (candidate.StartIndex >= start && candidate.StartIndex < end)
            {
                inRange.Add(candidate);
            }
        }
        return inRange;
    }
    catch (Exception ex)
    {
        errors.Add(new ParseError(ParseErrorType.UnexpectedError,
            $"An unexpected error occurred getting sub-matches for range [{start}, {end}).",
            innerException: ex));
        // An empty list is returned on failure.
        return new List<PotentialMatch>();
    }
}
/// <summary>
/// Processes the content of a question: option matches in scope become Options on the
/// question, and the text around them is appended to the question's Stem.
/// Null arguments throw <see cref="ArgumentNullException"/>; every other problem is
/// recorded in <paramref name="errors"/> and processing continues.
/// </summary>
/// <param name="question">Question that receives the stem text and options.</param>
/// <param name="contentText">Text belonging to the question.</param>
/// <param name="potentialMatchesInScope">Matches to consider; they are processed in StartIndex order.</param>
/// <param name="errors">Collector for validation, regex-shape and unexpected errors.</param>
/// <exception cref="ArgumentNullException">Thrown when question, contentText or potentialMatchesInScope is null.</exception>
private void ProcessQuestionContent(PaperQuestion question, string contentText, List<PotentialMatch> potentialMatchesInScope, List<ParseError> errors)
{
// Argument checks: these are the contract of this internal method, so a violation is a caller bug and throws.
if (question == null) throw new ArgumentNullException(nameof(question), "Question cannot be null in ProcessQuestionContent.");
if (contentText == null) throw new ArgumentNullException(nameof(contentText), "Content text cannot be null in ProcessQuestionContent.");
if (potentialMatchesInScope == null) throw new ArgumentNullException(nameof(potentialMatchesInScope), "Potential matches in scope cannot be null.");
try
{
int lastOptionEndIndex = 0;
foreach (var pm in potentialMatchesInScope.OrderBy(p => p.StartIndex))
{
// Each match is handled in its own try block: record the error and move on.
try
{
if (pm.Type == MatchType.Option)
{
// Validate the indices against contentText; record an error and skip on failure.
// NOTE(review): the scanner sets StartIndex/EndIndex from match.Index over the full text,
// while contentText looks like a trimmed substring — confirm the offsets are comparable.
if (pm.StartIndex < lastOptionEndIndex || pm.StartIndex > contentText.Length || pm.EndIndex > contentText.Length)
{
errors.Add(new ParseError(ParseErrorType.Validation,
$"Option match at index {pm.StartIndex} has invalid indices within content text. MatchedText: '{pm.MatchedText}'. Skipping.",
index: pm.StartIndex, matchedText: pm.MatchedText));
continue; // Skip this option.
}
// Text between the previous option and this one is appended to the stem.
if (pm.StartIndex > lastOptionEndIndex)
{
string textBeforeOption = contentText.Substring(lastOptionEndIndex, pm.StartIndex - lastOptionEndIndex).Trim();
if (!string.IsNullOrWhiteSpace(textBeforeOption))
{
question.Stem += (string.IsNullOrWhiteSpace(question.Stem) ? "" : "\n") + textBeforeOption;
}
}
// Validate the regex groups (Group 1 = label, Group 2 = text); record an error and skip on failure.
if (pm.RegexMatch.Groups.Count < 3 || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[1].Value) || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[2].Value))
{
errors.Add(new ParseError(ParseErrorType.RegexMatchIssue,
$"Option regex match '{pm.MatchedText}' does not have enough groups (expected 3) for label and text. Skipping option.",
index: pm.StartIndex, matchedText: pm.MatchedText));
lastOptionEndIndex = pm.EndIndex; // Advance the cursor so processing does not get stuck.
continue; // Skip this option.
}
var newOption = new Option
{
Label = pm.RegexMatch.Groups[1].Value.Trim(),
Text = pm.RegexMatch.Groups[2].Value.Trim()
};
question.Options.Add(newOption);
lastOptionEndIndex = pm.EndIndex;
}
else
{
// NOTE(review): this appends the whole contentText once per non-option match, and the
// tail handling below may append part of it again — confirm this is intended.
question.Stem += contentText;
}
}
catch (Exception innerEx)
{
errors.Add(new ParseError(ParseErrorType.UnexpectedError,
$"An unexpected error occurred during processing a potential match ({pm.Type}) within question content.",
index: pm.StartIndex, matchedText: pm.MatchedText, innerException: innerEx));
lastOptionEndIndex = pm.EndIndex; // Try to advance the cursor to avoid an endless loop.
continue; // Try to carry on with the next match.
}
}
// Text remaining after the last option is appended to the stem.
if (lastOptionEndIndex < contentText.Length)
{
string remainingContent = contentText.Substring(lastOptionEndIndex).Trim();
if (!string.IsNullOrWhiteSpace(remainingContent))
{
question.Stem += (string.IsNullOrWhiteSpace(question.Stem) ? "" : "\n") + remainingContent;
}
}
}
catch (Exception ex)
{
// Catch-all for unexpected errors anywhere in this method.
errors.Add(new ParseError(ParseErrorType.UnexpectedError,
$"An unexpected error occurred while processing content for Question '{question.Number}'.",
innerException: ex));
}
}
}
public class ExamParser
@@ -742,20 +482,16 @@ namespace TechHelper.Client.Exam
}
/// <summary>
/// 解析给定的试卷文本,返回结构化的 ExamPaper 对象。
/// 解析给定的试卷文本,返回结构化的 AssignmentEx 对象。
/// </summary>
/// <param name="examPaperText">完整的试卷文本</param>
/// <returns>解析后的 ExamPaper 对象</returns>
public ExamPaper ParseExamPaper(string examPaperText)
/// <returns>解析后的 AssignmentEx 对象</returns>
public AssignmentEx ParseExamPaper(string examPaperText)
{
// 1. 扫描:一次性扫描整个文本,收集所有潜在的匹配项
// Scan 方法现在已经优化为不抛出 ArgumentNullException
List<PotentialMatch> allPotentialMatches = _scanner.Scan(examPaperText);
// 2. 构建:根据扫描结果和原始文本,线性遍历并构建层级结构
// BuildExamPaper 现在会返回一个包含错误列表的 ExamPaper 对象
// 外部不再需要捕获内部解析异常,只需检查 ExamPaper.Errors 列表
return _builder.BuildExam(examPaperText, allPotentialMatches);
var assignment = new AssignmentEx();
List<PotentialMatch> allPotentialMatches = _scanner.Scan(examPaperText, assignment.Errors);
assignment = _builder.BuildExam(examPaperText, allPotentialMatches);
return assignment;
}
}
}