exam_service

This commit is contained in:
SpecialX
2025-06-11 15:02:20 +08:00
parent 97843ab5fd
commit e26881ec2f
52 changed files with 3510 additions and 1174 deletions

View File

@@ -6,244 +6,244 @@ using System.IO; // 用于 XML 反序列化
namespace TechHelper.Client.Exam
{
[XmlRoot("EP")]
public class StringsList
{
//[XmlRoot("EP")]
//public class StringsList
//{
[XmlElement("Q")]
public List<string> Items { get; set; }
}
// [XmlElement("Q")]
// public List<string> Items { get; set; }
//}
// XML 根元素 <EP>
[XmlRoot("EP")]
public class ExamPaper
{
// XML 特性:<QGs> 包含 <QG> 列表
[XmlArray("QGs")]
[XmlArrayItem("QG")]
[JsonProperty("QuestionGroups")]
public List<QuestionGroup> QuestionGroups { get; set; } = new List<QuestionGroup>();
}
//// XML 根元素 <EP>
//[XmlRoot("EP")]
//public class ExamPaper
//{
// // XML 特性:<QGs> 包含 <QG> 列表
// [XmlArray("QGs")]
// [XmlArrayItem("QG")]
// [JsonProperty("QuestionGroups")]
// public List<QuestionGroup> QuestionGroups { get; set; } = new List<QuestionGroup>();
//}
[XmlRoot("QG")]
public class QuestionGroup
{
// JSON 特性
[JsonProperty("题号")]
// XML 特性:作为 <QG Id="X"> 属性
[XmlAttribute("Id")]
public byte Id { get; set; }
//[XmlRoot("QG")]
//public class QuestionGroup
//{
// // JSON 特性
// [JsonProperty("题号")]
// // XML 特性:作为 <QG Id="X"> 属性
// [XmlAttribute("Id")]
// public byte Id { get; set; }
[JsonProperty("标题")]
[XmlElement("T")] // T for Title
public string Title { get; set; }
// [JsonProperty("标题")]
// [XmlElement("T")] // T for Title
// public string Title { get; set; }
[JsonProperty("分值")]
[XmlAttribute("S")] // S for Score
public int Score { get; set; }
// [JsonProperty("分值")]
// [XmlAttribute("S")] // S for Score
// public int Score { get; set; }
[JsonProperty("题目引用")]
[XmlElement("QR")] // QR for QuestionReference作为 <QR> 元素
public string QuestionReference { get; set; } = ""; // 初始化为空字符串
// [JsonProperty("题目引用")]
// [XmlElement("QR")] // QR for QuestionReference作为 <QR> 元素
// public string QuestionReference { get; set; } = ""; // 初始化为空字符串
[JsonProperty("子题目")]
[XmlArray("SQs")] // SQs 包含 <SQ> 列表
[XmlArrayItem("SQ")]
public List<SubQuestion> SubQuestions { get; set; } = new List<SubQuestion>();
// [JsonProperty("子题目")]
// [XmlArray("SQs")] // SQs 包含 <SQ> 列表
// [XmlArrayItem("SQ")]
// public List<SubQuestion> SubQuestions { get; set; } = new List<SubQuestion>();
[JsonProperty("子题组")]
[XmlArray("SQGs")] // SQGs 包含 <QG> 列表 (嵌套题组)
[XmlArrayItem("QG")]
public List<QuestionGroup> SubQuestionGroups { get; set; } = new List<QuestionGroup>();
}
// [JsonProperty("子题组")]
// [XmlArray("SQGs")] // SQGs 包含 <QG> 列表 (嵌套题组)
// [XmlArrayItem("QG")]
// public List<QuestionGroup> SubQuestionGroups { get; set; } = new List<QuestionGroup>();
//}
// 子题目类
public class SubQuestion
{
//// 子题目类
//public class SubQuestion
//{
[JsonProperty("子题号")]
[XmlAttribute("Id")] // Id for SubId
public byte SubId { get; set; }
// [JsonProperty("子题号")]
// [XmlAttribute("Id")] // Id for SubId
// public byte SubId { get; set; }
[JsonProperty("题干")]
[XmlElement("T")] // T for Text (Stem)
public string Stem { get; set; }
// [JsonProperty("题干")]
// [XmlElement("T")] // T for Text (Stem)
// public string Stem { get; set; }
[JsonProperty("分值")]
[XmlAttribute("S")] // S for Score
public int Score { get; set; } // 分值通常为整数
// [JsonProperty("分值")]
// [XmlAttribute("S")] // S for Score
// public int Score { get; set; } // 分值通常为整数
[JsonProperty("选项")]
[XmlArray("Os")] // Os 包含 <O> 列表
[XmlArrayItem("O")]
public List<Option> Options { get; set; } = new List<Option>();
// [JsonProperty("选项")]
// [XmlArray("Os")] // Os 包含 <O> 列表
// [XmlArrayItem("O")]
// public List<Option> Options { get; set; } = new List<Option>();
[JsonProperty("示例答案")]
[XmlElement("SA")] // SA for SampleAnswer
public string SampleAnswer { get; set; } = "";
}
// [JsonProperty("示例答案")]
// [XmlElement("SA")] // SA for SampleAnswer
// public string SampleAnswer { get; set; } = "";
//}
// 选项类,用于适配 <O V="X"/> 结构
public class Option
{
// XML 特性:作为 <O V="X"> 属性
[XmlAttribute("V")] // V for Value
// JSON 特性:如果 JSON 中的选项是 {"Value": "A"} 这样的对象,则需要 JsonProperty("Value")
// 但如果 JSON 选项只是 ["A", "B"] 这样的字符串数组则此Option类不适合JSON Options
// 需要明确你的JSON Options的结构。我假设你JSON Options是 List<string>
// 如果是 List<string>则Options属性在SubQuestion中直接是List<string>Option类则不需要
// 但根据你的精简XML需求Option类是必要的。
// 所以这里需要你自己根据实际JSON Options结构选择。
// 为了兼容XML我会保留Option类但如果JSON是List<string>Options属性会很复杂
public string Value { get; set; }
}
//// 选项类,用于适配 <O V="X"/> 结构
//public class Option
//{
// // XML 特性:作为 <O V="X"> 属性
// [XmlAttribute("V")] // V for Value
// // JSON 特性:如果 JSON 中的选项是 {"Value": "A"} 这样的对象,则需要 JsonProperty("Value")
// // 但如果 JSON 选项只是 ["A", "B"] 这样的字符串数组则此Option类不适合JSON Options
// // 需要明确你的JSON Options的结构。我假设你JSON Options是 List<string>
// // 如果是 List<string>则Options属性在SubQuestion中直接是List<string>Option类则不需要
// // 但根据你的精简XML需求Option类是必要的。
// // 所以这里需要你自己根据实际JSON Options结构选择。
// // 为了兼容XML我会保留Option类但如果JSON是List<string>Options属性会很复杂
// public string Value { get; set; }
//}
// 独立的服务类来处理序列化和反序列化
public static class ExamParser
{
// JSON 反序列化方法
public static List<T> ParseExamJson<T>(string jsonContent)
{
string cleanedJson = jsonContent.Trim();
//// 独立的服务类来处理序列化和反序列化
//public static class ExamParser
//{
// // JSON 反序列化方法
// public static List<T> ParseExamJson<T>(string jsonContent)
// {
// string cleanedJson = jsonContent.Trim();
// 移除可能存在的 Markdown 代码块标记
if (cleanedJson.StartsWith("```json") && cleanedJson.EndsWith("```"))
{
cleanedJson = cleanedJson.Substring("```json".Length, cleanedJson.Length - "```json".Length - "```".Length).Trim();
}
// 移除可能存在的单引号包围(如果 AI 偶尔会这样输出)
if (cleanedJson.StartsWith("'") && cleanedJson.EndsWith("'"))
{
cleanedJson = cleanedJson.Substring(1, cleanedJson.Length - 2).Trim();
}
// // 移除可能存在的 Markdown 代码块标记
// if (cleanedJson.StartsWith("```json") && cleanedJson.EndsWith("```"))
// {
// cleanedJson = cleanedJson.Substring("```json".Length, cleanedJson.Length - "```json".Length - "```".Length).Trim();
// }
// // 移除可能存在的单引号包围(如果 AI 偶尔会这样输出)
// if (cleanedJson.StartsWith("'") && cleanedJson.EndsWith("'"))
// {
// cleanedJson = cleanedJson.Substring(1, cleanedJson.Length - 2).Trim();
// }
try
{
// 注意:这里假设你的 JSON 根直接是一个 QuestionGroup 列表
// 如果你的 JSON 根是 { "QuestionGroups": [...] },则需要先反序列化到 ExamPaper
List<T> examQuestions = JsonConvert.DeserializeObject<List<T>>(cleanedJson);
return examQuestions;
}
catch (JsonSerializationException ex)
{
Console.WriteLine($"JSON 反序列化错误: {ex.Message}");
Console.WriteLine($"内部异常: {ex.InnerException?.Message}");
return null;
}
catch (Exception ex)
{
Console.WriteLine($"处理错误: {ex.Message}");
return null;
}
}
// try
// {
// // 注意:这里假设你的 JSON 根直接是一个 QuestionGroup 列表
// // 如果你的 JSON 根是 { "QuestionGroups": [...] },则需要先反序列化到 ExamPaper
// List<T> examQuestions = JsonConvert.DeserializeObject<List<T>>(cleanedJson);
// return examQuestions;
// }
// catch (JsonSerializationException ex)
// {
// Console.WriteLine($"JSON 反序列化错误: {ex.Message}");
// Console.WriteLine($"内部异常: {ex.InnerException?.Message}");
// return null;
// }
// catch (Exception ex)
// {
// Console.WriteLine($"处理错误: {ex.Message}");
// return null;
// }
// }
#region TEST
[XmlRoot("User")]
public class User
{
[XmlAttribute("id")]
public string Id { get; set; }
// #region TEST
// [XmlRoot("User")]
// public class User
// {
// [XmlAttribute("id")]
// public string Id { get; set; }
[XmlElement("PersonalInfo")]
public PersonalInfo PersonalInfo { get; set; }
// [XmlElement("PersonalInfo")]
// public PersonalInfo PersonalInfo { get; set; }
[XmlArray("Roles")] // 包装元素 <Roles>
[XmlArrayItem("Role")] // 集合中的每个项是 <Role>
public List<Role> Roles { get; set; } = new List<Role>();
// [XmlArray("Roles")] // 包装元素 <Roles>
// [XmlArrayItem("Role")] // 集合中的每个项是 <Role>
// public List<Role> Roles { get; set; } = new List<Role>();
// 构造函数,方便测试
public User() { }
}
// // 构造函数,方便测试
// public User() { }
// }
public class PersonalInfo
{
[XmlElement("FullName")]
public string FullName { get; set; }
// public class PersonalInfo
// {
// [XmlElement("FullName")]
// public string FullName { get; set; }
[XmlElement("EmailAddress")]
public string EmailAddress { get; set; }
// [XmlElement("EmailAddress")]
// public string EmailAddress { get; set; }
// 构造函数,方便测试
public PersonalInfo() { }
}
// // 构造函数,方便测试
// public PersonalInfo() { }
// }
public class Role
{
[XmlAttribute("type")]
public string Type { get; set; }
// public class Role
// {
// [XmlAttribute("type")]
// public string Type { get; set; }
// 构造函数,方便测试
public Role() { }
}
#endregion
// // 构造函数,方便测试
// public Role() { }
// }
// #endregion
// XML 反序列化方法
public static T ParseExamXml<T>(string xmlContent)
{
string cleanedXml = xmlContent.Trim();
if (cleanedXml.StartsWith("'") && cleanedXml.EndsWith("'"))
{
cleanedXml = cleanedXml.Substring(1, cleanedXml.Length - 2);
}
if (cleanedXml.StartsWith("```xml") && cleanedXml.EndsWith("```"))
{
cleanedXml = cleanedXml.Substring("```xml".Length, cleanedXml.Length - "```xml".Length - "```".Length).Trim();
}
// // XML 反序列化方法
// public static T ParseExamXml<T>(string xmlContent)
// {
// string cleanedXml = xmlContent.Trim();
// if (cleanedXml.StartsWith("'") && cleanedXml.EndsWith("'"))
// {
// cleanedXml = cleanedXml.Substring(1, cleanedXml.Length - 2);
// }
// if (cleanedXml.StartsWith("```xml") && cleanedXml.EndsWith("```"))
// {
// cleanedXml = cleanedXml.Substring("```xml".Length, cleanedXml.Length - "```xml".Length - "```".Length).Trim();
// }
XmlSerializer serializer = new XmlSerializer(typeof(T));
// XmlSerializer serializer = new XmlSerializer(typeof(T));
using (StringReader reader = new StringReader(cleanedXml))
{
try
{
T user = (T)serializer.Deserialize(reader);
return user;
}
catch (InvalidOperationException ex)
{
Console.WriteLine($"XML 反序列化操作错误: {ex.Message}");
Console.WriteLine($"内部异常: {ex.InnerException?.Message}");
return default(T);
}
catch (Exception ex)
{
Console.WriteLine($"处理错误: {ex.Message}");
return default(T);
}
}
}
// using (StringReader reader = new StringReader(cleanedXml))
// {
// try
// {
// T user = (T)serializer.Deserialize(reader);
// return user;
// }
// catch (InvalidOperationException ex)
// {
// Console.WriteLine($"XML 反序列化操作错误: {ex.Message}");
// Console.WriteLine($"内部异常: {ex.InnerException?.Message}");
// return default(T);
// }
// catch (Exception ex)
// {
// Console.WriteLine($"处理错误: {ex.Message}");
// return default(T);
// }
// }
// }
public static List<QuestionGroup> ParseExamXmlFormQG(string xmlContent)
{
// 移除可能存在的 Markdown 代码块标记
if (xmlContent.StartsWith("```xml") && xmlContent.EndsWith("```"))
{
xmlContent = xmlContent.Substring("```xml".Length, xmlContent.Length - "```xml".Length - "```".Length).Trim();
}
// public static List<QuestionGroup> ParseExamXmlFormQG(string xmlContent)
// {
// // 移除可能存在的 Markdown 代码块标记
// if (xmlContent.StartsWith("```xml") && xmlContent.EndsWith("```"))
// {
// xmlContent = xmlContent.Substring("```xml".Length, xmlContent.Length - "```xml".Length - "```".Length).Trim();
// }
var serializer = new XmlSerializer(typeof(List<QuestionGroup>), new XmlRootAttribute("QGs"));
// var serializer = new XmlSerializer(typeof(List<QuestionGroup>), new XmlRootAttribute("QGs"));
using (StringReader reader = new StringReader(xmlContent))
{
try
{
List<QuestionGroup> questionGroups = (List<QuestionGroup>)serializer.Deserialize(reader);
return questionGroups;
}
catch (InvalidOperationException ex)
{
Console.WriteLine($"XML 反序列化操作错误: {ex.Message}");
Console.WriteLine($"内部异常: {ex.InnerException?.Message}");
return null;
}
catch (Exception ex)
{
Console.WriteLine($"处理错误: {ex.Message}");
return null;
}
}
}
}
// using (StringReader reader = new StringReader(xmlContent))
// {
// try
// {
// List<QuestionGroup> questionGroups = (List<QuestionGroup>)serializer.Deserialize(reader);
// return questionGroups;
// }
// catch (InvalidOperationException ex)
// {
// Console.WriteLine($"XML 反序列化操作错误: {ex.Message}");
// Console.WriteLine($"内部异常: {ex.InnerException?.Message}");
// return null;
// }
// catch (Exception ex)
// {
// Console.WriteLine($"处理错误: {ex.Message}");
// return null;
// }
// }
// }
//}
}

View File

@@ -1,627 +0,0 @@
using System.Text.RegularExpressions;
namespace TechHelper.Client.Exam.Parse
{
/// <summary>
/// Structured result of parsing an exam text: header information plus the
/// question groups and ungrouped questions found in it.
/// </summary>
public class ExamPaper
{
    /// <summary>Exam title. Starts as a "title not recognized" placeholder.</summary>
    public string Title { get; set; } = "未识别试卷标题";

    /// <summary>Exam description. Starts as a "description not recognized" placeholder.</summary>
    public string Descript { get; set; } = "未识别试卷描述";

    /// <summary>Subject area of the exam. Starts as a generic placeholder.</summary>
    public string SubjectArea { get; set; } = "试卷类别";

    /// <summary>Top-level question groups, in document order.</summary>
    public List<MajorQuestionGroup> MajorQuestionGroups { get; set; } = new();

    /// <summary>Questions that do not belong to any question group.</summary>
    public List<Question> TopLevelQuestions { get; set; } = new();
}
/// <summary>
/// A headed section of an exam. A group can contain questions as well as
/// nested groups.
/// </summary>
public class MajorQuestionGroup
{
    /// <summary>Heading text of the group.</summary>
    public string Title { get; set; } = "";

    /// <summary>Free text attached to the group.</summary>
    public string Descript { get; set; } = "";

    /// <summary>Score of the group; stays 0 when none was assigned.</summary>
    public float Score { get; set; }

    /// <summary>Groups nested inside this one.</summary>
    public List<MajorQuestionGroup> SubMajorQuestionGroups { get; set; } = new();

    /// <summary>Questions that belong directly to this group.</summary>
    public List<Question> Questions { get; set; } = new();

    /// <summary>Priority of the pattern that produced this group (used to decide nesting).</summary>
    public int Priority { get; set; }
}
/// <summary>
/// A single question, optionally carrying answer options and nested
/// sub-questions.
/// </summary>
public class Question
{
    /// <summary>Question number exactly as captured from the text.</summary>
    public string Number { get; set; } = "";

    /// <summary>Question stem.</summary>
    public string Text { get; set; } = "";

    /// <summary>Score of the question; stays 0 when none was assigned.</summary>
    public float Score { get; set; }

    /// <summary>Answer options attached to the question.</summary>
    public List<Option> Options { get; set; } = new();

    /// <summary>Questions nested under this one.</summary>
    public List<Question> SubQuestions { get; set; } = new();

    /// <summary>Priority of the pattern that produced this question (used to decide nesting).</summary>
    public int Priority { get; set; }
}
/// <summary>
/// One answer option of a question.
/// </summary>
public class Option
{
    /// <summary>Option label as captured from the text (for example "A.").</summary>
    public string Label { get; set; } = "";

    /// <summary>Option content.</summary>
    public string Text { get; set; } = "";
}
/// <summary>
/// A regular expression together with the priority used to rank it against
/// other patterns (a smaller number means a higher priority).
/// </summary>
public class RegexPatternConfig
{
    // Multiline so that ^ and $ anchor to individual lines; Compiled because the
    // same pattern is run over whole documents.
    private const RegexOptions PatternOptions = RegexOptions.Multiline | RegexOptions.Compiled;

    private string _pattern;

    /// <summary>
    /// Pattern source text. Assigning a new value rebuilds <see cref="Regex"/> so the
    /// two can never disagree.
    /// </summary>
    /// <exception cref="ArgumentNullException">The assigned value is null.</exception>
    /// <exception cref="ArgumentException">The assigned value is not a valid pattern.</exception>
    public string Pattern
    {
        get => _pattern;
        set
        {
            // Build first: if the pattern is invalid the previous state is kept intact.
            Regex = new Regex(value, PatternOptions);
            _pattern = value;
        }
    }

    /// <summary>Priority of the pattern; a smaller number means a higher priority.</summary>
    public int Priority { get; set; }

    /// <summary>Compiled regular expression built from <see cref="Pattern"/>.</summary>
    public Regex Regex { get; private set; }

    /// <summary>Creates a configuration and compiles <paramref name="pattern"/>.</summary>
    /// <param name="pattern">Regular expression source text.</param>
    /// <param name="priority">Priority; a smaller number means a higher priority.</param>
    public RegexPatternConfig(string pattern, int priority)
    {
        Regex = new Regex(pattern, PatternOptions);
        _pattern = pattern;
        Priority = priority;
    }
}
/// <summary>
/// Holds the regular expressions used to recognise question groups, questions
/// and options in an exam text. The constructor registers the default patterns.
/// </summary>
public class ExamParserConfig
{
    /// <summary>Patterns that recognise question-group headings.</summary>
    public List<RegexPatternConfig> MajorQuestionGroupPatterns { get; set; } = new List<RegexPatternConfig>();

    /// <summary>Patterns that recognise question lines.</summary>
    public List<RegexPatternConfig> QuestionPatterns { get; set; } = new List<RegexPatternConfig>();

    /// <summary>Patterns that recognise option lines.</summary>
    public List<RegexPatternConfig> OptionPatterns { get; set; } = new List<RegexPatternConfig>();

    /// <summary>Registers the default patterns.</summary>
    public ExamParserConfig()
    {
        // --- Question-group patterns ---
        // Matches "一、选择题 (20分)" or "二、阅读理解".
        // Group 1: group numeral (e.g. "一", "二")
        // Group 2: group title (e.g. "选择题", "阅读理解")
        // Groups 3 and 4: the numeric score (e.g. "20"); both wrap the same digits
        //                 and are unset when the heading has no score.
        MajorQuestionGroupPatterns.Add(new RegexPatternConfig(@"^([一二三四五六七八九十]+)[、\.]\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 1));

        // --- Question patterns ---
        // Every question pattern exposes the same group layout:
        // Group 1: question number (e.g. "1", "①")
        // Group 2: question stem without the score
        // Groups 3 and 4: the numeric score (e.g. "5"); unset when absent.

        // Pattern 1: "1. question text (5分)" or "1. question text"
        QuestionPatterns.Add(new RegexPatternConfig(@"^(\d+)\.\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 1));

        // Pattern 2: "(1) sub-question text (3分)" or "(1) sub-question text"
        QuestionPatterns.Add(new RegexPatternConfig(@"^\((\d+)\)\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 2));

        // Pattern 3: "① deeper sub-question text (2分)" or "① deeper sub-question text"
        // The circled number has to be captured as group 1; without that capture
        // every group shifts by one and the stem would be read as the number.
        QuestionPatterns.Add(new RegexPatternConfig(@"^([①②③④⑤⑥⑦⑧⑨⑩]+)\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 3));

        // --- Option patterns ---
        // Matches "A. option text".
        // Group 1: option label including the dot (e.g. "A.")
        // Group 2: option text
        OptionPatterns.Add(new RegexPatternConfig(@"^([A-Z]\.)\s*(.*)$", 1)); // upper-case labels
        OptionPatterns.Add(new RegexPatternConfig(@"^([a-z]\.)\s*(.*)$", 2)); // lower-case labels

        // --- Ignored patterns (not enabled) ---
        // Blank or whitespace-only lines:
        //IgnoredPatterns.Add(new RegexPatternConfig(@"^\s*$", 1));
        // Common end-of-paper markers, so that they are not parsed as content:
        //IgnoredPatterns.Add(new RegexPatternConfig(@"^\s*(试卷到此结束)\s*$", 1));
        //IgnoredPatterns.Add(new RegexPatternConfig(@"^\s*(本卷共[0-9]+页)\s*$", 1));

        // No title or description patterns are registered here. Patterns of that
        // kind help separate content blocks; without them such lines can be
        // claimed by other patterns, for example the question patterns.
    }
}
/// <summary>
/// One regex hit found while scanning the exam text, together with the pattern
/// that produced it and the kind of element it stands for.
/// </summary>
public class PotentialMatch
{
/// <summary>Start position of the match in the original text.</summary>
public int StartIndex { get; set; }
/// <summary>End position of the matched structure in the original text.</summary>
public int EndIndex { get; set; }
/// <summary>The complete matched line or paragraph.</summary>
public string MatchedText { get; set; }
/// <summary>The original regex match, kept so that capture groups can be read later.</summary>
public Match RegexMatch { get; set; }
/// <summary>Configuration of the pattern that produced the match.</summary>
public RegexPatternConfig PatternConfig { get; set; }
/// <summary>Kind of element the match stands for (question group, question, option, ...).</summary>
public MatchType Type { get; set; }
}
/// <summary>
/// Kind of structural element a <see cref="PotentialMatch"/> stands for.
/// </summary>
public enum MatchType
{
MajorQuestionGroup,
Question,
Option,
Other // for any further kinds that need to be recognised
}
/// <summary>
/// Scans the raw exam text and collects every potential match (question groups,
/// questions, options). It only matches; it does not build any hierarchy.
/// </summary>
public class ExamDocumentScanner
{
    private readonly ExamParserConfig _config;

    /// <summary>Creates a scanner that uses the patterns of <paramref name="config"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    public ExamDocumentScanner(ExamParserConfig config)
    {
        _config = config ?? throw new ArgumentNullException(nameof(config));
    }

    /// <summary>
    /// Runs every configured pattern over <paramref name="text"/> and returns all
    /// hits ordered by start position.
    /// </summary>
    /// <param name="text">Text to scan.</param>
    /// <returns>All matches found, ordered by <see cref="PotentialMatch.StartIndex"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public List<PotentialMatch> Scan(string text)
    {
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        var found = new List<PotentialMatch>();
        Collect(found, _config.MajorQuestionGroupPatterns, MatchType.MajorQuestionGroup, text);
        Collect(found, _config.QuestionPatterns, MatchType.Question, text);
        Collect(found, _config.OptionPatterns, MatchType.Option, text);

        // OrderBy is a stable sort, so hits that start at the same position keep
        // the collection order above (groups, then questions, then options).
        return found.OrderBy(pm => pm.StartIndex).ToList();
    }

    // Adds one PotentialMatch of the given type to `sink` for every hit of every pattern.
    private static void Collect(List<PotentialMatch> sink, IEnumerable<RegexPatternConfig> patterns, MatchType type, string text)
    {
        foreach (var patternConfig in patterns)
        {
            foreach (Match match in patternConfig.Regex.Matches(text))
            {
                sink.Add(new PotentialMatch
                {
                    StartIndex = match.Index,
                    EndIndex = match.Index + match.Length,
                    MatchedText = match.Value,
                    RegexMatch = match,
                    PatternConfig = patternConfig,
                    Type = type
                });
            }
        }
    }
}
/// <summary>
/// Builds the <see cref="ExamPaper"/> hierarchy from the exam text and the
/// position-ordered matches produced by the scanner. Matches are visited once,
/// in order; nesting is decided by comparing pattern priorities.
/// </summary>
/// <remarks>
/// Capture-group layout expected from the patterns:
/// question groups use group 2 as the title and group 3 as the optional score;
/// questions use group 1 as the number, group 2 as the stem and group 3 as the
/// optional score; options use group 1 as the label and group 2 as the text.
/// </remarks>
public class ExamStructureBuilder
{
    private readonly ExamParserConfig _config;

    /// <summary>Creates a builder.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    public ExamStructureBuilder(ExamParserConfig config)
    {
        _config = config ?? throw new ArgumentNullException(nameof(config), "ExamParserConfig cannot be null.");
    }

    /// <summary>
    /// Builds the structured exam from the text and its matches.
    /// </summary>
    /// <param name="fullExamText">Complete exam text the matches refer to.</param>
    /// <param name="allPotentialMatches">Matches ordered by start position, without overlaps.</param>
    /// <returns>The structured exam.</returns>
    /// <exception cref="ArgumentException"><paramref name="fullExamText"/> is null, empty or whitespace.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="allPotentialMatches"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// A match is malformed, out of order, or cannot be placed (for example an option
    /// that appears before any question). The cause is in the inner exception.
    /// </exception>
    public ExamPaper BuildExamPaper(string fullExamText, List<PotentialMatch> allPotentialMatches)
    {
        if (string.IsNullOrWhiteSpace(fullExamText))
        {
            throw new ArgumentException("Full exam text cannot be null or empty.", nameof(fullExamText));
        }
        if (allPotentialMatches == null)
        {
            throw new ArgumentNullException(nameof(allPotentialMatches), "Potential matches list cannot be null.");
        }

        var examPaper = new ExamPaper { Title = GetExamTitle(fullExamText) };

        var majorQGStack = new Stack<MajorQuestionGroup>();
        var questionStack = new Stack<Question>();
        MajorQuestionGroup currentMajorQG = null;
        Question currentQuestion = null;
        int currentContentStart = 0;

        // ExamPaper.Descript starts as a placeholder. The first real description
        // text replaces it; later text is appended on a new line.
        bool examDescriptionCaptured = false;

        // Attaches the unmatched text in [start, end) to whatever is currently
        // open: the current question, else the current group, else the exam.
        void AttachFreeText(int start, int end)
        {
            string text = fullExamText.Substring(start, end - start).Trim();
            if (text.Length == 0)
            {
                return;
            }

            if (currentQuestion != null)
            {
                ProcessQuestionContent(currentQuestion, fullExamText, start, end,
                    GetSubMatchesForRange(allPotentialMatches, start, end));
            }
            else if (currentMajorQG != null)
            {
                currentMajorQG.Descript = AppendLine(currentMajorQG.Descript, text);
            }
            else if (examDescriptionCaptured)
            {
                examPaper.Descript = AppendLine(examPaper.Descript, text);
            }
            else
            {
                examPaper.Descript = text;
                examDescriptionCaptured = true;
            }
        }

        for (int i = 0; i < allPotentialMatches.Count; i++)
        {
            var pm = allPotentialMatches[i];
            try
            {
                if (pm == null)
                {
                    throw new InvalidOperationException($"PotentialMatch at index {i} is null.");
                }
                if (pm.StartIndex < currentContentStart || pm.EndIndex > fullExamText.Length || pm.StartIndex > pm.EndIndex)
                {
                    throw new ArgumentOutOfRangeException(
                        nameof(allPotentialMatches),
                        $"PotentialMatch at index {i} has invalid start/end indices. Start: {pm.StartIndex}, End: {pm.EndIndex}, CurrentContentStart: {currentContentStart}, FullTextLength: {fullExamText.Length}");
                }
                if (pm.RegexMatch == null || pm.PatternConfig == null)
                {
                    throw new InvalidOperationException($"PotentialMatch at index {i} is missing RegexMatch or PatternConfig.");
                }

                // Text between the previous match and this one (for the first
                // match this is the introduction before any structure).
                AttachFreeText(currentContentStart, pm.StartIndex);

                switch (pm.Type)
                {
                    case MatchType.MajorQuestionGroup:
                    {
                        // Close every open group of the same or a deeper level.
                        while (majorQGStack.Any() && pm.PatternConfig.Priority <= majorQGStack.Peek().Priority)
                        {
                            majorQGStack.Pop();
                        }

                        MajorQuestionGroup newMajorQG = CreateMajorQuestionGroup(pm, i);
                        if (majorQGStack.Any())
                        {
                            majorQGStack.Peek().SubMajorQuestionGroups.Add(newMajorQG);
                        }
                        else
                        {
                            examPaper.MajorQuestionGroups.Add(newMajorQG);
                        }

                        majorQGStack.Push(newMajorQG);
                        currentMajorQG = newMajorQG;

                        // A new group ends every question that was still open.
                        questionStack.Clear();
                        currentQuestion = null;
                        break;
                    }

                    case MatchType.Question:
                    {
                        // Close every open question of the same or a deeper level.
                        while (questionStack.Any() && pm.PatternConfig.Priority <= questionStack.Peek().Priority)
                        {
                            questionStack.Pop();
                        }

                        Question newQuestion = CreateQuestion(pm, i);
                        if (questionStack.Any())
                        {
                            questionStack.Peek().SubQuestions.Add(newQuestion);
                        }
                        else if (currentMajorQG != null)
                        {
                            currentMajorQG.Questions.Add(newQuestion);
                        }
                        else
                        {
                            examPaper.TopLevelQuestions.Add(newQuestion);
                        }

                        questionStack.Push(newQuestion);
                        currentQuestion = newQuestion;
                        break;
                    }

                    case MatchType.Option:
                    {
                        if (currentQuestion == null)
                        {
                            // Structural problem in the exam text itself.
                            throw new InvalidOperationException($"Found isolated Option at index {i} (MatchedText: '{pm.MatchedText}'). Options must belong to a question.");
                        }

                        currentQuestion.Options.Add(CreateOption(pm));
                        break;
                    }

                    // Any other match type contributes nothing; its text is skipped.
                }

                currentContentStart = pm.EndIndex;
            }
            catch (Exception ex)
            {
                throw new InvalidOperationException(
                    $"Error processing {pm?.Type} match at index {i} (MatchedText: '{pm?.MatchedText}').", ex);
            }
        }

        // Text after the last match.
        if (currentContentStart < fullExamText.Length)
        {
            try
            {
                AttachFreeText(currentContentStart, fullExamText.Length);
            }
            catch (Exception ex)
            {
                throw new InvalidOperationException("Error processing remaining text after all potential matches.", ex);
            }
        }

        return examPaper;
    }

    /// <summary>
    /// Returns the first line of the text that is not blank, or the
    /// "title not recognized" placeholder when there is none.
    /// </summary>
    private string GetExamTitle(string examPaperText)
    {
        var firstLine = examPaperText
            .Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
            .FirstOrDefault(line => !string.IsNullOrWhiteSpace(line));
        return firstLine ?? "未识别试卷标题";
    }

    /// <summary>
    /// Returns the matches whose start position lies in [start, end).
    /// </summary>
    private List<PotentialMatch> GetSubMatchesForRange(List<PotentialMatch> allMatches, int start, int end)
    {
        if (start < 0 || end < start)
        {
            throw new InvalidOperationException($"Invalid range provided to GetSubMatchesForRange. Start: {start}, End: {end}");
        }
        if (allMatches == null)
        {
            return new List<PotentialMatch>();
        }

        return allMatches.Where(pm => pm != null && pm.StartIndex >= start && pm.StartIndex < end).ToList();
    }

    /// <summary>
    /// Adds the content of fullText[rangeStart, rangeEnd) to a question: option
    /// matches inside the range become options, everything else is appended to
    /// the question text.
    /// </summary>
    /// <remarks>
    /// Match positions are absolute positions in <paramref name="fullText"/>, so the
    /// range is sliced from the full text instead of from a trimmed copy; this keeps
    /// both in the same coordinate space.
    /// </remarks>
    private void ProcessQuestionContent(Question question, string fullText, int rangeStart, int rangeEnd, List<PotentialMatch> potentialMatchesInScope)
    {
        if (question == null)
        {
            throw new ArgumentNullException(nameof(question), "Question cannot be null in ProcessQuestionContent.");
        }
        if (fullText == null)
        {
            throw new ArgumentNullException(nameof(fullText), "Content text cannot be null in ProcessQuestionContent.");
        }
        if (potentialMatchesInScope == null)
        {
            throw new ArgumentNullException(nameof(potentialMatchesInScope), "Potential matches in scope cannot be null.");
        }

        int cursor = rangeStart;
        foreach (var pm in potentialMatchesInScope.Where(m => m.Type == MatchType.Option).OrderBy(m => m.StartIndex))
        {
            if (pm.StartIndex < cursor || pm.EndIndex < pm.StartIndex || pm.EndIndex > rangeEnd)
            {
                throw new InvalidOperationException(
                    $"Option match at index {pm.StartIndex} does not fit into the content of Question '{question.Number}'. MatchedText: '{pm.MatchedText}'");
            }

            AppendQuestionText(question, fullText.Substring(cursor, pm.StartIndex - cursor));
            question.Options.Add(CreateOption(pm));
            cursor = pm.EndIndex;
        }

        // Whatever follows the last option (or the whole range when there is none).
        AppendQuestionText(question, fullText.Substring(cursor, rangeEnd - cursor));
    }

    // Appends trimmed text to the question stem on a new line; blank text is ignored.
    private static void AppendQuestionText(Question question, string text)
    {
        string trimmed = text.Trim();
        if (trimmed.Length > 0)
        {
            question.Text = AppendLine(question.Text, trimmed);
        }
    }

    // Joins `addition` onto `existing`, separated by a newline unless `existing` is blank.
    private static string AppendLine(string existing, string addition)
    {
        return existing + (string.IsNullOrWhiteSpace(existing) ? "" : "\n") + addition;
    }

    // Builds a question group from a heading match: group 2 is the title, group 3 the optional score.
    private static MajorQuestionGroup CreateMajorQuestionGroup(PotentialMatch pm, int index)
    {
        GroupCollection groups = pm.RegexMatch.Groups;
        if (groups.Count < 3)
        {
            throw new InvalidOperationException($"MajorQuestionGroup match at index {index} does not have enough regex groups for Title.");
        }

        return new MajorQuestionGroup
        {
            Title = groups[2].Value.Trim(),
            Score = ReadOptionalScore(groups, 3, "MajorQuestionGroup", index),
            Priority = pm.PatternConfig.Priority
        };
    }

    // Builds a question from a question match: group 1 number, group 2 stem, group 3 optional score.
    private static Question CreateQuestion(PotentialMatch pm, int index)
    {
        GroupCollection groups = pm.RegexMatch.Groups;
        if (groups.Count < 3)
        {
            throw new InvalidOperationException($"Question match at index {index} does not have enough regex groups for Number and Text.");
        }

        return new Question
        {
            Number = groups[1].Value.Trim(),
            Text = groups[2].Value.Trim(),
            Score = ReadOptionalScore(groups, 3, "Question", index),
            Priority = pm.PatternConfig.Priority
        };
    }

    // Builds an option from an option match: group 1 label, group 2 text.
    private static Option CreateOption(PotentialMatch pm)
    {
        if (pm.RegexMatch == null || pm.RegexMatch.Groups.Count < 3)
        {
            throw new InvalidOperationException($"Option regex match '{pm.MatchedText}' does not have enough groups for label and text.");
        }

        return new Option
        {
            Label = pm.RegexMatch.Groups[1].Value.Trim(),
            Text = pm.RegexMatch.Groups[2].Value.Trim()
        };
    }

    // Reads the score from the given group; 0 when the group is missing or did not participate.
    private static float ReadOptionalScore(GroupCollection groups, int groupIndex, string owner, int matchIndex)
    {
        if (groups.Count <= groupIndex || !groups[groupIndex].Success)
        {
            return 0;
        }

        string raw = groups[groupIndex].Value;

        // The patterns only admit digits with a '.' decimal separator, so the
        // value is parsed culture-independently.
        if (!float.TryParse(raw, System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture, out float score))
        {
            throw new FormatException($"Failed to parse score '{raw}' for {owner} at index {matchIndex}.");
        }

        return score;
    }
}
/// <summary>
/// Entry point of the text parser: scans the exam text for matches and then
/// builds the structured exam from them.
/// </summary>
public class ExamParser
{
    private readonly ExamParserConfig _config;
    private readonly ExamDocumentScanner _scanner;
    private readonly ExamStructureBuilder _builder;

    /// <summary>Creates a parser whose scanner and builder share <paramref name="config"/>.</summary>
    public ExamParser(ExamParserConfig config)
    {
        _config = config;
        _scanner = new ExamDocumentScanner(config);
        _builder = new ExamStructureBuilder(config);
    }

    /// <summary>
    /// Parses an exam text into a structured <see cref="ExamPaper"/>.
    /// </summary>
    /// <param name="examPaperText">Complete exam text.</param>
    /// <returns>The parsed exam.</returns>
    public ExamPaper ParseExamPaper(string examPaperText)
        // Scan the whole text once, then let the builder walk the matches in
        // order and assemble the hierarchy.
        => _builder.BuildExamPaper(examPaperText, _scanner.Scan(examPaperText));
}
}

View File

@@ -1,43 +0,0 @@
@using TechHelper.Client.Exam.Parse
@* Card for a single question: number, stem, score (when above 0), options, and
   its sub-questions, each rendered recursively with this same component. *@
<MudCard Class="my-2 pa-2" Outlined="true" Elevation="1">
<MudCardContent>
<MudText Typo="Typo.subtitle1">
@* NOTE(review): MarkupString emits the text as raw HTML without encoding -
   confirm that question text is trusted or sanitized before it gets here. *@
<b>@Question.Number</b> @((MarkupString)Question.Text)
@if (Question.Score > 0)
{
<MudText Typo="Typo.body2" Class="d-inline ml-2">(@Question.Score 分)</MudText>
}
</MudText>
@* Options - rendered without MudList *@
@if (Question.Options.Any())
{
<div class="mt-2">
@* Plain div container; a custom CSS class can be added here for styling *@
@foreach (var option in Question.Options)
{
<MudText Typo="Typo.body2" Class="my-1">
@* my-1 gives every option line some vertical margin *@
<b>@option.Label</b> @((MarkupString)option.Text)
</MudText>
}
</div>
}
@* Sub-questions, rendered recursively *@
@if (Question.SubQuestions.Any())
{
<MudText Typo="Typo.subtitle2" Class="my-2">子题目:</MudText>
@foreach (var subQuestion in Question.SubQuestions)
{
<QuestionCard Question="subQuestion" />
}
}
</MudCardContent>
</MudCard>
@code {
// Question to render. The markup above dereferences it without a null check,
// so the parent component has to supply it.
[Parameter]
public Question Question { get; set; }
}

View File

@@ -1,43 +0,0 @@
@using TechHelper.Client.Exam.Parse
@* SubMajorQuestionGroupDisplay.razor
   Renders a list of question groups as expansion panels. Each panel shows the
   group's description and score when present, its questions, and its nested
   groups (rendered recursively with this same component). *@
<MudExpansionPanels>
@foreach (var majorQG in MajorQGList)
{
<MudExpansionPanel Text="@majorQG.Title" DisableRipple="true">
<MudCard Class="mt-2" Outlined="true">
<MudCardContent>
@if (!string.IsNullOrWhiteSpace(majorQG.Descript))
{
@* NOTE(review): MarkupString emits the description as raw HTML without
   encoding - confirm that it is trusted or sanitized before it gets here. *@
<MudText Typo="Typo.body2"><b>描述:</b> @((MarkupString)majorQG.Descript)</MudText>
}
@if (majorQG.Score > 0)
{
<MudText Typo="Typo.body2"><b>总分:</b> @majorQG.Score 分</MudText>
}
@* Questions that belong directly to this group *@
@if (majorQG.Questions.Any())
{
<MudText Typo="Typo.subtitle1" Class="my-2">题目:</MudText>
@foreach (var question in majorQG.Questions)
{
<QuestionCard Question="question" />
}
}
@* Deeper nested groups, rendered recursively *@
@if (majorQG.SubMajorQuestionGroups.Any())
{
<MudText Typo="Typo.subtitle1" Class="my-2">子题组:</MudText>
<SubMajorQuestionGroupDisplay MajorQGList="majorQG.SubMajorQuestionGroups" />
}
</MudCardContent>
</MudCard>
</MudExpansionPanel>
}
</MudExpansionPanels>
@code {
// Groups to render. The foreach above enumerates it without a null check,
// so the parent component has to supply it.
[Parameter]
public List<MajorQuestionGroup> MajorQGList { get; set; }
}

View File

@@ -0,0 +1,220 @@
using Entities.DTO;
using System.Text.Json.Serialization;
using System.Text.Json;
namespace TechHelper.Client.Exam
{
public static class ExamPaperExtensions
{
/// <summary>
/// Converts a parsed <see cref="ExamPaper"/> into an <see cref="ExamDto"/>.
/// </summary>
/// <remarks>
/// Header fields are copied, and the root group of the DTO reuses the assignment
/// title and description. Every question group and every top-level question is
/// added as a child group of that root; a top-level question is wrapped in a
/// group DTO of its own.
/// </remarks>
/// <param name="examPaper">Exam to convert.</param>
/// <returns>The populated DTO.</returns>
public static ExamDto ConvertToExamDTO(this ExamPaper examPaper)
{
    var dto = new ExamDto
    {
        AssignmentTitle = examPaper.AssignmentTitle,
        Description = examPaper.Description,
        SubjectArea = examPaper.SubjectArea,
    };
    dto.QuestionGroups.Title = examPaper.AssignmentTitle;
    dto.QuestionGroups.Descript = examPaper.Description;

    // Top-level question groups: there is no parent group, so the
    // "a parent group is already valid" flag starts as false.
    foreach (var group in examPaper.QuestionGroups)
    {
        var groupDto = new QuestionGroupDto();
        ParseMajorQuestionGroup(group, groupDto, false);
        dto.QuestionGroups.SubQuestionGroups.Add(groupDto);
    }

    // Top-level questions: each one gets a wrapper group DTO.
    foreach (var question in examPaper.TopLevelQuestions)
    {
        bool hasSubQuestions = question.SubQuestions != null && question.SubQuestions.Any();

        var wrapper = new QuestionGroupDto
        {
            Title = question.Stem,
            Score = (int)question.Score,
            Descript = "", // a wrapper for a top-level question carries no description
        };

        // A group counts as valid only when it has a description.
        wrapper.ValidQuestionGroup = !string.IsNullOrEmpty(wrapper.Descript);

        if (hasSubQuestions)
        {
            // The question acts as a group; its sub-questions receive the
            // wrapper's validity as their parent-chain state.
            ParseQuestionWithSubQuestions(question, wrapper, wrapper.ValidQuestionGroup);
        }
        else
        {
            // A stand-alone question becomes the single sub-question of its
            // wrapper; the flag passed on is the negation of the wrapper's validity.
            var single = new SubQuestionDto();
            ParseSingleQuestion(question, single, !wrapper.ValidQuestionGroup);
            wrapper.SubQuestions.Add(single);
        }

        dto.QuestionGroups.SubQuestionGroups.Add(wrapper);
    }

    return dto;
}
/// <summary>
/// Copies a <see cref="MajorQuestionGroup"/> and everything below it into <paramref name="qgd"/>.
/// </summary>
/// <param name="qg">The parsed group to convert.</param>
/// <param name="qgd">The DTO that receives the converted data.</param>
/// <param name="isParentGroupValidChain">
/// True when some ancestor group has already been marked as a valid question group.
/// </param>
private static void ParseMajorQuestionGroup(MajorQuestionGroup qg, QuestionGroupDto qgd, bool isParentGroupValidChain)
{
    qgd.Title = qg.Title;
    qgd.Score = (int)qg.Score;
    qgd.Descript = qg.Descript;

    // A group is valid when it has a description and no ancestor is valid already.
    bool hasDescription = !string.IsNullOrEmpty(qg.Descript);
    qgd.ValidQuestionGroup = hasDescription && !isParentGroupValidChain;

    // Flag handed down to children: true once this group or any ancestor is valid.
    bool chainHasValidGroup = qgd.ValidQuestionGroup || isParentGroupValidChain;

    // Nested groups first.
    if (qg.SubQuestionGroups != null)
    {
        foreach (var childGroup in qg.SubQuestionGroups)
        {
            var childGroupDto = new QuestionGroupDto();
            ParseMajorQuestionGroup(childGroup, childGroupDto, chainHasValidGroup);
            qgd.SubQuestionGroups.Add(childGroupDto);
        }
    }

    if (qg.SubQuestions == null)
    {
        return;
    }

    // Then the questions that sit directly under this group.
    foreach (var sq in qg.SubQuestions)
    {
        bool hasChildren = sq.SubQuestions != null && sq.SubQuestions.Any();
        if (!hasChildren)
        {
            // A leaf question is valid only when no group on the chain (this one included) is valid.
            var leafDto = new SubQuestionDto();
            ParseSingleQuestion(sq, leafDto, !chainHasValidGroup);
            qgd.SubQuestions.Add(leafDto);
            continue;
        }

        // A question with children is represented as a question group of its own.
        var subQgd = new QuestionGroupDto
        {
            Title = sq.Stem,
            Score = (int)sq.Score,
            Descript = "" // empty by default
        };
        subQgd.ValidQuestionGroup = !string.IsNullOrEmpty(subQgd.Descript) && !chainHasValidGroup;
        ParseQuestionWithSubQuestions(sq, subQgd, subQgd.ValidQuestionGroup || chainHasValidGroup);
        qgd.SubQuestionGroups.Add(subQgd);
    }
}
/// <summary>
/// Converts a <see cref="Question"/> that has sub-questions into a <see cref="QuestionGroupDto"/>.
/// </summary>
/// <param name="question">The question whose stem, score and children are copied.</param>
/// <param name="qgd">The DTO that receives the converted data.</param>
/// <param name="isParentGroupValidChain">
/// True when some ancestor group has already been marked as a valid question group.
/// </param>
private static void ParseQuestionWithSubQuestions(Question question, QuestionGroupDto qgd, bool isParentGroupValidChain)
{
    qgd.Title = question.Stem;
    qgd.Score = (int)question.Score;
    qgd.Descript = ""; // empty by default

    // Valid only with a description and without a valid ancestor.
    qgd.ValidQuestionGroup = !string.IsNullOrEmpty(qgd.Descript) && !isParentGroupValidChain;

    // Flag handed down to the children.
    bool chainHasValidGroup = qgd.ValidQuestionGroup || isParentGroupValidChain;

    if (question.SubQuestions == null)
    {
        return;
    }

    foreach (var child in question.SubQuestions)
    {
        bool childHasChildren = child.SubQuestions != null && child.SubQuestions.Any();
        if (!childHasChildren)
        {
            // Leaf: valid only when no group on the chain (this one included) is valid.
            var leafDto = new SubQuestionDto();
            ParseSingleQuestion(child, leafDto, !chainHasValidGroup);
            qgd.SubQuestions.Add(leafDto);
            continue;
        }

        // Deeper nesting: the child becomes a nested question group.
        var nestedQgd = new QuestionGroupDto
        {
            Title = child.Stem,
            Score = (int)child.Score,
            Descript = "" // empty by default
        };
        nestedQgd.ValidQuestionGroup = !string.IsNullOrEmpty(nestedQgd.Descript) && !chainHasValidGroup;
        ParseQuestionWithSubQuestions(child, nestedQgd, nestedQgd.ValidQuestionGroup || chainHasValidGroup);
        qgd.SubQuestionGroups.Add(nestedQgd);
    }
}
/// <summary>
/// Copies a question without sub-questions into a <see cref="SubQuestionDto"/>.
/// </summary>
/// <param name="question">The parsed question to copy from.</param>
/// <param name="subQd">The DTO that receives the values.</param>
/// <param name="validQ">Value stored in <c>ValidQuestion</c> on the DTO.</param>
private static void ParseSingleQuestion(Question question, SubQuestionDto subQd, bool validQ)
{
    subQd.Stem = question.Stem;
    subQd.Score = (int)question.Score;
    subQd.ValidQuestion = validQ;
    subQd.SampleAnswer = question.SampleAnswer;
    subQd.QuestionType = question.QuestionType;
    // The local Question type has no difficulty level, so none is set on the DTO here.

    if (question.Options == null)
    {
        return;
    }

    // Each option is flattened into one string: label immediately followed by text.
    foreach (var choice in question.Options)
    {
        subQd.Options.Add(new OptionDto { Value = choice.Label + choice.Text });
    }
}
// Shared serializer settings for SerializeExamDto. System.Text.Json caches type metadata per
// options instance, so the instance is created once and reused instead of per call.
private static readonly JsonSerializerOptions s_examDtoWriteOptions = new JsonSerializerOptions
{
    WriteIndented = true,
    PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
    DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
};

/// <summary>
/// Serializes an <see cref="ExamDto"/> to indented JSON with camelCase property names,
/// omitting properties whose value is null.
/// </summary>
/// <param name="dto">The DTO to serialize.</param>
/// <returns>The JSON text.</returns>
public static string SerializeExamDto(this ExamDto dto)
{
    return JsonSerializer.Serialize(dto, s_examDtoWriteOptions);
}
// Shared deserializer settings for DeserializeExamDto, created once so the serializer can reuse
// its per-options metadata cache across calls.
private static readonly JsonSerializerOptions s_examDtoReadOptions = new JsonSerializerOptions
{
    PropertyNamingPolicy = JsonNamingPolicy.CamelCase
};

/// <summary>
/// Deserializes JSON with camelCase property names into an <see cref="ExamDto"/>.
/// </summary>
/// <param name="jsonString">The JSON text to read.</param>
/// <returns>The deserialized DTO, as returned by <c>JsonSerializer.Deserialize</c>.</returns>
public static ExamDto DeserializeExamDto(string jsonString)
{
    return JsonSerializer.Deserialize<ExamDto>(jsonString, s_examDtoReadOptions);
}
}
}

View File

@@ -0,0 +1,740 @@
using Entities.DTO;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
namespace TechHelper.Client.Exam
{
// --- Error-reporting types used by the exam parser ---
/// <summary>
/// Immutable record of one problem found while parsing an exam paper.
/// </summary>
public class ParseError
{
    /// <summary>Category of the problem.</summary>
    public ParseErrorType Type { get; }

    /// <summary>Human-readable description.</summary>
    public string Message { get; }

    /// <summary>Text offset or match index the problem relates to, when known.</summary>
    public int? Index { get; }

    /// <summary>Text of the related match, when the problem belongs to one.</summary>
    public string MatchedText { get; }

    /// <summary>The exception that was caught, if any.</summary>
    public Exception InnerException { get; }

    public ParseError(ParseErrorType type, string message, int? index = null, string matchedText = null, Exception innerException = null)
    {
        this.Type = type;
        this.Message = message;
        this.Index = index;
        this.MatchedText = matchedText;
        this.InnerException = innerException;
    }

    /// <summary>
    /// Formats the error as "[Type] Message" followed by the optional parts that are present.
    /// </summary>
    public override string ToString()
    {
        string text = $"[{Type}] {Message}";

        if (Index.HasValue)
        {
            text += $" (Index: {Index.Value})";
        }

        if (!string.IsNullOrEmpty(MatchedText))
        {
            text += $" (MatchedText: '{MatchedText}')";
        }

        if (InnerException != null)
        {
            text += $" InnerException: {InnerException.Message}";
        }

        return text;
    }
}
/// <summary>
/// Categories of problems recorded as <see cref="ParseError"/> entries.
/// </summary>
public enum ParseErrorType
{
Validation = 1, // input validation failed
DataParsing = 2, // a value could not be parsed (e.g. number conversion)
Structural = 3, // structural problem (e.g. an option with no owning question)
RegexMatchIssue = 4, // a regex match did not have the expected shape
UnexpectedError = 5 // any other unanticipated error
}
/// <summary>
/// Result of parsing an exam text: header fields, the question hierarchy and the parse errors.
/// The string properties start with placeholder values.
/// </summary>
public class ExamPaper
{
// Exam title; starts as a placeholder string.
public string AssignmentTitle { get; set; } = "未识别试卷标题";
// Exam description; starts as a placeholder string.
public string Description { get; set; } = "未识别试卷描述";
// Subject/category label; starts as a placeholder string.
public string SubjectArea { get; set; } = "试卷类别";
// Top-level major question groups.
public List<MajorQuestionGroup> QuestionGroups { get; set; } = new List<MajorQuestionGroup>();
// Questions that do not belong to any major question group.
public List<Question> TopLevelQuestions { get; set; } = new List<Question>();
// Problems collected while parsing.
public List<ParseError> Errors { get; set; } = new List<ParseError>();
}
/// <summary>
/// A titled section of the exam that can contain nested groups and questions.
/// </summary>
public class MajorQuestionGroup
{
// Section heading text.
public string Title { get; set; } = string.Empty;
// Free text attached to the section.
public string Descript { get; set; } = string.Empty;
// Score for the section.
public float Score { get; set; }
// Nested sections.
public List<MajorQuestionGroup> SubQuestionGroups { get; set; } = new List<MajorQuestionGroup>();
// Questions directly under this section.
public List<Question> SubQuestions { get; set; } = new List<Question>();
// Nesting priority of the section.
public int Priority { get; set; }
}
/// <summary>
/// A parsed question with its options and nested sub-questions.
/// </summary>
public class Question
{
// Question number as text.
public string Number { get; set; } = string.Empty;
// Question stem (body text).
public string Stem { get; set; } = string.Empty;
// Score for the question.
public float Score { get; set; }
// Answer options.
public List<Option> Options { get; set; } = new List<Option>();
// Nested sub-questions.
public List<Question> SubQuestions { get; set; } = new List<Question>();
// Sample answer text.
public string SampleAnswer { get; set; } = string.Empty;
// Question type label.
public string QuestionType { get; set; } = string.Empty;
// Nesting priority of the question.
public int Priority { get; set; }
}
/// <summary>
/// One answer option of a question.
/// </summary>
public class Option
{
// Option label.
public string Label { get; set; } = string.Empty;
// Option text.
public string Text { get; set; } = string.Empty;
}
/// <summary>
/// A regular expression paired with a priority.
/// </summary>
public class RegexPatternConfig
{
    /// <summary>The regular expression source text.</summary>
    public string Pattern { get; set; }

    /// <summary>Priority of the pattern; a smaller number means a higher priority.</summary>
    public int Priority { get; set; }

    /// <summary>The <see cref="Regex"/> built from the pattern passed to the constructor.</summary>
    public Regex Regex { get; private set; }

    /// <summary>
    /// Stores the pattern and priority and builds the regex in multi-line, compiled mode.
    /// </summary>
    public RegexPatternConfig(string pattern, int priority)
    {
        const RegexOptions Options = RegexOptions.Multiline | RegexOptions.Compiled;

        (Pattern, Priority) = (pattern, priority);
        Regex = new Regex(pattern, Options);
    }
}
/// <summary>
/// Identifies a pattern list.
/// NOTE(review): the member names mirror the three list properties of ExamParserConfig — presumably they select among them; confirm against callers.
/// </summary>
public enum ExamParserEnum
{
MajorQuestionGroupPatterns = 0,
QuestionPatterns,
OptionPatterns
}
/// <summary>
/// Configuration for exam parsing: the regular expressions used to recognise major question
/// groups, questions and options.
/// </summary>
/// <remarks>
/// Group and question patterns share one capture layout that the structure builder relies on:
/// group 1 is the number/label, group 2 the title or stem, and group 4 the numeric score when a
/// "(N分)" suffix is present.
/// </remarks>
public class ExamParserConfig
{
    public List<RegexPatternConfig> MajorQuestionGroupPatterns { get; set; } = new List<RegexPatternConfig>();
    public List<RegexPatternConfig> QuestionPatterns { get; set; } = new List<RegexPatternConfig>();
    public List<RegexPatternConfig> OptionPatterns { get; set; } = new List<RegexPatternConfig>();

    /// <summary>
    /// Registers the default patterns.
    /// </summary>
    public ExamParserConfig()
    {
        // Major groups: a Chinese numeral followed by "、" or ".", or a Chinese numeral in parentheses.
        MajorQuestionGroupPatterns.Add(new RegexPatternConfig(@"^([一二三四五六七八九十]+)[、\.]\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 1));
        MajorQuestionGroupPatterns.Add(new RegexPatternConfig(@"^\(([一二三四五六七八九十]{1,2}|十[一二三四五六七八九])\)\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 2));

        // Question level 1: "1. stem (5分)" or "1. stem".
        QuestionPatterns.Add(new RegexPatternConfig(@"^(\d+)\.\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 1));

        // Question level 2: "(1) stem (3分)" or "(1) stem".
        QuestionPatterns.Add(new RegexPatternConfig(@"^\((\d+)\)\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 2));

        // Question level 3: "① stem (2分)" or "① stem".
        // The circled number is captured as group 1 so this pattern has the same layout as the two
        // above. Without that capture the stem landed in group 1 and the score in group 2, which
        // made the builder skip unscored questions and mislabel scored ones.
        QuestionPatterns.Add(new RegexPatternConfig(@"^([①②③④⑤⑥⑦⑧⑨⑩]+)\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 3));

        // Options: group 1 is the label including the dot, group 2 the text up to the next label or line end.
        OptionPatterns.Add(new RegexPatternConfig(@"([A-Z]\.)\s*(.*?)(?=[A-Z]\.|$)", 1)); // upper-case labels
        OptionPatterns.Add(new RegexPatternConfig(@"([a-z]\.)\s*(.*?)(?=[a-z]\.|$)", 1)); // lower-case labels
    }
}
/// <summary>
/// One regex hit found in the exam text, together with the pattern that produced it.
/// </summary>
public class PotentialMatch
{
// Offset of the match in the scanned text.
public int StartIndex { get; set; }
public int EndIndex { get; set; } // offset just past the end of the match in the scanned text
public string MatchedText { get; set; } // the full matched text
public Match RegexMatch { get; set; } // the original Match object, kept for access to capture groups
public RegexPatternConfig PatternConfig { get; set; } // the pattern configuration that matched
public MatchType Type { get; set; } // kind of structure: MajorQuestionGroup, Question, Option, ...
}
/// <summary>
/// Kind of structure a <see cref="PotentialMatch"/> represents.
/// </summary>
public enum MatchType
{
MajorQuestionGroup,
Question,
Option,
Other // reserved for any other kind that needs to be recognised
}
/// <summary>
/// Scans raw exam text and collects every potential match (groups, questions, options).
/// It only matches; it does not decide which match belongs to which structure.
/// </summary>
public class ExamDocumentScanner
{
    private readonly ExamParserConfig _config;

    public ExamDocumentScanner(ExamParserConfig config)
    {
        // The configuration is mandatory.
        if (config == null)
        {
            throw new ArgumentNullException(nameof(config));
        }

        _config = config;
    }

    /// <summary>
    /// Scans the given text and returns all potential matches ordered by start position.
    /// </summary>
    /// <param name="text">The text to scan.</param>
    /// <returns>All matches found; an empty list for null or empty text.</returns>
    public List<PotentialMatch> Scan(string text)
    {
        var found = new List<PotentialMatch>();
        if (string.IsNullOrEmpty(text))
        {
            return found;
        }

        // Collection order matters for ties: the sort below is stable, so matches with the same
        // start index stay in the order groups, questions, options.
        Collect(_config.MajorQuestionGroupPatterns, MatchType.MajorQuestionGroup, text, found);
        Collect(_config.QuestionPatterns, MatchType.Question, text, found);
        Collect(_config.OptionPatterns, MatchType.Option, text, found);

        return found.OrderBy(pm => pm.StartIndex).ToList();
    }

    // Runs every pattern of one category over the text and appends a PotentialMatch per hit.
    private static void Collect(List<RegexPatternConfig> patterns, MatchType type, string text, List<PotentialMatch> sink)
    {
        foreach (var patternConfig in patterns)
        {
            foreach (Match hit in patternConfig.Regex.Matches(text))
            {
                var potential = new PotentialMatch();
                potential.StartIndex = hit.Index;
                potential.EndIndex = hit.Index + hit.Length;
                potential.MatchedText = hit.Value;
                potential.RegexMatch = hit;
                potential.PatternConfig = patternConfig;
                potential.Type = type;
                sink.Add(potential);
            }
        }
    }
}
public class ExamStructureBuilder
{
private readonly ExamParserConfig _config;

/// <summary>
/// Creates a builder bound to the given parser configuration.
/// </summary>
/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
public ExamStructureBuilder(ExamParserConfig config)
{
    if (config == null)
    {
        throw new ArgumentNullException(nameof(config), "ExamParserConfig cannot be null.");
    }

    _config = config;
}
/// <summary>
/// Builds the <see cref="ExamPaper"/> hierarchy from the raw text and the scanner's matches.
/// Problems found along the way are recorded in <see cref="ExamPaper.Errors"/> rather than thrown.
/// </summary>
/// <remarks>
/// Matches are walked in list order. Priorities decide nesting: a group or question closes every
/// open item of the same kind whose priority number is greater than or equal to its own.
/// Text between matches goes to the current question, else to the current group's description,
/// else to the paper description. Text with no matches at all becomes the paper description.
/// </remarks>
/// <param name="fullExamText">The complete text of the exam paper.</param>
/// <param name="allPotentialMatches">All identified potential matches, in processing order.</param>
/// <returns>An <see cref="ExamPaper"/> containing the parsed structure and the list of errors.</returns>
/// <exception cref="ArgumentException">Thrown if <paramref name="fullExamText"/> is null, empty or whitespace.</exception>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="allPotentialMatches"/> is null.</exception>
public ExamPaper BuildExamPaper(string fullExamText, List<PotentialMatch> allPotentialMatches)
{
    // These two checks stay fatal: nothing useful can be built without the inputs.
    if (string.IsNullOrWhiteSpace(fullExamText))
    {
        throw new ArgumentException("Full exam text cannot be null or empty.", nameof(fullExamText));
    }
    if (allPotentialMatches == null)
    {
        throw new ArgumentNullException(nameof(allPotentialMatches), "Potential matches list cannot be null.");
    }

    var examPaper = new ExamPaper();

    // Title extraction is best-effort: a failure is recorded and the default title is kept.
    try
    {
        examPaper.AssignmentTitle = GetExamTitle(fullExamText);
    }
    catch (Exception ex)
    {
        examPaper.Errors.Add(new ParseError(ParseErrorType.UnexpectedError, "Failed to extract exam title.", innerException: ex));
        examPaper.AssignmentTitle = "未识别试卷标题";
    }

    var majorQGStack = new Stack<MajorQuestionGroup>();
    MajorQuestionGroup currentMajorQG = null;
    var questionStack = new Stack<Question>();
    Question currentQuestion = null;
    int currentContentStart = 0;

    // ExamPaper.Description starts as a placeholder. The first real description text replaces the
    // placeholder instead of being appended after it; later pieces are appended on new lines.
    bool paperDescriptionFound = false;
    void AppendPaperDescription(string text)
    {
        examPaper.Description = paperDescriptionFound ? examPaper.Description + "\n" + text : text;
        paperDescriptionFound = true;
    }

    // Routes free text found in [rangeStart, rangeEnd) to the innermost open structure.
    void AttachFreeText(string text, int rangeStart, int rangeEnd)
    {
        if (currentQuestion != null)
        {
            ProcessQuestionContent(currentQuestion, text,
                GetSubMatchesForRange(allPotentialMatches, rangeStart, rangeEnd, examPaper.Errors),
                examPaper.Errors);
        }
        else if (currentMajorQG != null)
        {
            currentMajorQG.Descript += (string.IsNullOrWhiteSpace(currentMajorQG.Descript) ? "" : "\n") + text;
        }
        else
        {
            AppendPaperDescription(text);
        }
    }

    // Descriptive text before the first match. With no matches at all, nothing is indexed here and
    // the whole text is handled by the "remaining content" step at the end.
    if (allPotentialMatches.Count > 0)
    {
        int firstStart = allPotentialMatches[0].StartIndex;
        if (firstStart > 0)
        {
            string introText = fullExamText.Substring(0, firstStart).Trim();
            if (!string.IsNullOrWhiteSpace(introText))
            {
                AppendPaperDescription(introText);
            }
        }
        currentContentStart = firstStart;
    }

    for (int i = 0; i < allPotentialMatches.Count; i++)
    {
        var pm = allPotentialMatches[i];
        try
        {
            // Reject matches that overlap processed text or fall outside the text.
            if (pm.StartIndex < currentContentStart || pm.EndIndex > fullExamText.Length || pm.StartIndex > pm.EndIndex)
            {
                examPaper.Errors.Add(new ParseError(ParseErrorType.Validation,
                    $"PotentialMatch at index {i} has invalid start/end indices. Start: {pm.StartIndex}, End: {pm.EndIndex}, CurrentContentStart: {currentContentStart}, FullTextLength: {fullExamText.Length}",
                    index: i, matchedText: pm.MatchedText));
                currentContentStart = Math.Max(currentContentStart, pm.EndIndex); // try to step over the bad match
                continue;
            }
            if (pm.RegexMatch == null || pm.PatternConfig == null)
            {
                examPaper.Errors.Add(new ParseError(ParseErrorType.Validation,
                    $"PotentialMatch at index {i} is missing RegexMatch or PatternConfig.",
                    index: i, matchedText: pm.MatchedText));
                currentContentStart = Math.Max(currentContentStart, pm.EndIndex); // try to step over the bad match
                continue;
            }

            // Free text between the previous match and this one.
            string precedingText = fullExamText.Substring(currentContentStart, pm.StartIndex - currentContentStart).Trim();
            if (!string.IsNullOrWhiteSpace(precedingText))
            {
                AttachFreeText(precedingText, currentContentStart, pm.StartIndex);
            }

            if (pm.Type == MatchType.MajorQuestionGroup)
            {
                try
                {
                    // Close every open group at the same or a deeper level.
                    while (majorQGStack.Any() && pm.PatternConfig.Priority <= majorQGStack.Peek().Priority)
                    {
                        majorQGStack.Pop();
                    }

                    if (pm.RegexMatch.Groups.Count < 2 || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[1].Value))
                    {
                        examPaper.Errors.Add(new ParseError(ParseErrorType.RegexMatchIssue,
                            $"MajorQuestionGroup match at index {i} does not have enough regex groups or a valid title group (Group 1). Skipping this group.",
                            index: i, matchedText: pm.MatchedText));
                        currentContentStart = pm.EndIndex;
                        continue;
                    }

                    float score = ReadScore(pm, i, "MajorQuestionGroup", examPaper.Errors);

                    MajorQuestionGroup newMajorQG = new MajorQuestionGroup
                    {
                        Title = pm.RegexMatch.Groups[2].Value.Trim(), // the title is capture group 2
                        Score = score,
                        Priority = pm.PatternConfig.Priority,
                    };

                    if (majorQGStack.Any())
                    {
                        majorQGStack.Peek().SubQuestionGroups.Add(newMajorQG);
                    }
                    else
                    {
                        examPaper.QuestionGroups.Add(newMajorQG);
                    }

                    currentContentStart = pm.EndIndex;
                    majorQGStack.Push(newMajorQG);
                    currentMajorQG = newMajorQG;

                    // A new group ends whatever question was open.
                    questionStack.Clear();
                    currentQuestion = null;
                }
                catch (Exception ex)
                {
                    examPaper.Errors.Add(new ParseError(ParseErrorType.UnexpectedError,
                        $"An unexpected error occurred during processing MajorQuestionGroup at index {i}.",
                        index: i, matchedText: pm.MatchedText, innerException: ex));
                    currentContentStart = pm.EndIndex;
                    continue;
                }
            }
            else if (pm.Type == MatchType.Question)
            {
                try
                {
                    // Close every open question at the same or a deeper level.
                    while (questionStack.Any() && pm.PatternConfig.Priority <= questionStack.Peek().Priority)
                    {
                        questionStack.Pop();
                    }

                    if (pm.RegexMatch.Groups.Count < 3 || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[1].Value) || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[2].Value))
                    {
                        examPaper.Errors.Add(new ParseError(ParseErrorType.RegexMatchIssue,
                            $"Question match at index {i} does not have enough regex groups or valid number/text groups (Group 1/2). Skipping this question.",
                            index: i, matchedText: pm.MatchedText));
                        currentContentStart = pm.EndIndex;
                        continue;
                    }

                    float score = ReadScore(pm, i, "Question", examPaper.Errors);

                    Question newQuestion = new Question
                    {
                        Number = pm.RegexMatch.Groups[1].Value.Trim(),
                        Stem = pm.RegexMatch.Groups[2].Value.Trim(),
                        Priority = pm.PatternConfig.Priority,
                        Score = score
                    };

                    if (questionStack.Any())
                    {
                        questionStack.Peek().SubQuestions.Add(newQuestion);
                    }
                    else if (currentMajorQG != null)
                    {
                        currentMajorQG.SubQuestions.Add(newQuestion);
                    }
                    else
                    {
                        examPaper.TopLevelQuestions.Add(newQuestion);
                    }

                    currentContentStart = pm.EndIndex;
                    questionStack.Push(newQuestion);
                    currentQuestion = newQuestion;
                }
                catch (Exception ex)
                {
                    examPaper.Errors.Add(new ParseError(ParseErrorType.UnexpectedError,
                        $"An unexpected error occurred during processing Question at index {i}.",
                        index: i, matchedText: pm.MatchedText, innerException: ex));
                    currentContentStart = pm.EndIndex;
                    continue;
                }
            }
            else if (pm.Type == MatchType.Option)
            {
                try
                {
                    if (currentQuestion != null)
                    {
                        if (pm.RegexMatch.Groups.Count < 3 || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[1].Value) || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[2].Value))
                        {
                            examPaper.Errors.Add(new ParseError(ParseErrorType.RegexMatchIssue,
                                $"Option match at index {i} does not have enough regex groups or valid label/text groups (Group 1/2). Skipping this option.",
                                index: i, matchedText: pm.MatchedText));
                            currentContentStart = pm.EndIndex;
                            continue;
                        }

                        Option newOption = new Option
                        {
                            Label = pm.RegexMatch.Groups[1].Value.Trim(),
                            Text = pm.RegexMatch.Groups[2].Value.Trim()
                        };
                        currentQuestion.Options.Add(newOption);
                    }
                    else
                    {
                        // An option with no open question is a structural problem: record and ignore it.
                        examPaper.Errors.Add(new ParseError(ParseErrorType.Structural,
                            $"Found isolated Option at index {i}. Options must belong to a question. Ignoring this option.",
                            index: i, matchedText: pm.MatchedText));
                    }
                }
                catch (Exception ex)
                {
                    examPaper.Errors.Add(new ParseError(ParseErrorType.UnexpectedError,
                        $"An unexpected error occurred during processing Option at index {i}.",
                        index: i, matchedText: pm.MatchedText, innerException: ex));
                    // No continue: a broken option does not affect the bookkeeping below.
                }
            }

            currentContentStart = pm.EndIndex;
        }
        catch (Exception ex)
        {
            // Anything not handled by the more specific handlers above.
            examPaper.Errors.Add(new ParseError(ParseErrorType.UnexpectedError,
                $"An unexpected error occurred during main loop processing of PotentialMatch at index {i}.",
                index: i, matchedText: pm.MatchedText, innerException: ex));
            currentContentStart = Math.Max(currentContentStart, pm.EndIndex); // keep moving forward
        }
    }

    // Content after the last match (or the whole text when there were no matches).
    if (currentContentStart < fullExamText.Length)
    {
        try
        {
            string remainingText = fullExamText.Substring(currentContentStart).Trim();
            if (!string.IsNullOrWhiteSpace(remainingText))
            {
                AttachFreeText(remainingText, currentContentStart, fullExamText.Length);
            }
        }
        catch (Exception ex)
        {
            examPaper.Errors.Add(new ParseError(ParseErrorType.UnexpectedError,
                "An unexpected error occurred while processing remaining text after all potential matches.",
                innerException: ex));
        }
    }

    return examPaper;
}

// Reads the numeric score from capture group 4 of a group/question match. Returns 0 when the
// group is absent; records a DataParsing error and returns 0 when it cannot be parsed. The
// invariant culture is used because the patterns only accept '.' as the decimal separator.
private static float ReadScore(PotentialMatch pm, int matchIndex, string ownerKind, List<ParseError> errors)
{
    var groups = pm.RegexMatch.Groups;
    if (groups.Count <= 4 || !groups[4].Success)
    {
        return 0;
    }

    if (float.TryParse(groups[4].Value, System.Globalization.NumberStyles.Float,
            System.Globalization.CultureInfo.InvariantCulture, out float score))
    {
        return score;
    }

    errors.Add(new ParseError(ParseErrorType.DataParsing,
        $"Failed to parse score '{groups[4].Value}' for {ownerKind} at index {matchIndex}. Defaulting to 0.",
        index: matchIndex, matchedText: pm.MatchedText));
    return 0;
}
/// <summary>
/// Returns the first line of the exam text that is not blank, for use as the exam title.
/// The line is returned as-is (not trimmed). When every line is blank a placeholder is returned.
/// </summary>
private string GetExamTitle(string examPaperText)
{
    // No exception handling here; the caller wraps this call in its own try/catch.
    char[] lineBreaks = { '\r', '\n' };
    string[] lines = examPaperText.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);

    foreach (string line in lines)
    {
        if (!string.IsNullOrWhiteSpace(line))
        {
            return line;
        }
    }

    return "未识别试卷标题";
}
/// <summary>
/// Returns the matches whose start index lies in the half-open range [start, end).
/// An invalid range or an unexpected failure is recorded in <paramref name="errors"/> and yields
/// an empty list.
/// </summary>
private List<PotentialMatch> GetSubMatchesForRange(List<PotentialMatch> allMatches, int start, int end, List<ParseError> errors)
{
    bool rangeIsInvalid = start < 0 || end < start;
    if (rangeIsInvalid)
    {
        errors.Add(new ParseError(ParseErrorType.Validation,
            $"Invalid range provided to GetSubMatchesForRange. Start: {start}, End: {end}.",
            index: start)); // start serves as an approximate position
        return new List<PotentialMatch>();
    }

    // Defensive: tolerate a missing list instead of failing.
    if (allMatches == null)
    {
        return new List<PotentialMatch>();
    }

    try
    {
        var inRange = new List<PotentialMatch>();
        foreach (var candidate in allMatches)
        {
            if (candidate.StartIndex >= start && candidate.StartIndex < end)
            {
                inRange.Add(candidate);
            }
        }
        return inRange;
    }
    catch (Exception ex)
    {
        errors.Add(new ParseError(ParseErrorType.UnexpectedError,
            $"An unexpected error occurred getting sub-matches for range [{start}, {end}).",
            innerException: ex));
        return new List<PotentialMatch>(); // nothing is returned from a failed pass
    }
}
/// <summary>
/// Processes free text that belongs to a question: option matches in scope are added to the
/// question's options, and the text around them is appended to the question stem.
/// Problems met while processing are added to <paramref name="errors"/>; only null arguments throw.
/// </summary>
/// <param name="question">The question that receives the options and extra stem text.</param>
/// <param name="contentText">The text to process.</param>
/// <param name="potentialMatchesInScope">Matches to consider; only those of type Option are used.</param>
/// <param name="errors">List that collects the problems found.</param>
/// <exception cref="ArgumentNullException">question, contentText or potentialMatchesInScope is null.</exception>
private void ProcessQuestionContent(Question question, string contentText, List<PotentialMatch> potentialMatchesInScope, List<ParseError> errors)
{
// Argument checks are the contract of this internal method; a violation means the caller is wrong, so they throw.
if (question == null) throw new ArgumentNullException(nameof(question), "Question cannot be null in ProcessQuestionContent.");
if (contentText == null) throw new ArgumentNullException(nameof(contentText), "Content text cannot be null in ProcessQuestionContent.");
if (potentialMatchesInScope == null) throw new ArgumentNullException(nameof(potentialMatchesInScope), "Potential matches in scope cannot be null.");
try
{
// NOTE(review): pm.StartIndex/EndIndex are set by the scanner as offsets into the full exam text,
// while contentText is a trimmed slice passed in by BuildExamPaper. The comparisons and Substring
// calls below mix the two coordinate systems — confirm whether an offset translation is needed.
int lastOptionEndIndex = 0;
foreach (var pm in potentialMatchesInScope.OrderBy(p => p.StartIndex))
{
// Each match is handled on its own: an error is recorded and processing continues.
try
{
if (pm.Type == MatchType.Option)
{
// Validate the indices; record an error and skip on failure.
if (pm.StartIndex < lastOptionEndIndex || pm.StartIndex > contentText.Length || pm.EndIndex > contentText.Length)
{
errors.Add(new ParseError(ParseErrorType.Validation,
$"Option match at index {pm.StartIndex} has invalid indices within content text. MatchedText: '{pm.MatchedText}'. Skipping.",
index: pm.StartIndex, matchedText: pm.MatchedText));
continue; // skip this option
}
// Text in front of the option is appended to the stem.
if (pm.StartIndex > lastOptionEndIndex)
{
string textBeforeOption = contentText.Substring(lastOptionEndIndex, pm.StartIndex - lastOptionEndIndex).Trim();
if (!string.IsNullOrWhiteSpace(textBeforeOption))
{
question.Stem += (string.IsNullOrWhiteSpace(question.Stem) ? "" : "\n") + textBeforeOption;
}
}
// Validate the capture groups; record an error and skip on failure.
if (pm.RegexMatch.Groups.Count < 3 || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[1].Value) || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[2].Value))
{
errors.Add(new ParseError(ParseErrorType.RegexMatchIssue,
$"Option regex match '{pm.MatchedText}' does not have enough groups (expected 3) for label and text. Skipping option.",
index: pm.StartIndex, matchedText: pm.MatchedText));
lastOptionEndIndex = pm.EndIndex; // advance the cursor so processing does not get stuck
continue; // skip this option
}
var newOption = new Option
{
Label = pm.RegexMatch.Groups[1].Value.Trim(),
Text = pm.RegexMatch.Groups[2].Value.Trim()
};
question.Options.Add(newOption);
lastOptionEndIndex = pm.EndIndex;
}
// TODO: If there are SubQuestion types, they can be processed similarly here.
// Sub-question handling could be added here; its content and nesting need the same care.
}
catch (Exception innerEx)
{
errors.Add(new ParseError(ParseErrorType.UnexpectedError,
$"An unexpected error occurred during processing a potential match ({pm.Type}) within question content.",
index: pm.StartIndex, matchedText: pm.MatchedText, innerException: innerEx));
lastOptionEndIndex = pm.EndIndex; // advance the cursor past the failing match
continue; // try the next match
}
}
// Text after the last option is appended to the stem.
if (lastOptionEndIndex < contentText.Length)
{
string remainingContent = contentText.Substring(lastOptionEndIndex).Trim();
if (!string.IsNullOrWhiteSpace(remainingContent))
{
question.Stem += (string.IsNullOrWhiteSpace(question.Stem) ? "" : "\n") + remainingContent;
}
}
}
catch (Exception ex)
{
// Any unexpected error from the method as a whole is recorded, not thrown.
errors.Add(new ParseError(ParseErrorType.UnexpectedError,
$"An unexpected error occurred while processing content for Question '{question.Number}'.",
innerException: ex));
}
}
}
/// <summary>
/// Entry point of the exam parser: scans the text for matches, then builds the structure.
/// </summary>
public class ExamParser
{
    private readonly ExamParserConfig _config;
    private readonly ExamDocumentScanner _scanner;
    private readonly ExamStructureBuilder _builder;

    /// <summary>
    /// Creates the scanner and the structure builder from one shared configuration.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    public ExamParser(ExamParserConfig config)
    {
        if (config == null)
        {
            throw new ArgumentNullException(nameof(config));
        }

        _config = config;
        _scanner = new ExamDocumentScanner(config);
        _builder = new ExamStructureBuilder(config);
    }

    /// <summary>
    /// Parses the given exam text into a structured <see cref="ExamPaper"/>.
    /// </summary>
    /// <param name="examPaperText">The complete exam text.</param>
    /// <returns>The parsed paper; parse problems are listed in its Errors collection.</returns>
    public ExamPaper ParseExamPaper(string examPaperText)
    {
        // Step 1 (scan) produces the matches; step 2 (build) walks them once to create the hierarchy.
        return _builder.BuildExamPaper(examPaperText, _scanner.Scan(examPaperText));
    }
}
}

View File

@@ -2,6 +2,9 @@
using TechHelper.Client.AI;
using TechHelper.Services;
using Entities.DTO;
using System.Net.Http.Json;
using Newtonsoft.Json;
using TechHelper.Client.Pages.Exam;
namespace TechHelper.Client.Exam
@@ -9,10 +12,13 @@ namespace TechHelper.Client.Exam
public class ExamService : IExamService
{
private IAIService aIService;
private IHttpClientFactory httpClientFactory;
public ExamService(IAIService aIService)
public ExamService(IAIService aIService,
IHttpClientFactory httpClientFactory)
{
this.aIService = aIService;
this.httpClientFactory = httpClientFactory;
}
public ApiResponse ConvertToXML<T>(string xmlContent)
@@ -86,7 +92,7 @@ namespace TechHelper.Client.Exam
{
Status = false,
Result = null,
Message = $"处理试题分割时发生内部错误: {ex.Message}"
Message = $"处理试题分割时发生内部错误: {ex.Message}"
};
}
}
@@ -127,6 +133,31 @@ namespace TechHelper.Client.Exam
}
}
/// <summary>
/// Requests the exam previews for a user from <c>exam/getAllPreview</c>.
/// </summary>
/// <param name="user">User identifier sent as the <c>user</c> query parameter.</param>
/// <returns>
/// A success response carrying the deserialized list of <see cref="ExamDto"/> when the server
/// answers with a success status; otherwise an error response carrying the response body.
/// </returns>
public async Task<ApiResponse> GetAllExam(string user)
{
    using (var client = httpClientFactory.CreateClient("Default"))
    {
        // The value is escaped so characters such as '&', '#', '+' or spaces cannot alter the
        // query string. A null user is sent as an empty value, as before.
        var requestUri = $"exam/getAllPreview?user={System.Uri.EscapeDataString(user ?? string.Empty)}";

        using (var response = await client.GetAsync(requestUri))
        {
            var content = await response.Content.ReadAsStringAsync();
            if (!response.IsSuccessStatusCode)
            {
                return ApiResponse.Error(content);
            }

            var result = JsonConvert.DeserializeObject<List<ExamDto>>(content);
            return ApiResponse.Success(result: result);
        }
    }
}
// Stub: the guid argument is not used; the fixed string "HELLO" is passed to ApiResponse.Success.
// NOTE(review): the method is declared async but contains no await — presumably the real lookup is still to be written.
public async Task<ApiResponse> GetExam(Guid guid)
{
return ApiResponse.Success("HELLO");
}
public async Task<ApiResponse> ParseSingleQuestionGroup(string examContent)
{
try
@@ -163,9 +194,19 @@ namespace TechHelper.Client.Exam
}
}
/// <summary>
/// Posts a parsed exam to <c>exam/add</c> as JSON.
/// </summary>
/// <param name="examDto">The exam to save.</param>
/// <returns>
/// <c>new ApiResponse(true, "ok")</c> when the server answers with a success status code,
/// otherwise <c>new ApiResponse("false")</c>.
/// </returns>
public async Task<ApiResponse> SaveParsedExam(ExamDto examDto)
{
    using (var client = httpClientFactory.CreateClient("Default"))
    using (var response = await client.PostAsJsonAsync("exam/add", examDto))
    {
        // Any 2xx status counts as success (e.g. 201 Created from an "add" endpoint), matching the
        // check used by GetAllExam in this service.
        if (response.IsSuccessStatusCode)
        {
            return new ApiResponse(true, "ok");
        }

        return new ApiResponse("false");
    }
}
}
}

View File

@@ -10,5 +10,8 @@ namespace TechHelper.Client.Exam
public Task<ApiResponse> SaveParsedExam(ExamDto examDto);
public Task<ApiResponse> ParseSingleQuestionGroup(string examContent);
public ApiResponse ConvertToXML<T>(string xmlContent);
public Task<ApiResponse> GetAllExam(string user);
public Task<ApiResponse> GetExam(Guid guid);
}
}