// TechHelper/TechHelper.Client/Exam/ExamParse.cs

using Entities.DTO;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;

namespace TechHelper.Client.Exam
{

	/// <summary>
	/// Immutable description of one problem recorded while parsing an exam paper.
	/// </summary>
	public class ParseError
	{
		/// <summary>Category of the problem.</summary>
		public ParseErrorType Type { get; }

		/// <summary>Human-readable description of the problem.</summary>
		public string Message { get; }

		/// <summary>Optional index supplied by the reporter; null when none was given.</summary>
		public int? Index { get; }

		/// <summary>Optional text of the match that triggered the problem; may be null.</summary>
		public string MatchedText { get; }

		/// <summary>Optional underlying exception; may be null.</summary>
		public Exception InnerException { get; }

		/// <summary>
		/// Creates a parse error. Only <paramref name="type"/> and <paramref name="message"/> are required.
		/// </summary>
		/// <param name="type">Category of the problem.</param>
		/// <param name="message">Human-readable description.</param>
		/// <param name="index">Optional index associated with the problem.</param>
		/// <param name="matchedText">Optional matched text associated with the problem.</param>
		/// <param name="innerException">Optional exception that caused the problem.</param>
		public ParseError(ParseErrorType type, string message, int? index = null, string matchedText = null, Exception innerException = null)
		{
			InnerException = innerException;
			MatchedText = matchedText;
			Index = index;
			Message = message;
			Type = type;
		}

		/// <summary>
		/// Formats the error as "[Type] Message", followed by the index, matched text and inner
		/// exception message for whichever of those are present.
		/// </summary>
		public override string ToString()
		{
			string rendered = $"[{Type}] {Message}";

			if (Index.HasValue)
			{
				rendered += $" (Index: {Index.Value})";
			}

			if (!string.IsNullOrEmpty(MatchedText))
			{
				rendered += $" (MatchedText: '{MatchedText}')";
			}

			if (InnerException != null)
			{
				rendered += $" InnerException: {InnerException.Message}";
			}

			return rendered;
		}
	}

	/// <summary>
	/// Categories used to classify a <see cref="ParseError"/>.
	/// </summary>
	public enum ParseErrorType
	{
		Validation = 1,          // Input validation failed
		DataParsing = 2,         // Data parsing failed (e.g. numeric conversion)
		Structural = 3,          // Structural problem (e.g. an option with no owning question)
		RegexMatchIssue = 4,     // Regex match result did not have the expected shape
		UnexpectedError = 5      // Unanticipated general error
	}


	/// <summary>
	/// Root of the parsed exam: title, description, the question hierarchy, and the errors
	/// collected while building it.
	/// </summary>
	public class ExamPaper
	{
		// Default is a placeholder string meaning "exam title not recognized".
		public string AssignmentTitle { get; set; } = "未识别试卷标题";
		// Default is a placeholder string meaning "exam description not recognized".
		// NOTE(review): ExamStructureBuilder.BuildExam appends text to this value with a "\n"
		// separator, so the placeholder stays as the first line — confirm that is intended.
		public string Description { get; set; } = "未识别试卷描述";
		// Default is a placeholder string meaning "exam category".
		public string SubjectArea { get; set; } = "试卷类别";
		// Top-level question groups; nested groups live in MajorQuestionGroup.SubQuestionGroups.
		public List<MajorQuestionGroup> QuestionGroups { get; set; } = new List<MajorQuestionGroup>();
		// Questions encountered while no question group is active.
		public List<PaperQuestion> TopLevelQuestions { get; set; } = new List<PaperQuestion>();
		// Problems recorded during parsing; an empty list means none were recorded.
		public List<ParseError> Errors { get; set; } = new List<ParseError>();
	}

	/// <summary>
	/// A titled section of the exam that owns nested groups and questions.
	/// </summary>
	public class MajorQuestionGroup
	{
		// Heading text of the group.
		public string Title { get; set; } = string.Empty;
		// Free text attached to the group (name kept as-is; it is part of the public surface).
		public string Descript { get; set; } = string.Empty;
		// Score parsed from the heading; 0 when the heading carries none.
		public float Score { get; set; }
		// Groups nested under this one.
		public List<MajorQuestionGroup> SubQuestionGroups { get; set; } = new List<MajorQuestionGroup>();
		// Questions that belong directly to this group.
		public List<PaperQuestion> SubQuestions { get; set; } = new List<PaperQuestion>();
		// Priority of the pattern that matched the heading; the builder compares it to decide nesting.
		public int Priority { get; set; }
		// Defaults to true; the builder in this file also sets it to true explicitly.
		public bool bGroup { get; set; } = true;
	}

	/// <summary>
	/// A single question, optionally with options and nested sub-questions.
	/// </summary>
	public class PaperQuestion
	{
		// Ordinal marker of the question as captured from the text (e.g. "1").
		public string Number { get; set; } = string.Empty;
		// Question text; the builder appends following free text to it.
		public string Stem { get; set; } = string.Empty;
		// Score parsed from the question line; 0 when absent.
		public float Score { get; set; }
		// Answer options attached to this question.
		public List<Option> Options { get; set; } = new List<Option>();
		// Questions nested under this one.
		public List<PaperQuestion> SubQuestions { get; set; } = new List<PaperQuestion>();
		// Not assigned by the parser code visible in this file.
		public string SampleAnswer { get; set; } = string.Empty;
		// Not assigned by the parser code visible in this file.
		public string QuestionType { get; set; } = string.Empty;
		// Priority of the pattern that matched; the builder compares it to decide nesting.
		public int Priority { get; set; }
	}

	/// <summary>
	/// One answer option of a question.
	/// </summary>
	public class Option
	{
		// Option marker as captured by the option regex, including the trailing dot (e.g. "A.").
		public string Label { get; set; } = string.Empty;
		// Option text following the marker.
		public string Text { get; set; } = string.Empty;
	}


	/// <summary>
	/// A regular expression paired with a priority.
	/// </summary>
	/// <remarks>
	/// <see cref="Regex"/> is always built from the current <see cref="Pattern"/> with
	/// <see cref="RegexOptions.Multiline"/> and <see cref="RegexOptions.Compiled"/>, so the two
	/// can never disagree, even when <see cref="Pattern"/> is reassigned after construction.
	/// </remarks>
	public class RegexPatternConfig
	{
		private string _pattern;

		/// <summary>
		/// The regular expression source. Assigning a new value rebuilds <see cref="Regex"/>.
		/// </summary>
		/// <exception cref="ArgumentNullException">The assigned value is null.</exception>
		/// <exception cref="ArgumentException">The assigned value is not a valid regular expression.</exception>
		public string Pattern
		{
			get { return _pattern; }
			set
			{
				// Build first: if the pattern is invalid the exception leaves the previous
				// Pattern/Regex pair untouched instead of half-updated.
				Regex = new Regex(value, RegexOptions.Multiline | RegexOptions.Compiled);
				_pattern = value;
			}
		}

		/// <summary>Priority of the pattern; a smaller number means a higher priority.</summary>
		public int Priority { get; set; }

		/// <summary>The Regex object built from <see cref="Pattern"/>.</summary>
		public Regex Regex { get; private set; }

		/// <summary>
		/// Creates the configuration and builds the regex.
		/// </summary>
		/// <param name="pattern">Regular expression source.</param>
		/// <param name="priority">Priority; a smaller number means a higher priority.</param>
		/// <exception cref="ArgumentNullException"><paramref name="pattern"/> is null.</exception>
		/// <exception cref="ArgumentException"><paramref name="pattern"/> is not a valid regular expression.</exception>
		public RegexPatternConfig(string pattern, int priority)
		{
			Pattern = pattern;
			Priority = priority;
		}
	}

	/// <summary>
	/// Names the three pattern lists exposed by <see cref="ExamParserConfig"/>.
	/// Not referenced by the parser code visible in this file.
	/// </summary>
	public enum ExamParserEnum
	{
		MajorQuestionGroupPatterns = 0,
		QuestionPatterns,
		OptionPatterns
	}

	/// <summary>
	/// Configuration for exam parsing: the regular expressions used to recognise group headings,
	/// questions and options.
	/// </summary>
	/// <remarks>
	/// ExamStructureBuilder reads heading and question matches by position:
	/// group 1 = ordinal marker, group 2 = text, group 4 = numeric score from an optional
	/// "(N分)" suffix. Every heading/question pattern registered here must keep that layout.
	/// Option patterns use group 1 = label and group 2 = text.
	/// </remarks>
	public class ExamParserConfig
	{
		/// <summary>Patterns that recognise question-group headings.</summary>
		public List<RegexPatternConfig> MajorQuestionGroupPatterns { get; set; } = new List<RegexPatternConfig>();

		/// <summary>Patterns that recognise questions and sub-questions.</summary>
		public List<RegexPatternConfig> QuestionPatterns { get; set; } = new List<RegexPatternConfig>();

		/// <summary>Patterns that recognise answer options.</summary>
		public List<RegexPatternConfig> OptionPatterns { get; set; } = new List<RegexPatternConfig>();

		/// <summary>
		/// Registers the default pattern set.
		/// </summary>
		public ExamParserConfig()
		{
			// Group heading: a Chinese numeral followed by "、" or ".", then the title and an optional score.
			MajorQuestionGroupPatterns.Add(new RegexPatternConfig(@"^([一二三四五六七八九十]+)[、\.]\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 1));

			// Question numbered with a parenthesised Chinese numeral, e.g. "(一) ...".
			QuestionPatterns.Add(new RegexPatternConfig(@"^\(([一二三四五六七八九十]{1,2}|十[一二三四五六七八九])\)\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 1));

			// Pattern 1: "1. question text (5分)" or "1. question text"
			QuestionPatterns.Add(new RegexPatternConfig(@"^(\d+)\.\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 2));

			// Pattern 2: "(1) sub-question text (3分)" or "(1) sub-question text"
			QuestionPatterns.Add(new RegexPatternConfig(@"^\((\d+)\)\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 3));

			// Pattern 3: "① deeper sub-question text (2分)" or "① deeper sub-question text".
			// The circled number is captured as group 1 so this pattern has the same group layout
			// as the others; without the capture the stem lands in group 1 and the score in
			// group 2, and unscored questions are rejected for having an empty group 2.
			QuestionPatterns.Add(new RegexPatternConfig(@"^([①②③④⑤⑥⑦⑧⑨⑩]+)\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 4));

			OptionPatterns.Add(new RegexPatternConfig(@"([A-Z]\.)\s*(.*?)(?=[A-Z]\.|$)", 1)); // Upper-case letter options
			OptionPatterns.Add(new RegexPatternConfig(@"([a-z]\.)\s*(.*?)(?=[a-z]\.|$)", 1)); // Lower-case letter options
		}
	}


	/// <summary>
	/// One regex hit found by the scanner, together with the pattern that produced it.
	/// </summary>
	public class PotentialMatch
	{
		// Start offset of the match in the scanned text.
		public int StartIndex { get; set; }
		public int EndIndex { get; set; } // End offset (exclusive) of the matched structure in the original text
		public string MatchedText { get; set; } // The full matched line or passage
		public Match RegexMatch { get; set; } // The original Regex.Match object, kept for access to capture groups
		public RegexPatternConfig PatternConfig { get; set; } // The pattern configuration that produced the match
		public MatchType Type { get; set; } // Kind of structure: MajorQuestionGroup, Question, Option, etc.
	}

	/// <summary>
	/// Kind of structure a <see cref="PotentialMatch"/> represents.
	/// </summary>
	public enum MatchType
	{
		MajorQuestionGroup,
		Question,
		Option,
		Other // Reserved for any other kind that may need recognising
	}


	/// <summary>
	/// Runs every configured pattern over the raw text and collects the hits
	/// (group headings, questions, options). It only matches; it does not decide
	/// which structure a hit belongs to.
	/// </summary>
	public class ExamDocumentScanner
	{
		private readonly ExamParserConfig _config;

		/// <summary>
		/// Creates a scanner bound to the given configuration.
		/// </summary>
		/// <param name="config">Pattern configuration; must not be null.</param>
		/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
		public ExamDocumentScanner(ExamParserConfig config)
		{
			if (config == null)
			{
				throw new ArgumentNullException(nameof(config));
			}

			_config = config;
		}

		/// <summary>
		/// Scans the text with all group, question and option patterns.
		/// </summary>
		/// <param name="text">Text to scan.</param>
		/// <returns>
		/// All hits ordered by start offset; hits with equal offsets keep their collection order
		/// (groups, then questions, then options). An empty list for null or empty text.
		/// </returns>
		public List<PotentialMatch> Scan(string text)
		{
			var hits = new List<PotentialMatch>();

			// Nothing to match in null/empty text.
			if (string.IsNullOrEmpty(text))
			{
				return hits;
			}

			Collect(_config.MajorQuestionGroupPatterns, MatchType.MajorQuestionGroup, text, hits);
			Collect(_config.QuestionPatterns, MatchType.Question, text, hits);
			Collect(_config.OptionPatterns, MatchType.Option, text, hits);

			// OrderBy is a stable sort, so the collection order above breaks ties.
			return hits.OrderBy(hit => hit.StartIndex).ToList();
		}

		/// <summary>
		/// Appends one PotentialMatch of the given kind to <paramref name="sink"/> for every hit
		/// of every pattern in <paramref name="patterns"/>.
		/// </summary>
		private static void Collect(List<RegexPatternConfig> patterns, MatchType kind, string text, List<PotentialMatch> sink)
		{
			foreach (RegexPatternConfig pattern in patterns)
			{
				foreach (Match hit in pattern.Regex.Matches(text))
				{
					var candidate = new PotentialMatch();
					candidate.StartIndex = hit.Index;
					candidate.EndIndex = hit.Index + hit.Length;
					candidate.MatchedText = hit.Value;
					candidate.RegexMatch = hit;
					candidate.PatternConfig = pattern;
					candidate.Type = kind;
					sink.Add(candidate);
				}
			}
		}
	}

	public class ExamStructureBuilder
	{
		private readonly ExamParserConfig _config;

		/// <summary>
		/// Creates a builder bound to the given configuration.
		/// </summary>
		/// <param name="config">Parser configuration; must not be null.</param>
		/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
		public ExamStructureBuilder(ExamParserConfig config)
		{
			if (config == null)
			{
				throw new ArgumentNullException(nameof(config), "ExamParserConfig cannot be null.");
			}

			_config = config;
		}


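		// Example of the nesting this builder is meant to reconstruct
		// (heading markers shown as they appear in the exam text):
		//
		// 一. Basics
		//		1. Dictation
		//		2. Reading
		// 二. Advanced
		//		1. Reading
		//			(1). Multiple choice
		//			(2). Fill in the blank
		// 三. Writing
		//		(一) Text passages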


		/// <summary>
		/// Builds the <see cref="ExamPaper"/> hierarchy from the raw text and the scanner's matches.
		/// Recoverable problems are appended to <see cref="ExamPaper.Errors"/> instead of being thrown.
		/// </summary>
		/// <remarks>
		/// Matches are consumed in list order. A match whose pattern priority is less than or equal
		/// to the priority on top of the relevant stack closes that level; otherwise it nests under it.
		/// Text between matches is attached to the current question, else to the current group,
		/// else to the paper description. When there are no matches at all, the whole text
		/// becomes description text.
		/// </remarks>
		/// <param name="fullExamText">The complete text of the exam paper.</param>
		/// <param name="allPotentialMatches">All matches found by the scanner, ordered by start offset.</param>
		/// <returns>An ExamPaper containing the parsed structure and the list of recorded errors.</returns>
		/// <exception cref="ArgumentException">Thrown if fullExamText is null, empty or whitespace.</exception>
		/// <exception cref="ArgumentNullException">Thrown if allPotentialMatches is null.</exception>
		public ExamPaper BuildExam(string fullExamText, List<PotentialMatch> allPotentialMatches)
		{
			// These input errors are unrecoverable, so they are still thrown.
			if (string.IsNullOrWhiteSpace(fullExamText))
			{
				throw new ArgumentException("Full exam text cannot be null or empty.", nameof(fullExamText));
			}
			if (allPotentialMatches == null)
			{
				throw new ArgumentNullException(nameof(allPotentialMatches), "Potential matches list cannot be null.");
			}

			var examPaper = new ExamPaper();

			// A failure to extract the title is recorded, not fatal.
			try
			{
				examPaper.AssignmentTitle = GetExamTitle(fullExamText);
			}
			catch (Exception ex)
			{
				examPaper.Errors.Add(new ParseError(ParseErrorType.UnexpectedError, "Failed to extract exam title.", innerException: ex));
				examPaper.AssignmentTitle = "未识别试卷标题"; // fall back to the placeholder title
			}

			var majorQGStack = new Stack<MajorQuestionGroup>();
			MajorQuestionGroup currentMajorQG = null;

			var questionStack = new Stack<PaperQuestion>();
			PaperQuestion currentQuestion = null;

			int currentContentStart = 0;

			// Guarded: with no matches there is no first element to read. In that case
			// currentContentStart stays 0 and the trailing-text step below files the whole
			// text as description.
			if (allPotentialMatches.Count > 0)
			{
				int firstMatchStart = allPotentialMatches[0].StartIndex;

				// Text before the first match is introductory description.
				if (firstMatchStart > 0)
				{
					string introText = fullExamText.Substring(0, firstMatchStart).Trim();
					if (!string.IsNullOrWhiteSpace(introText))
					{
						examPaper.Description += (string.IsNullOrWhiteSpace(examPaper.Description) ? "" : "\n") + introText;
					}
				}

				currentContentStart = firstMatchStart;
			}

			for (int i = 0; i < allPotentialMatches.Count; i++)
			{
				var pm = allPotentialMatches[i];

				// A null entry would otherwise fault inside the error handlers below, which read pm.
				if (pm == null)
				{
					examPaper.Errors.Add(new ParseError(ParseErrorType.Validation,
						$"PotentialMatch at index {i} is null.",
						index: i));
					continue;
				}

				try
				{
					// Validation: record and skip instead of throwing.
					if (pm.StartIndex < currentContentStart || pm.EndIndex > fullExamText.Length || pm.StartIndex > pm.EndIndex)
					{
						examPaper.Errors.Add(new ParseError(ParseErrorType.Validation,
							$"PotentialMatch at index {i} has invalid start/end indices. Start: {pm.StartIndex}, End: {pm.EndIndex}, CurrentContentStart: {currentContentStart}, FullTextLength: {fullExamText.Length}",
							index: i, matchedText: pm.MatchedText));
						currentContentStart = Math.Max(currentContentStart, pm.EndIndex); // step past the bad match
						continue;
					}
					if (pm.RegexMatch == null || pm.PatternConfig == null)
					{
						examPaper.Errors.Add(new ParseError(ParseErrorType.Validation,
							$"PotentialMatch at index {i} is missing RegexMatch or PatternConfig.",
							index: i, matchedText: pm.MatchedText));
						currentContentStart = Math.Max(currentContentStart, pm.EndIndex); // step past the bad match
						continue;
					}

					// Free text between the previous match and this one.
					string precedingText = fullExamText.Substring(currentContentStart, pm.StartIndex - currentContentStart).Trim();
					if (!string.IsNullOrWhiteSpace(precedingText))
					{
						AttachFreeText(examPaper, currentQuestion, currentMajorQG, precedingText,
							allPotentialMatches, currentContentStart, pm.StartIndex);
					}

					if (pm.Type == MatchType.MajorQuestionGroup)
					{
						try
						{
							// Close every open group at the same or a deeper level.
							while (majorQGStack.Any() && pm.PatternConfig.Priority <= majorQGStack.Peek().Priority)
							{
								majorQGStack.Pop();
							}

							// Group 1 is the ordinal marker; it must be present.
							if (pm.RegexMatch.Groups.Count < 2 || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[1].Value))
							{
								examPaper.Errors.Add(new ParseError(ParseErrorType.RegexMatchIssue,
									$"MajorQuestionGroup match at index {i} does not have enough regex groups or a valid title group (Group 1). Skipping this group.",
									index: i, matchedText: pm.MatchedText));
								currentContentStart = pm.EndIndex;
								continue;
							}

							float score = ParseScore(pm, i, "MajorQuestionGroup", examPaper.Errors);

							MajorQuestionGroup newMajorQG = new MajorQuestionGroup
							{
								Title = pm.RegexMatch.Groups[2].Value.Trim(), // the heading text is group 2
								Score = score,
								Priority = pm.PatternConfig.Priority,
								bGroup = true
							};

							if (majorQGStack.Any())
							{
								majorQGStack.Peek().SubQuestionGroups.Add(newMajorQG);
							}
							else
							{
								examPaper.QuestionGroups.Add(newMajorQG);
							}

							currentContentStart = pm.EndIndex;
							majorQGStack.Push(newMajorQG);
							currentMajorQG = newMajorQG;

							// A new group ends every question that was open.
							questionStack.Clear();
							currentQuestion = null;
						}
						catch (Exception ex)
						{
							examPaper.Errors.Add(new ParseError(ParseErrorType.UnexpectedError,
								$"An unexpected error occurred during processing MajorQuestionGroup at index {i}.",
								index: i, matchedText: pm.MatchedText, innerException: ex));
							currentContentStart = pm.EndIndex;
							continue;
						}
					}
					else if (pm.Type == MatchType.Question)
					{
						try
						{
							// Close every open question at the same or a deeper level.
							while (questionStack.Any() && pm.PatternConfig.Priority <= questionStack.Peek().Priority)
							{
								questionStack.Pop();
							}

							// Group 1 is the number, group 2 the stem; both must be present.
							if (pm.RegexMatch.Groups.Count < 3 || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[1].Value) || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[2].Value))
							{
								examPaper.Errors.Add(new ParseError(ParseErrorType.RegexMatchIssue,
									$"Question match at index {i} does not have enough regex groups or valid number/text groups (Group 1/2). Skipping this question.",
									index: i, matchedText: pm.MatchedText));
								currentContentStart = pm.EndIndex;
								continue;
							}

							float score = ParseScore(pm, i, "Question", examPaper.Errors);

							PaperQuestion newQuestion = new PaperQuestion
							{
								Number = pm.RegexMatch.Groups[1].Value.Trim(),
								Stem = pm.RegexMatch.Groups[2].Value.Trim(),
								Priority = pm.PatternConfig.Priority,
								Score = score
							};

							if (questionStack.Any())
							{
								questionStack.Peek().SubQuestions.Add(newQuestion);
							}
							else if (currentMajorQG != null)
							{
								currentMajorQG.SubQuestions.Add(newQuestion);
							}
							else
							{
								examPaper.TopLevelQuestions.Add(newQuestion);
							}

							currentContentStart = pm.EndIndex;
							questionStack.Push(newQuestion);
							currentQuestion = newQuestion;
						}
						catch (Exception ex)
						{
							examPaper.Errors.Add(new ParseError(ParseErrorType.UnexpectedError,
								$"An unexpected error occurred during processing Question at index {i}.",
								index: i, matchedText: pm.MatchedText, innerException: ex));
							currentContentStart = pm.EndIndex;
							continue;
						}
					}
					else if (pm.Type == MatchType.Option)
					{
						try
						{
							if (currentQuestion != null)
							{
								// Group 1 is the label, group 2 the text; both must be present.
								if (pm.RegexMatch.Groups.Count < 3 || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[1].Value) || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[2].Value))
								{
									examPaper.Errors.Add(new ParseError(ParseErrorType.RegexMatchIssue,
										$"Option match at index {i} does not have enough regex groups or valid label/text groups (Group 1/2). Skipping this option.",
										index: i, matchedText: pm.MatchedText));
									currentContentStart = pm.EndIndex;
									continue;
								}

								Option newOption = new Option
								{
									Label = pm.RegexMatch.Groups[1].Value.Trim(),
									Text = pm.RegexMatch.Groups[2].Value.Trim()
								};
								currentQuestion.Options.Add(newOption);
							}
							else
							{
								// An option with no open question is a structural problem: record and move on.
								examPaper.Errors.Add(new ParseError(ParseErrorType.Structural,
									$"Found isolated Option at index {i}. Options must belong to a question. Ignoring this option.",
									index: i, matchedText: pm.MatchedText));
							}
						}
						catch (Exception ex)
						{
							examPaper.Errors.Add(new ParseError(ParseErrorType.UnexpectedError,
								$"An unexpected error occurred during processing Option at index {i}.",
								index: i, matchedText: pm.MatchedText, innerException: ex));
							// No continue: the failure is limited to this option, and the offset
							// update below must still run.
						}
					}

					currentContentStart = pm.EndIndex; // text after this match starts here
				}
				catch (Exception ex)
				{
					// Anything not handled by the more specific handlers above.
					examPaper.Errors.Add(new ParseError(ParseErrorType.UnexpectedError,
						$"An unexpected error occurred during main loop processing of PotentialMatch at index {i}.",
						index: i, matchedText: pm.MatchedText, innerException: ex));
					// Never move the offset backwards, but do step past this match.
					currentContentStart = Math.Max(currentContentStart, pm.EndIndex);
				}
			}

			// Text after the last match (or the whole text when there were no matches).
			if (currentContentStart < fullExamText.Length)
			{
				try
				{
					string remainingText = fullExamText.Substring(currentContentStart).Trim();
					if (!string.IsNullOrWhiteSpace(remainingText))
					{
						AttachFreeText(examPaper, currentQuestion, currentMajorQG, remainingText,
							allPotentialMatches, currentContentStart, fullExamText.Length);
					}
				}
				catch (Exception ex)
				{
					examPaper.Errors.Add(new ParseError(ParseErrorType.UnexpectedError,
						"An unexpected error occurred while processing remaining text after all potential matches.",
						innerException: ex));
				}
			}

			return examPaper;
		}

		/// <summary>
		/// Routes free text to its owner: the current question (through ProcessQuestionContent),
		/// else the current group's description, else the paper description.
		/// </summary>
		private void AttachFreeText(ExamPaper examPaper, PaperQuestion currentQuestion, MajorQuestionGroup currentMajorQG,
			string text, List<PotentialMatch> allPotentialMatches, int rangeStart, int rangeEnd)
		{
			if (currentQuestion != null)
			{
				ProcessQuestionContent(currentQuestion, text,
					GetSubMatchesForRange(allPotentialMatches, rangeStart, rangeEnd, examPaper.Errors),
					examPaper.Errors);
			}
			else if (currentMajorQG != null)
			{
				currentMajorQG.Descript += (string.IsNullOrWhiteSpace(currentMajorQG.Descript) ? "" : "\n") + text;
			}
			else
			{
				examPaper.Description += (string.IsNullOrWhiteSpace(examPaper.Description) ? "" : "\n") + text;
			}
		}

		/// <summary>
		/// Reads the numeric score from regex group 4 of a heading or question match.
		/// Returns 0 when the group is absent, and records a DataParsing error (also returning 0)
		/// when its text cannot be parsed.
		/// </summary>
		private static float ParseScore(PotentialMatch pm, int index, string ownerLabel, List<ParseError> errors)
		{
			float score = 0;
			Group scoreGroup = pm.RegexMatch.Groups[4];

			if (pm.RegexMatch.Groups.Count > 4 && scoreGroup.Success)
			{
				// The regex only admits '.' as the decimal separator, so parse with the invariant
				// culture; the current culture may expect ',' and would mis-read "2.5".
				if (!float.TryParse(scoreGroup.Value, System.Globalization.NumberStyles.Float,
						System.Globalization.CultureInfo.InvariantCulture, out score))
				{
					errors.Add(new ParseError(ParseErrorType.DataParsing,
						$"Failed to parse score '{scoreGroup.Value}' for {ownerLabel} at index {index}. Defaulting to 0.",
						index: index, matchedText: pm.MatchedText));
				}
			}

			return score;
		}

		/// <summary>
		/// Extracts the exam title: the first line of the text that is not blank, with
		/// surrounding whitespace removed.
		/// </summary>
		/// <param name="examPaperText">Full exam text; the caller guarantees it is not null.</param>
		/// <returns>The trimmed title, or the placeholder "未识别试卷标题" when every line is blank.</returns>
		/// <remarks>
		/// This method does not catch anything itself; the caller wraps the call in a try/catch
		/// and records any failure.
		/// </remarks>
		private string GetExamTitle(string examPaperText)
		{
			string[] lines = examPaperText.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
			string firstLine = lines.FirstOrDefault(line => !string.IsNullOrWhiteSpace(line));

			// Titles are often padded with spaces to centre them; that padding is not part of the title.
			return firstLine != null ? firstLine.Trim() : "未识别试卷标题";
		}

		/// <summary>
		/// Selects the matches whose start offset lies in the half-open range [start, end).
		/// Problems are recorded in <paramref name="errors"/> and an empty list is returned
		/// instead of throwing.
		/// </summary>
		/// <param name="allMatches">Matches to filter; null yields an empty result.</param>
		/// <param name="start">Inclusive lower bound; must not be negative.</param>
		/// <param name="end">Exclusive upper bound; must not be smaller than <paramref name="start"/>.</param>
		/// <param name="errors">Receives a Validation error for a bad range, or an UnexpectedError if filtering fails.</param>
		/// <returns>The matches inside the range, in their original order.</returns>
		private List<PotentialMatch> GetSubMatchesForRange(List<PotentialMatch> allMatches, int start, int end, List<ParseError> errors)
		{
			var selected = new List<PotentialMatch>();

			// A bad range is reported and treated as "nothing in scope".
			bool rangeIsValid = start >= 0 && end >= start;
			if (!rangeIsValid)
			{
				errors.Add(new ParseError(ParseErrorType.Validation,
					$"Invalid range provided to GetSubMatchesForRange. Start: {start}, End: {end}.",
					index: start)); // start serves as an approximate index
				return selected;
			}

			// The public entry point already rejects a null list; this keeps the helper safe on its own.
			if (allMatches == null)
			{
				return selected;
			}

			try
			{
				foreach (PotentialMatch candidate in allMatches)
				{
					if (candidate.StartIndex >= start && candidate.StartIndex < end)
					{
						selected.Add(candidate);
					}
				}

				return selected;
			}
			catch (Exception ex)
			{
				errors.Add(new ParseError(ParseErrorType.UnexpectedError,
					$"An unexpected error occurred getting sub-matches for range [{start}, {end}).",
					innerException: ex));
				return new List<PotentialMatch>(); // discard any partial result on failure
			}
		}

		/// <summary>
		/// Attaches a block of free text to a question: option matches in scope become
		/// <see cref="Option"/> entries, and the text around them is appended to the stem.
		/// Recoverable problems are recorded in <paramref name="errors"/> instead of being thrown.
		/// </summary>
		/// <param name="question">Question that receives the content.</param>
		/// <param name="contentText">Free text belonging to the question.</param>
		/// <param name="potentialMatchesInScope">Matches that fall inside the text; only Option matches are used.</param>
		/// <param name="errors">Receives the problems found while processing.</param>
		/// <exception cref="ArgumentNullException">
		/// <paramref name="question"/>, <paramref name="contentText"/> or
		/// <paramref name="potentialMatchesInScope"/> is null (a caller bug, so it is thrown).
		/// </exception>
		private void ProcessQuestionContent(PaperQuestion question, string contentText, List<PotentialMatch> potentialMatchesInScope, List<ParseError> errors)
		{
			// Contract checks: a violation means the caller is wrong, so throw.
			if (question == null) throw new ArgumentNullException(nameof(question), "Question cannot be null in ProcessQuestionContent.");
			if (contentText == null) throw new ArgumentNullException(nameof(contentText), "Content text cannot be null in ProcessQuestionContent.");
			if (potentialMatchesInScope == null) throw new ArgumentNullException(nameof(potentialMatchesInScope), "Potential matches in scope cannot be null.");

			try
			{
				int lastOptionEndIndex = 0;

				foreach (var pm in potentialMatchesInScope.OrderBy(p => p.StartIndex))
				{
					try
					{
						// Only options are consumed here. The text covered by any other match is
						// picked up by the "text before option" / trailing-text steps, so appending
						// contentText for such a match would duplicate it in the stem.
						if (pm.Type != MatchType.Option)
						{
							continue;
						}

						// NOTE(review): pm.StartIndex/EndIndex are assigned by the scanner as offsets
						// into the full exam text, while contentText is a trimmed slice of it, so this
						// comparison mixes two coordinate spaces. Fixing it needs the slice offset
						// from the caller — confirm and address there.
						if (pm.StartIndex < lastOptionEndIndex || pm.StartIndex > contentText.Length || pm.EndIndex > contentText.Length)
						{
							errors.Add(new ParseError(ParseErrorType.Validation,
								$"Option match at index {pm.StartIndex} has invalid indices within content text. MatchedText: '{pm.MatchedText}'. Skipping.",
								index: pm.StartIndex, matchedText: pm.MatchedText));
							continue;
						}

						// Text between the previous option and this one belongs to the stem.
						if (pm.StartIndex > lastOptionEndIndex)
						{
							string textBeforeOption = contentText.Substring(lastOptionEndIndex, pm.StartIndex - lastOptionEndIndex).Trim();
							if (!string.IsNullOrWhiteSpace(textBeforeOption))
							{
								question.Stem += (string.IsNullOrWhiteSpace(question.Stem) ? "" : "\n") + textBeforeOption;
							}
						}

						// Group 1 is the label, group 2 the text; both must be present.
						if (pm.RegexMatch.Groups.Count < 3 || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[1].Value) || string.IsNullOrWhiteSpace(pm.RegexMatch.Groups[2].Value))
						{
							errors.Add(new ParseError(ParseErrorType.RegexMatchIssue,
								$"Option regex match '{pm.MatchedText}' does not have enough groups (expected 3) for label and text. Skipping option.",
								index: pm.StartIndex, matchedText: pm.MatchedText));
							lastOptionEndIndex = pm.EndIndex; // still step past the rejected option
							continue;
						}

						var newOption = new Option
						{
							Label = pm.RegexMatch.Groups[1].Value.Trim(),
							Text = pm.RegexMatch.Groups[2].Value.Trim()
						};
						question.Options.Add(newOption);
						lastOptionEndIndex = pm.EndIndex;
					}
					catch (Exception innerEx)
					{
						errors.Add(new ParseError(ParseErrorType.UnexpectedError,
							$"An unexpected error occurred during processing a potential match ({pm.Type}) within question content.",
							index: pm.StartIndex, matchedText: pm.MatchedText, innerException: innerEx));
						lastOptionEndIndex = pm.EndIndex; // step past the failing match
						continue;
					}
				}

				// Text after the last option (or all of it when there were no options) goes to the stem.
				if (lastOptionEndIndex < contentText.Length)
				{
					string remainingContent = contentText.Substring(lastOptionEndIndex).Trim();
					if (!string.IsNullOrWhiteSpace(remainingContent))
					{
						question.Stem += (string.IsNullOrWhiteSpace(question.Stem) ? "" : "\n") + remainingContent;
					}
				}
			}
			catch (Exception ex)
			{
				// Anything not handled per match above.
				errors.Add(new ParseError(ParseErrorType.UnexpectedError,
					$"An unexpected error occurred while processing content for Question '{question.Number}'.",
					innerException: ex));
			}
		}
	}

	/// <summary>
	/// Entry point of the parser: scans the text for matches, then builds the exam structure
	/// from them.
	/// </summary>
	public class ExamParser
	{
		private readonly ExamParserConfig _config;
		private readonly ExamDocumentScanner _scanner;
		private readonly ExamStructureBuilder _builder;

		/// <summary>
		/// Creates a parser whose scanner and builder share the given configuration.
		/// </summary>
		/// <param name="config">Parser configuration; must not be null.</param>
		/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
		public ExamParser(ExamParserConfig config)
		{
			if (config == null)
			{
				throw new ArgumentNullException(nameof(config));
			}

			_config = config;
			_scanner = new ExamDocumentScanner(config);
			_builder = new ExamStructureBuilder(config);
		}

		/// <summary>
		/// Parses the exam text into a structured ExamPaper.
		/// </summary>
		/// <param name="examPaperText">The complete exam text.</param>
		/// <returns>
		/// The parsed ExamPaper; problems found while parsing are listed in its Errors collection.
		/// </returns>
		/// <exception cref="ArgumentException">
		/// The text is null, empty or whitespace (thrown by the builder's input check).
		/// </exception>
		public ExamPaper ParseExamPaper(string examPaperText)
		{
			// Step 1 — scan: a single pass that collects every potential match.
			// Step 2 — build: walk the matches in order and assemble the hierarchy.
			return _builder.BuildExam(examPaperText, _scanner.Scan(examPaperText));
		}
	}
}