update-examParseByText
This commit is contained in:
627
TechHelper.Client/Exam/Exam/ExamParse.cs
Normal file
627
TechHelper.Client/Exam/Exam/ExamParse.cs
Normal file
@@ -0,0 +1,627 @@
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace TechHelper.Client.Exam.Parse
|
||||
{
|
||||
/// <summary>
/// Root object of a parsed exam: header information plus the question tree.
/// </summary>
public class ExamPaper
{
    /// <summary>Exam title; starts out as a placeholder string.</summary>
    public string Title { get; set; } = "未识别试卷标题";

    /// <summary>Exam description; starts out as a placeholder string.</summary>
    public string Descript { get; set; } = "未识别试卷描述";

    /// <summary>Subject area of the exam; starts out as a placeholder string.</summary>
    public string SubjectArea { get; set; } = "试卷类别";

    /// <summary>Top-level major question groups of the exam.</summary>
    public List<MajorQuestionGroup> MajorQuestionGroups { get; set; } = new();

    /// <summary>Questions attached directly to the exam rather than to a major question group.</summary>
    public List<Question> TopLevelQuestions { get; set; } = new();
}
|
||||
|
||||
/// <summary>
/// A section of the exam that can hold questions and nested sub-groups.
/// </summary>
public class MajorQuestionGroup
{
    /// <summary>Heading text of the group.</summary>
    public string Title { get; set; } = "";

    /// <summary>Free text that belongs to the group.</summary>
    public string Descript { get; set; } = "";

    /// <summary>Score assigned to the group; stays 0 when none was set.</summary>
    public float Score { get; set; }

    /// <summary>Groups nested inside this group.</summary>
    public List<MajorQuestionGroup> SubMajorQuestionGroups { get; set; } = new();

    /// <summary>Questions that belong directly to this group.</summary>
    public List<Question> Questions { get; set; } = new();

    /// <summary>Priority of the pattern that produced this group; used to decide nesting.</summary>
    public int Priority { get; set; }
}
|
||||
|
||||
/// <summary>
/// A single question, optionally carrying options and nested sub-questions.
/// </summary>
public class Question
{
    /// <summary>Question number as it was captured from the text.</summary>
    public string Number { get; set; } = "";

    /// <summary>Question stem.</summary>
    public string Text { get; set; } = "";

    /// <summary>Score assigned to the question; stays 0 when none was set.</summary>
    public float Score { get; set; }

    /// <summary>Answer options of the question.</summary>
    public List<Option> Options { get; set; } = new();

    /// <summary>Questions nested below this one.</summary>
    public List<Question> SubQuestions { get; set; } = new();

    /// <summary>Priority of the pattern that produced this question; used to decide nesting.</summary>
    public int Priority { get; set; }
}
|
||||
|
||||
/// <summary>
/// One answer option of a question.
/// </summary>
public class Option
{
    /// <summary>Option label as captured from the text (for example "A.").</summary>
    public string Label { get; set; } = "";

    /// <summary>Option content.</summary>
    public string Text { get; set; } = "";
}
|
||||
|
||||
|
||||
/// <summary>
/// A regular expression paired with a priority.
/// </summary>
public class RegexPatternConfig
{
    /// <summary>Source text of the regular expression.</summary>
    public string Pattern { get; set; }

    /// <summary>Priority of the pattern; a smaller number means a higher priority.</summary>
    public int Priority { get; set; }

    /// <summary>
    /// Regex built once in the constructor. It is not rebuilt when <see cref="Pattern"/> is reassigned.
    /// </summary>
    public Regex Regex { get; private set; }

    /// <summary>
    /// Stores the pattern and priority and builds the regex in multiline, compiled mode.
    /// </summary>
    /// <param name="pattern">Regular expression source text.</param>
    /// <param name="priority">Priority to associate with the pattern.</param>
    public RegexPatternConfig(string pattern, int priority)
    {
        const RegexOptions options = RegexOptions.Multiline | RegexOptions.Compiled;

        Regex = new Regex(pattern, options);
        Pattern = pattern;
        Priority = priority;
    }
}
|
||||
|
||||
/// <summary>
/// Configuration for exam parsing: the regular expressions used to recognise
/// major question groups, questions and options.
/// </summary>
public class ExamParserConfig
{
    /// <summary>Patterns that recognise major question group headings.</summary>
    public List<RegexPatternConfig> MajorQuestionGroupPatterns { get; set; } = new List<RegexPatternConfig>();

    /// <summary>Patterns that recognise question lines.</summary>
    public List<RegexPatternConfig> QuestionPatterns { get; set; } = new List<RegexPatternConfig>();

    /// <summary>Patterns that recognise option lines.</summary>
    public List<RegexPatternConfig> OptionPatterns { get; set; } = new List<RegexPatternConfig>();

    /// <summary>
    /// Registers the default patterns.
    /// </summary>
    public ExamParserConfig()
    {
        // --- Major question group patterns ---
        // Matches "一、选择题 (20分)" or "二、阅读理解".
        // Group 1: group number (e.g. "一", "二")
        // Group 2: group title (e.g. "选择题", "阅读理解")
        // Group 3 / Group 4: numeric score (e.g. "20"), optional. The number is wrapped in two
        //                    nested capturing groups, so both hold the same value.
        MajorQuestionGroupPatterns.Add(new RegexPatternConfig(@"^([一二三四五六七八九十]+)[、\.]\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 1));

        // --- Question patterns ---
        // Every question pattern exposes the same group layout:
        // Group 1: question number (e.g. "1", "①")
        // Group 2: question stem without the score
        // Group 3 / Group 4: numeric score (e.g. "5"), optional

        // Pattern 1: "1. 这是一个题目 (5分)" or "1. 这是一个题目"
        QuestionPatterns.Add(new RegexPatternConfig(@"^(\d+)\.\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 1));

        // Pattern 2: "(1) 这是一个子题目 (3分)" or "(1) 这是一个子题目"
        QuestionPatterns.Add(new RegexPatternConfig(@"^\((\d+)\)\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 2));

        // Pattern 3: "① 这是一个更深层次的子题目 (2分)" or "① 这是一个更深层次的子题目"
        // The circled number is captured as Group 1 so the layout matches patterns 1 and 2.
        // Without that capture the stem lands in Group 1 and the score in Group 2.
        QuestionPatterns.Add(new RegexPatternConfig(@"^([①②③④⑤⑥⑦⑧⑨⑩]+)\s*(.+?)(?:\s*\(((\d+(?:\.\d+)?))\s*分\))?\s*$", 3));

        // --- Option patterns ---
        // Matches "A. 选项内容".
        // Group 1: option label including the dot (e.g. "A.")
        // Group 2: option content
        OptionPatterns.Add(new RegexPatternConfig(@"^([A-Z]\.)\s*(.*)$", 1)); // upper-case labels
        OptionPatterns.Add(new RegexPatternConfig(@"^([a-z]\.)\s*(.*)$", 2)); // lower-case labels

        // No "ignored line" patterns (blank lines, end-of-paper markers, page counters) are
        // registered here.
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/// <summary>
/// One regex hit found while scanning the exam text, with the pattern that produced it.
/// </summary>
public class PotentialMatch
{
    /// <summary>Start position of the hit in the original text.</summary>
    public int StartIndex { get; set; }

    /// <summary>End position of the hit in the original text.</summary>
    public int EndIndex { get; set; }

    /// <summary>The complete matched line or passage.</summary>
    public string MatchedText { get; set; }

    /// <summary>The underlying regex match, kept so capture groups can be read later.</summary>
    public Match RegexMatch { get; set; }

    /// <summary>The pattern configuration that produced the hit.</summary>
    public RegexPatternConfig PatternConfig { get; set; }

    /// <summary>Kind of structure the hit represents.</summary>
    public MatchType Type { get; set; }
}
|
||||
|
||||
/// <summary>
/// Kinds of structure a <c>PotentialMatch</c> can represent.
/// </summary>
public enum MatchType
{
    /// <summary>A major question group heading.</summary>
    MajorQuestionGroup = 0,

    /// <summary>A question line.</summary>
    Question = 1,

    /// <summary>An option line.</summary>
    Option = 2,

    /// <summary>Any other kind that may need to be recognised.</summary>
    Other = 3
}
|
||||
|
||||
|
||||
/// <summary>
/// Scans the raw text and collects every potential match (groups, questions, options).
/// It only matches; it does not decide which structure a hit belongs to.
/// </summary>
public class ExamDocumentScanner
{
    private readonly ExamParserConfig _config;

    /// <summary>Creates a scanner that uses the patterns of <paramref name="config"/>.</summary>
    public ExamDocumentScanner(ExamParserConfig config) => _config = config;

    /// <summary>
    /// Runs every configured pattern over <paramref name="text"/> and returns all hits
    /// ordered by their start position.
    /// </summary>
    /// <param name="text">Text to scan.</param>
    /// <returns>All hits as <see cref="PotentialMatch"/> items.</returns>
    public List<PotentialMatch> Scan(string text)
    {
        var found = new List<PotentialMatch>();

        // Collection order (groups, questions, options) is kept because the final sort is stable.
        Collect(text, _config.MajorQuestionGroupPatterns, MatchType.MajorQuestionGroup, found);
        Collect(text, _config.QuestionPatterns, MatchType.Question, found);
        Collect(text, _config.OptionPatterns, MatchType.Option, found);

        return found.OrderBy(candidate => candidate.StartIndex).ToList();
    }

    // Appends one PotentialMatch of the given type to sink for every hit of every pattern.
    private static void Collect(string text, List<RegexPatternConfig> patterns, MatchType type, List<PotentialMatch> sink)
    {
        foreach (var patternConfig in patterns)
        {
            foreach (Match hit in patternConfig.Regex.Matches(text))
            {
                sink.Add(new PotentialMatch
                {
                    StartIndex = hit.Index,
                    EndIndex = hit.Index + hit.Length,
                    MatchedText = hit.Value,
                    RegexMatch = hit,
                    PatternConfig = patternConfig,
                    Type = type
                });
            }
        }
    }
}
|
||||
|
||||
/// <summary>
/// Turns the position-ordered list of <see cref="PotentialMatch"/> items produced by the scanner
/// into a hierarchical <see cref="ExamPaper"/>.
/// </summary>
public class ExamStructureBuilder
{
    private readonly ExamParserConfig _config;

    /// <summary>Creates a builder for the given configuration.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    public ExamStructureBuilder(ExamParserConfig config)
    {
        _config = config ?? throw new ArgumentNullException(nameof(config), "ExamParserConfig cannot be null.");
    }

    /// <summary>
    /// Walks the matches in order and builds the exam tree. Text between two matches is attached
    /// to the current question, otherwise to the current major group, otherwise to the exam description.
    /// </summary>
    /// <param name="fullExamText">The complete exam text the matches refer to.</param>
    /// <param name="allPotentialMatches">Matches ordered by start position and not overlapping.</param>
    /// <returns>The populated <see cref="ExamPaper"/>.</returns>
    /// <exception cref="ArgumentException"><paramref name="fullExamText"/> is null or whitespace.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="allPotentialMatches"/> is null.</exception>
    /// <exception cref="InvalidOperationException">A match is invalid or cannot be processed; the cause is the inner exception.</exception>
    public ExamPaper BuildExamPaper(string fullExamText, List<PotentialMatch> allPotentialMatches)
    {
        if (string.IsNullOrWhiteSpace(fullExamText))
        {
            throw new ArgumentException("Full exam text cannot be null or empty.", nameof(fullExamText));
        }
        if (allPotentialMatches == null)
        {
            throw new ArgumentNullException(nameof(allPotentialMatches), "Potential matches list cannot be null.");
        }

        var examPaper = new ExamPaper();
        try
        {
            examPaper.Title = GetExamTitle(fullExamText);
        }
        catch (Exception ex)
        {
            throw new InvalidOperationException("Failed to extract exam title.", ex);
        }

        var majorQGStack = new Stack<MajorQuestionGroup>();
        var questionStack = new Stack<Question>();
        MajorQuestionGroup currentMajorQG = null;
        Question currentQuestion = null;
        bool descriptionCaptured = false;
        int currentContentStart = 0;

        // Attaches the unmatched text in [start, end) to whatever structure is currently open.
        void AttachFreeText(int start, int end)
        {
            string segment = fullExamText.Substring(start, end - start);
            if (string.IsNullOrWhiteSpace(segment))
            {
                return;
            }

            if (currentQuestion != null)
            {
                // The raw segment and its absolute offset are passed so match positions can be
                // translated into positions inside the segment.
                ProcessQuestionContent(currentQuestion, segment,
                    GetSubMatchesForRange(allPotentialMatches, start, end), start);
            }
            else if (currentMajorQG != null)
            {
                currentMajorQG.Descript = AppendLine(currentMajorQG.Descript, segment.Trim());
            }
            else
            {
                // The first piece of top-level text replaces the placeholder description;
                // later pieces are appended. This also covers the intro text before the first
                // match, so it is stored exactly once.
                examPaper.Descript = descriptionCaptured
                    ? AppendLine(examPaper.Descript, segment.Trim())
                    : segment.Trim();
                descriptionCaptured = true;
            }
        }

        for (int i = 0; i < allPotentialMatches.Count; i++)
        {
            var pm = allPotentialMatches[i];

            try
            {
                if (pm == null)
                {
                    throw new InvalidOperationException($"PotentialMatch at index {i} is null.");
                }
                if (pm.StartIndex < currentContentStart || pm.EndIndex > fullExamText.Length || pm.StartIndex > pm.EndIndex)
                {
                    throw new ArgumentOutOfRangeException(
                        nameof(allPotentialMatches),
                        $"PotentialMatch at index {i} has invalid start/end indices. Start: {pm.StartIndex}, End: {pm.EndIndex}, CurrentContentStart: {currentContentStart}, FullTextLength: {fullExamText.Length}");
                }
                if (pm.RegexMatch == null || pm.PatternConfig == null)
                {
                    throw new InvalidOperationException($"PotentialMatch at index {i} is missing RegexMatch or PatternConfig.");
                }

                AttachFreeText(currentContentStart, pm.StartIndex);

                switch (pm.Type)
                {
                    case MatchType.MajorQuestionGroup:
                        currentMajorQG = AddMajorQuestionGroup(examPaper, majorQGStack, pm, i);
                        // A new group closes every open question.
                        questionStack.Clear();
                        currentQuestion = null;
                        break;

                    case MatchType.Question:
                        currentQuestion = AddQuestion(examPaper, currentMajorQG, questionStack, pm, i);
                        break;

                    case MatchType.Option:
                        AddOption(currentQuestion, pm, i);
                        break;
                }

                currentContentStart = pm.EndIndex;
            }
            catch (Exception ex)
            {
                throw new InvalidOperationException(
                    $"Error processing {pm?.Type.ToString() ?? "null"} match at index {i} (MatchedText: '{pm?.MatchedText}').", ex);
            }
        }

        try
        {
            AttachFreeText(currentContentStart, fullExamText.Length);
        }
        catch (Exception ex)
        {
            throw new InvalidOperationException("Error processing remaining text after all potential matches.", ex);
        }

        return examPaper;
    }

    /// <summary>
    /// Creates a major question group from a match and links it into the tree.
    /// Expects Group 2 to hold the title and Group 3 the optional numeric score.
    /// </summary>
    private static MajorQuestionGroup AddMajorQuestionGroup(ExamPaper examPaper, Stack<MajorQuestionGroup> groupStack, PotentialMatch pm, int matchIndex)
    {
        // Groups with the same or a lower priority number on the stack are finished.
        while (groupStack.Any() && pm.PatternConfig.Priority <= groupStack.Peek().Priority)
        {
            groupStack.Pop();
        }

        GroupCollection groups = pm.RegexMatch.Groups;
        if (groups.Count < 3)
        {
            throw new InvalidOperationException($"MajorQuestionGroup match at index {matchIndex} does not have enough regex groups for Title.");
        }

        var newGroup = new MajorQuestionGroup
        {
            Title = groups[2].Value.Trim(),
            Score = ParseOptionalScore(groups, 3, "MajorQuestionGroup", matchIndex),
            Priority = pm.PatternConfig.Priority
        };

        if (groupStack.Any())
        {
            groupStack.Peek().SubMajorQuestionGroups.Add(newGroup);
        }
        else
        {
            examPaper.MajorQuestionGroups.Add(newGroup);
        }

        groupStack.Push(newGroup);
        return newGroup;
    }

    /// <summary>
    /// Creates a question from a match and links it below the open parent question, the current
    /// group, or the exam itself. Expects Group 1 = number, Group 2 = stem, Group 3 = optional score.
    /// </summary>
    private static Question AddQuestion(ExamPaper examPaper, MajorQuestionGroup currentMajorQG, Stack<Question> questionStack, PotentialMatch pm, int matchIndex)
    {
        // Questions with the same or a lower priority number on the stack are finished.
        while (questionStack.Any() && pm.PatternConfig.Priority <= questionStack.Peek().Priority)
        {
            questionStack.Pop();
        }

        GroupCollection groups = pm.RegexMatch.Groups;
        if (groups.Count < 3)
        {
            throw new InvalidOperationException($"Question match at index {matchIndex} does not have enough regex groups for Number and Text.");
        }

        var newQuestion = new Question
        {
            Number = groups[1].Value.Trim(),
            Text = groups[2].Value.Trim(),
            Score = ParseOptionalScore(groups, 3, "Question", matchIndex),
            Priority = pm.PatternConfig.Priority
        };

        if (questionStack.Any())
        {
            questionStack.Peek().SubQuestions.Add(newQuestion);
        }
        else if (currentMajorQG != null)
        {
            currentMajorQG.Questions.Add(newQuestion);
        }
        else
        {
            examPaper.TopLevelQuestions.Add(newQuestion);
        }

        questionStack.Push(newQuestion);
        return newQuestion;
    }

    /// <summary>
    /// Adds an option (Group 1 = label, Group 2 = text) to the current question.
    /// </summary>
    /// <exception cref="InvalidOperationException">No question is open, or the match lacks the groups.</exception>
    private static void AddOption(Question currentQuestion, PotentialMatch pm, int matchIndex)
    {
        if (currentQuestion == null)
        {
            // An option without a preceding question indicates a structural issue in the exam text.
            throw new InvalidOperationException($"Found isolated Option at index {matchIndex} (MatchedText: '{pm.MatchedText}'). Options must belong to a question.");
        }
        if (pm.RegexMatch.Groups.Count < 3)
        {
            throw new InvalidOperationException($"Option match at index {matchIndex} does not have enough regex groups for Label and Text.");
        }

        currentQuestion.Options.Add(new Option
        {
            Label = pm.RegexMatch.Groups[1].Value.Trim(),
            Text = pm.RegexMatch.Groups[2].Value.Trim()
        });
    }

    /// <summary>
    /// Reads an optional score from the given capture group. Returns 0 when the group is absent
    /// or did not participate in the match. Parsing uses the invariant culture so "2.5" reads
    /// the same under every UI culture.
    /// </summary>
    /// <exception cref="FormatException">The captured text is not a number.</exception>
    private static float ParseOptionalScore(GroupCollection groups, int groupIndex, string ownerKind, int matchIndex)
    {
        if (groups.Count <= groupIndex || !groups[groupIndex].Success)
        {
            return 0;
        }

        string raw = groups[groupIndex].Value;
        if (!float.TryParse(raw, System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture, out float score))
        {
            throw new FormatException($"Failed to parse score '{raw}' for {ownerKind} at index {matchIndex}.");
        }
        return score;
    }

    // Appends addition to existing, separated by a newline unless existing is blank.
    private static string AppendLine(string existing, string addition)
    {
        return existing + (string.IsNullOrWhiteSpace(existing) ? "" : "\n") + addition;
    }

    /// <summary>
    /// Extracts the exam title: the first non-blank line of the text, or a placeholder if there is none.
    /// </summary>
    private string GetExamTitle(string examPaperText)
    {
        var firstLine = examPaperText
            .Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
            .FirstOrDefault(line => !string.IsNullOrWhiteSpace(line));

        return firstLine ?? "未识别试卷标题";
    }

    /// <summary>
    /// Returns the matches whose start position lies in [start, end).
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">The range is negative or inverted.</exception>
    private List<PotentialMatch> GetSubMatchesForRange(List<PotentialMatch> allMatches, int start, int end)
    {
        if (start < 0 || end < start)
        {
            throw new ArgumentOutOfRangeException(nameof(start), $"Invalid range provided to GetSubMatchesForRange. Start: {start}, End: {end}");
        }
        if (allMatches == null)
        {
            return new List<PotentialMatch>();
        }

        return allMatches
            .Where(pm => pm != null && pm.StartIndex >= start && pm.StartIndex < end)
            .ToList();
    }

    /// <summary>
    /// Adds continuation content to a question: option matches inside the segment become options,
    /// and the text around them is appended to the question stem.
    /// </summary>
    /// <param name="question">Question that receives the content.</param>
    /// <param name="contentText">Raw (untrimmed) text segment.</param>
    /// <param name="potentialMatchesInScope">Matches that start inside the segment, in absolute document positions.</param>
    /// <param name="contentStartOffset">Absolute position of <paramref name="contentText"/> in the document.</param>
    private void ProcessQuestionContent(Question question, string contentText, List<PotentialMatch> potentialMatchesInScope, int contentStartOffset)
    {
        if (question == null)
        {
            throw new ArgumentNullException(nameof(question), "Question cannot be null in ProcessQuestionContent.");
        }
        if (contentText == null) // contentText can be empty, but not null
        {
            throw new ArgumentNullException(nameof(contentText), "Content text cannot be null in ProcessQuestionContent.");
        }
        if (potentialMatchesInScope == null)
        {
            throw new ArgumentNullException(nameof(potentialMatchesInScope), "Potential matches in scope cannot be null.");
        }

        int cursor = 0; // position inside contentText up to which content has been consumed

        foreach (var pm in potentialMatchesInScope.Where(p => p.Type == MatchType.Option).OrderBy(p => p.StartIndex))
        {
            // Match positions are absolute; translate them into positions inside contentText.
            int relativeStart = pm.StartIndex - contentStartOffset;
            int relativeEnd = pm.EndIndex - contentStartOffset;

            if (relativeStart < cursor || relativeEnd < relativeStart || relativeEnd > contentText.Length)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(potentialMatchesInScope),
                    $"Option match at index {pm.StartIndex} has invalid indices within content text. MatchedText: '{pm.MatchedText}'");
            }

            AppendQuestionText(question, contentText.Substring(cursor, relativeStart - cursor));

            if (pm.RegexMatch == null || pm.RegexMatch.Groups.Count < 3)
            {
                throw new InvalidOperationException($"Option regex match '{pm.MatchedText}' does not have enough groups for label and text.");
            }

            question.Options.Add(new Option
            {
                Label = pm.RegexMatch.Groups[1].Value.Trim(),
                Text = pm.RegexMatch.Groups[2].Value.Trim()
            });
            cursor = relativeEnd;
        }

        // Whatever follows the last option (or the whole segment when there were none).
        AppendQuestionText(question, contentText.Substring(cursor));
    }

    // Appends the trimmed fragment to the question stem; blank fragments are ignored.
    private static void AppendQuestionText(Question question, string fragment)
    {
        string trimmed = fragment.Trim();
        if (trimmed.Length > 0)
        {
            question.Text = AppendLine(question.Text, trimmed);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Facade that scans an exam text and then builds the structured exam from the scan result.
/// </summary>
public class ExamParser
{
    private readonly ExamParserConfig _config;
    private readonly ExamDocumentScanner _scanner;
    private readonly ExamStructureBuilder _builder;

    /// <summary>Creates the scanner and the builder from the same configuration.</summary>
    public ExamParser(ExamParserConfig config)
    {
        _config = config;
        _scanner = new ExamDocumentScanner(config);
        _builder = new ExamStructureBuilder(config);
    }

    /// <summary>
    /// Parses the given exam text into a structured <see cref="ExamPaper"/>.
    /// </summary>
    /// <param name="examPaperText">The complete exam text.</param>
    /// <returns>The parsed <see cref="ExamPaper"/>.</returns>
    public ExamPaper ParseExamPaper(string examPaperText)
    {
        // Step 1: one scan over the whole text collects every potential match.
        var candidates = _scanner.Scan(examPaperText);

        // Step 2: a linear walk over the matches builds the hierarchy.
        return _builder.BuildExamPaper(examPaperText, candidates);
    }
}
|
||||
|
||||
}
|
43
TechHelper.Client/Exam/Exam/QuestionCard.razor
Normal file
43
TechHelper.Client/Exam/Exam/QuestionCard.razor
Normal file
@@ -0,0 +1,43 @@
|
||||
@using TechHelper.Client.Exam.Parse

@* Renders one question: number, stem, optional score, options, and (recursively) its sub-questions. *@
@* Nothing is rendered while the Question parameter is unset, so a missing parameter cannot throw during render. *@
@if (Question is not null)
{
    <MudCard Class="my-2 pa-2" Outlined="true" Elevation="1">
        <MudCardContent>
            <MudText Typo="Typo.subtitle1">
                @* NOTE(review): MarkupString emits the text as raw HTML; confirm the exam text is trusted or sanitized upstream. *@
                <b>@Question.Number</b> @((MarkupString)Question.Text)
                @if (Question.Score > 0)
                {
                    <MudText Typo="Typo.body2" Class="d-inline ml-2">(@Question.Score 分)</MudText>
                }
            </MudText>

            @* Options are rendered in a plain div rather than a MudList. *@
            @if (Question.Options.Any())
            {
                <div class="mt-2">
                    @foreach (var option in Question.Options)
                    {
                        <MudText Typo="Typo.body2" Class="my-1">
                            <b>@option.Label</b> @((MarkupString)option.Text)
                        </MudText>
                    }
                </div>
            }

            @* Sub-questions are rendered recursively with this same component. *@
            @if (Question.SubQuestions.Any())
            {
                <MudText Typo="Typo.subtitle2" Class="my-2">子题目:</MudText>
                @foreach (var subQuestion in Question.SubQuestions)
                {
                    <QuestionCard Question="subQuestion" />
                }
            }
        </MudCardContent>
    </MudCard>
}

@code {
    // The question to display; required by every caller.
    [Parameter, EditorRequired]
    public Question Question { get; set; }
}
|
@@ -0,0 +1,43 @@
|
||||
@using TechHelper.Client.Exam.Parse
@* SubMajorQuestionGroupDisplay.razor *@
@* Renders a list of major question groups as expansion panels; nested groups are rendered recursively. *@
@* Nothing is rendered while MajorQGList is unset, so a missing parameter cannot throw during render. *@
@if (MajorQGList is not null)
{
    <MudExpansionPanels>
        @foreach (var majorQG in MajorQGList)
        {
            <MudExpansionPanel Text="@majorQG.Title" DisableRipple="true">
                <MudCard Class="mt-2" Outlined="true">
                    <MudCardContent>
                        @if (!string.IsNullOrWhiteSpace(majorQG.Descript))
                        {
                            @* NOTE(review): MarkupString emits the text as raw HTML; confirm the exam text is trusted or sanitized upstream. *@
                            <MudText Typo="Typo.body2"><b>描述:</b> @((MarkupString)majorQG.Descript)</MudText>
                        }
                        @if (majorQG.Score > 0)
                        {
                            <MudText Typo="Typo.body2"><b>总分:</b> @majorQG.Score 分</MudText>
                        }

                        @* Questions that belong directly to this group. *@
                        @if (majorQG.Questions.Any())
                        {
                            <MudText Typo="Typo.subtitle1" Class="my-2">题目:</MudText>
                            @foreach (var question in majorQG.Questions)
                            {
                                <QuestionCard Question="question" />
                            }
                        }

                        @* Deeper sub-groups are rendered recursively with this same component. *@
                        @if (majorQG.SubMajorQuestionGroups.Any())
                        {
                            <MudText Typo="Typo.subtitle1" Class="my-2">子题组:</MudText>
                            <SubMajorQuestionGroupDisplay MajorQGList="majorQG.SubMajorQuestionGroups" />
                        }
                    </MudCardContent>
                </MudCard>
            </MudExpansionPanel>
        }
    </MudExpansionPanels>
}

@code {
    // The groups to display; required by every caller.
    [Parameter, EditorRequired]
    public List<MajorQuestionGroup> MajorQGList { get; set; }
}
|
@@ -1,429 +0,0 @@
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace TechHelper.Client.Exam.Parse
|
||||
{
|
||||
/// <summary>
/// Root object of a parsed exam: header information plus the question tree.
/// </summary>
public class ExamPaper
{
    /// <summary>Exam title; starts out as a placeholder string.</summary>
    public string Title { get; set; } = "未识别试卷标题";

    /// <summary>Exam description; starts out as a placeholder string.</summary>
    public string Descript { get; set; } = "未识别试卷描述";

    /// <summary>Subject area of the exam; starts out as a placeholder string.</summary>
    public string SubjectArea { get; set; } = "试卷类别";

    /// <summary>Top-level major question groups of the exam.</summary>
    public List<MajorQuestionGroup> MajorQuestionGroups { get; set; } = new();

    /// <summary>Questions attached directly to the exam.</summary>
    public List<Question> TopLevelQuestions { get; set; } = new();
}
|
||||
|
||||
/// <summary>
/// A section of the exam that can hold questions and nested sub-groups.
/// </summary>
public class MajorQuestionGroup
{
    /// <summary>Heading text of the group.</summary>
    public string Title { get; set; } = "";

    /// <summary>Free text that belongs to the group.</summary>
    public string Descript { get; set; } = "";

    /// <summary>Score assigned to the group; stays 0 when none was set.</summary>
    public float Score { get; set; }

    /// <summary>Groups nested inside this group.</summary>
    public List<MajorQuestionGroup> SubMajorQuestionGroups { get; set; } = new();

    /// <summary>Questions that belong directly to this group.</summary>
    public List<Question> Questions { get; set; } = new();

    /// <summary>Priority of the pattern that produced this group.</summary>
    public int Priority { get; set; }
}
|
||||
|
||||
/// <summary>
/// A single question, optionally carrying options and nested sub-questions.
/// </summary>
public class Question
{
    /// <summary>Question number as it was captured from the text.</summary>
    public string Number { get; set; } = "";

    /// <summary>Question stem.</summary>
    public string Text { get; set; } = "";

    /// <summary>Score assigned to the question; stays 0 when none was set.</summary>
    public float Score { get; set; }

    /// <summary>Answer options of the question.</summary>
    public List<Option> Options { get; set; } = new();

    /// <summary>Questions nested below this one.</summary>
    public List<Question> SubQuestions { get; set; } = new();

    /// <summary>Priority of the pattern that produced this question.</summary>
    public int Priority { get; set; }
}
|
||||
|
||||
/// <summary>
/// One answer option of a question.
/// </summary>
public class Option
{
    /// <summary>Option label.</summary>
    public string Label { get; set; } = "";

    /// <summary>Option content.</summary>
    public string Text { get; set; } = "";
}
|
||||
|
||||
|
||||
/// <summary>
/// A regular expression paired with a priority.
/// </summary>
public class RegexPatternConfig
{
    /// <summary>Source text of the regular expression.</summary>
    public string Pattern { get; set; }

    /// <summary>Priority of the pattern; a smaller number means a higher priority.</summary>
    public int Priority { get; set; }

    /// <summary>
    /// Regex built once in the constructor. It is not rebuilt when <see cref="Pattern"/> is reassigned.
    /// </summary>
    public Regex Regex { get; private set; }

    /// <summary>
    /// Stores the pattern and priority and builds the regex in multiline, compiled mode.
    /// </summary>
    /// <param name="pattern">Regular expression source text.</param>
    /// <param name="priority">Priority to associate with the pattern.</param>
    public RegexPatternConfig(string pattern, int priority)
    {
        const RegexOptions options = RegexOptions.Multiline | RegexOptions.Compiled;

        Regex = new Regex(pattern, options);
        Pattern = pattern;
        Priority = priority;
    }
}
|
||||
|
||||
/// <summary>
/// Configuration for exam parsing; holds every regular expression used by the parser.
/// </summary>
public class ExamParserConfig
{
    /// <summary>Patterns that recognise major question group headings.</summary>
    public List<RegexPatternConfig> MajorQuestionGroupPatterns { get; set; } = new List<RegexPatternConfig>();

    /// <summary>Patterns that recognise question lines.</summary>
    public List<RegexPatternConfig> QuestionPatterns { get; set; } = new List<RegexPatternConfig>();

    /// <summary>Patterns that recognise option lines.</summary>
    public List<RegexPatternConfig> OptionPatterns { get; set; } = new List<RegexPatternConfig>();

    /// <summary>
    /// Registers the default patterns.
    /// </summary>
    public ExamParserConfig()
    {
        // NOTE(review): group pattern 2 and the question pattern both match lines of the form
        // "1. ..." — confirm the consumer copes with two hits at the same position.
        MajorQuestionGroupPatterns.AddRange(new[]
        {
            // e.g. 一、选择题 (5分)
            new RegexPatternConfig(@"^[一二三四五六七八九十]+\s*[、.]\s*(.+?)(?:\s*\((\d+)\s*分\))?$", 1),
            // e.g. 1. 填空题 (10分)
            new RegexPatternConfig(@"^\d+\.\s*(.+?)(?:\s*\((\d+)\s*分\))?$", 2),
            // e.g. (一) 文言文阅读 (8分)
            new RegexPatternConfig(@"^(\(.+\))\s*(.+?)(?:\s*\((\d+)\s*分\))?$", 3),
        });

        // e.g. 1. question stem
        QuestionPatterns.Add(new RegexPatternConfig(@"^(\d+)\.\s*(.*)$", 1));

        // e.g. A. option content
        OptionPatterns.Add(new RegexPatternConfig(@"^[A-D]\.\s*(.*)$", 1));
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/// <summary>
/// One regex hit found while scanning the exam text, with the pattern that produced it.
/// </summary>
public class PotentialMatch
{
    /// <summary>Start position of the hit in the original text.</summary>
    public int StartIndex { get; set; }

    /// <summary>End position of the hit in the original text.</summary>
    public int EndIndex { get; set; }

    /// <summary>The complete matched line or passage.</summary>
    public string MatchedText { get; set; }

    /// <summary>The underlying regex match, kept so capture groups can be read later.</summary>
    public Match RegexMatch { get; set; }

    /// <summary>The pattern configuration that produced the hit.</summary>
    public RegexPatternConfig PatternConfig { get; set; }

    /// <summary>Kind of structure the hit represents.</summary>
    public MatchType Type { get; set; }
}
|
||||
|
||||
/// <summary>
/// Kinds of structure a <c>PotentialMatch</c> can represent.
/// </summary>
public enum MatchType
{
    /// <summary>A major question group heading.</summary>
    MajorQuestionGroup = 0,

    /// <summary>A question line.</summary>
    Question = 1,

    /// <summary>An option line.</summary>
    Option = 2,

    /// <summary>Any other kind that may need to be recognised.</summary>
    Other = 3
}
|
||||
|
||||
|
||||
/// <summary>
/// Scans the raw text and collects every potential match (groups, questions, options).
/// It only matches; it does not decide which structure a hit belongs to.
/// </summary>
public class ExamDocumentScanner
{
    private readonly ExamParserConfig _config;

    /// <summary>Creates a scanner that uses the patterns of <paramref name="config"/>.</summary>
    public ExamDocumentScanner(ExamParserConfig config) => _config = config;

    /// <summary>
    /// Runs every configured pattern over <paramref name="text"/> and returns all hits
    /// ordered by their start position.
    /// </summary>
    /// <param name="text">Text to scan.</param>
    /// <returns>All hits as <see cref="PotentialMatch"/> items.</returns>
    public List<PotentialMatch> Scan(string text)
    {
        var found = new List<PotentialMatch>();

        // Collection order (groups, questions, options) is kept because the final sort is stable.
        Collect(text, _config.MajorQuestionGroupPatterns, MatchType.MajorQuestionGroup, found);
        Collect(text, _config.QuestionPatterns, MatchType.Question, found);
        Collect(text, _config.OptionPatterns, MatchType.Option, found);

        return found.OrderBy(candidate => candidate.StartIndex).ToList();
    }

    // Appends one PotentialMatch of the given type to sink for every hit of every pattern.
    private static void Collect(string text, List<RegexPatternConfig> patterns, MatchType type, List<PotentialMatch> sink)
    {
        foreach (var patternConfig in patterns)
        {
            foreach (Match hit in patternConfig.Regex.Matches(text))
            {
                sink.Add(new PotentialMatch
                {
                    StartIndex = hit.Index,
                    EndIndex = hit.Index + hit.Length,
                    MatchedText = hit.Value,
                    RegexMatch = hit,
                    PatternConfig = patternConfig,
                    Type = type
                });
            }
        }
    }
}
|
||||
|
||||
public class ExamStructureBuilder
|
||||
{
|
||||
private readonly ExamParserConfig _config;
|
||||
|
||||
// Keeps a reference to the parser configuration; no validation is performed here.
public ExamStructureBuilder(ExamParserConfig config) => _config = config;
|
||||
|
||||
public ExamPaper BuildExamPaper(string fullExamText, List<PotentialMatch> allPotentialMatches)
|
||||
{
|
||||
var examPaper = new ExamPaper();
|
||||
examPaper.Title = GetExamTitle(fullExamText);
|
||||
|
||||
var majorQGStack = new Stack<MajorQuestionGroup>();
|
||||
MajorQuestionGroup currentMajorQG = null;
|
||||
|
||||
var questionStack = new Stack<Question>();
|
||||
Question currentQuestion = null;
|
||||
|
||||
int currentContentStart = 0;
|
||||
|
||||
|
||||
if (allPotentialMatches.Any() && allPotentialMatches[0].StartIndex > 0)
|
||||
{
|
||||
string introText = fullExamText.Substring(0, allPotentialMatches[0].StartIndex).Trim();
|
||||
// 可以选择将这部分文本存储到 ExamPaper 的某个属性,例如 ExamPaper.Description
|
||||
}
|
||||
|
||||
|
||||
// 这里需要处理currentContentStart的位置,到allPotentialMatches[0].StartIndex
|
||||
|
||||
|
||||
for (int i = 0; i < allPotentialMatches.Count; i++)
|
||||
{
|
||||
var pm = allPotentialMatches[i];
|
||||
|
||||
string precedingText = fullExamText.Substring(currentContentStart, pm.StartIndex - currentContentStart).Trim();
|
||||
if (!string.IsNullOrWhiteSpace(precedingText))
|
||||
{
|
||||
if (currentQuestion != null)
|
||||
{
|
||||
ProcessQuestionContent(currentQuestion, precedingText,
|
||||
GetSubMatchesForRange(allPotentialMatches, currentContentStart, pm.StartIndex));
|
||||
}
|
||||
else if (currentMajorQG != null)
|
||||
{
|
||||
currentMajorQG.Descript += (string.IsNullOrWhiteSpace(currentMajorQG.Descript) ? "" : "\n") + precedingText;
|
||||
}
|
||||
else
|
||||
{
|
||||
// 暂时忽略,或可以添加到 ExamPaper.Description
|
||||
}
|
||||
}
|
||||
|
||||
if (pm.Type == MatchType.MajorQuestionGroup)
|
||||
{
|
||||
// 1. 确定当前 MajorQuestionGroup 的层级关系
|
||||
while (majorQGStack.Any() && pm.PatternConfig.Priority <= majorQGStack.Peek().Priority)
|
||||
{
|
||||
// 当前 QG 的优先级等于或高于栈顶 QG,说明栈顶 QG 已经结束
|
||||
majorQGStack.Pop();
|
||||
}
|
||||
|
||||
MajorQuestionGroup newMajorQG = new MajorQuestionGroup
|
||||
{
|
||||
Title = pm.RegexMatch.Groups[1].Value.Trim(),
|
||||
Score = (pm.RegexMatch.Groups.Count > 2 && pm.RegexMatch.Groups[2].Success) ? float.Parse(pm.RegexMatch.Groups[2].Value) : 0,
|
||||
Priority = pm.PatternConfig.Priority
|
||||
};
|
||||
|
||||
if (majorQGStack.Any())
|
||||
{
|
||||
majorQGStack.Peek().SubMajorQuestionGroups.Add(newMajorQG);
|
||||
}
|
||||
else
|
||||
{
|
||||
examPaper.MajorQuestionGroups.Add(newMajorQG);
|
||||
}
|
||||
|
||||
majorQGStack.Push(newMajorQG);
|
||||
currentMajorQG = newMajorQG;
|
||||
questionStack.Clear();
|
||||
currentQuestion = null;
|
||||
}
|
||||
else if (pm.Type == MatchType.Question)
|
||||
{
|
||||
// 1. 确定当前 Question 的层级关系(子题目)
|
||||
// 找到比当前 Question 优先级高或相等的 Question 作为其父级
|
||||
while (questionStack.Any() && pm.PatternConfig.Priority <= questionStack.Peek().Priority)
|
||||
{
|
||||
// 如果当前 Question 的优先级等于或高于栈顶 Question,说明栈顶 Question 已经结束
|
||||
questionStack.Pop();
|
||||
}
|
||||
|
||||
Question newQuestion = new Question
|
||||
{
|
||||
Number = pm.RegexMatch.Groups[1].Value.Trim(),
|
||||
Text = pm.RegexMatch.Groups[2].Value.Trim(),
|
||||
Priority = pm.PatternConfig.Priority
|
||||
};
|
||||
if (pm.RegexMatch.Groups.Count > 2 && pm.RegexMatch.Groups[2].Success)
|
||||
{
|
||||
float.TryParse(pm.RegexMatch.Groups[2].Value, out float score);
|
||||
newQuestion.Score = score;
|
||||
}
|
||||
|
||||
if (questionStack.Any())
|
||||
{
|
||||
questionStack.Peek().SubQuestions.Add(newQuestion);
|
||||
}
|
||||
else if (currentMajorQG != null)
|
||||
{
|
||||
// 归属于当前活跃的 MajorQuestionGroup
|
||||
currentMajorQG.Questions.Add(newQuestion);
|
||||
}
|
||||
else
|
||||
{
|
||||
// 没有活跃的 MajorQuestionGroup 或 Question,作为 ExamPaper 的顶级 Questions
|
||||
examPaper.TopLevelQuestions.Add(newQuestion);
|
||||
}
|
||||
|
||||
questionStack.Push(newQuestion); // 新的 Question 入栈,成为当前活跃 Question
|
||||
currentQuestion = newQuestion;
|
||||
}
|
||||
else if (pm.Type == MatchType.Option)
|
||||
{
|
||||
// 选项必须归属于一个题目
|
||||
if (currentQuestion != null)
|
||||
{
|
||||
Option newOption = new Option
|
||||
{
|
||||
Label = pm.RegexMatch.Groups[1].Value.Trim(),
|
||||
Text = pm.RegexMatch.Groups[2].Value.Trim()
|
||||
};
|
||||
currentQuestion.Options.Add(newOption);
|
||||
}
|
||||
else
|
||||
{
|
||||
// 孤立的选项,可能需要日志记录或错误处理
|
||||
Console.WriteLine($"Warning: Found isolated Option at index {pm.StartIndex}: {pm.MatchedText}");
|
||||
}
|
||||
}
|
||||
|
||||
// --- 步骤3: 更新 currentContentStart 为当前匹配点的 EndIndex ---
|
||||
// 下一次循环将从这里开始提取内容
|
||||
currentContentStart = pm.EndIndex;
|
||||
}
|
||||
|
||||
// --- 步骤4: 处理循环结束后,最后一个匹配点之后到文本末尾的剩余内容 ---
|
||||
if (currentContentStart < fullExamText.Length)
|
||||
{
|
||||
string remainingText = fullExamText.Substring(currentContentStart).Trim();
|
||||
if (!string.IsNullOrWhiteSpace(remainingText))
|
||||
{
|
||||
if (currentQuestion != null)
|
||||
{
|
||||
// 最后一个题目后面的内容(可能是选项或多行描述)
|
||||
ProcessQuestionContent(currentQuestion, remainingText,
|
||||
GetSubMatchesForRange(allPotentialMatches, currentContentStart, fullExamText.Length));
|
||||
}
|
||||
else if (currentMajorQG != null)
|
||||
{
|
||||
// 最后一个题组后面的内容(可能是描述或题目)
|
||||
currentMajorQG.Descript += (string.IsNullOrWhiteSpace(currentMajorQG.Descript) ? "" : "\n") + remainingText;
|
||||
}
|
||||
else
|
||||
{
|
||||
// 顶级剩余文本,可能作为 ExamPaper 的整体描述
|
||||
// examPaper.Description += remainingText;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return examPaper;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// 提取试卷标题 (简单实现)
|
||||
/// </summary>
|
||||
private string GetExamTitle(string examPaperText)
|
||||
{
|
||||
var firstLine = examPaperText.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
|
||||
.FirstOrDefault(line => !string.IsNullOrWhiteSpace(line));
|
||||
return firstLine ?? "未识别试卷标题";
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// 获取给定 PotentialMatch 列表在指定范围内的子集。
|
||||
/// 这个方法用于辅助 ProcessQuestionContent,为其提供该范围内的 Options 和 SubQuestions。
|
||||
/// </summary>
|
||||
private List<PotentialMatch> GetSubMatchesForRange(List<PotentialMatch> allMatches, int start, int end)
|
||||
{
|
||||
// 注意:这里需要考虑 potentialMatches 的索引与 fullExamText 索引的映射
|
||||
// 这里的 StartIndex 是相对于 fullExamText 的
|
||||
return allMatches.Where(pm => pm.StartIndex >= start && pm.StartIndex < end).ToList();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// 处理 Question 的内容,主要用于解析 Options 和识别非结构化文本。
|
||||
/// </summary>
|
||||
private void ProcessQuestionContent(Question question, string contentText, List<PotentialMatch> potentialMatchesInScope)
|
||||
{
|
||||
// 遍历当前范围内的所有 PotentialMatch,找出 Options
|
||||
var optionsText = new System.Text.StringBuilder();
|
||||
int lastOptionEndIndex = 0; // 记录最后一个处理的选项的结束位置
|
||||
|
||||
foreach (var pm in potentialMatchesInScope.OrderBy(p => p.StartIndex))
|
||||
{
|
||||
// 检查是否是选项
|
||||
if (pm.Type == MatchType.Option)
|
||||
{
|
||||
// 收集选项之间的文本作为题干的延续或描述
|
||||
if (pm.StartIndex > lastOptionEndIndex)
|
||||
{
|
||||
string textBeforeOption = contentText.Substring(lastOptionEndIndex, pm.StartIndex - lastOptionEndIndex).Trim();
|
||||
if (!string.IsNullOrWhiteSpace(textBeforeOption))
|
||||
{
|
||||
question.Text += (string.IsNullOrWhiteSpace(question.Text) ? "" : "\n") + textBeforeOption;
|
||||
}
|
||||
}
|
||||
|
||||
var newOption = new Option
|
||||
{
|
||||
Label = pm.RegexMatch.Groups[1].Value.Trim(),
|
||||
Text = pm.RegexMatch.Groups[2].Value.Trim()
|
||||
};
|
||||
question.Options.Add(newOption);
|
||||
lastOptionEndIndex = pm.EndIndex;
|
||||
}
|
||||
// TODO: 如果有 SubQuestion 类型,在这里也可以类似处理
|
||||
// else if (pm.Type == MatchType.Question && pm.PatternConfig.Priority > question.Priority)
|
||||
// {
|
||||
// // 这是一个子题目,需要进一步解析
|
||||
// // 递归调用,但这里的逻辑会更复杂,因为需要识别子题目自己的Options
|
||||
// // 可能会在这里创建一个临时的 Question,然后递归 ProcessQuestionContent
|
||||
// }
|
||||
}
|
||||
|
||||
// 处理所有选项之后剩余的文本
|
||||
if (lastOptionEndIndex < contentText.Length)
|
||||
{
|
||||
string remainingContent = contentText.Substring(lastOptionEndIndex).Trim();
|
||||
if (!string.IsNullOrWhiteSpace(remainingContent))
|
||||
{
|
||||
question.Text += (string.IsNullOrWhiteSpace(question.Text) ? "" : "\n") + remainingContent;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Reference in New Issue
Block a user