diff --git a/Smart_Summarize/AIAssitViewModel.cs b/Smart_Summarize/AIAssitViewModel.cs index e137a13..f27ac79 100644 --- a/Smart_Summarize/AIAssitViewModel.cs +++ b/Smart_Summarize/AIAssitViewModel.cs @@ -94,26 +94,15 @@ public AIAssitViewModel(PdfViewerControl viewer) /// public async Task GenerateMessages() { - Chats.Add(new TextMessage - { - Author = currentUser, - DateTime = DateTime.Now, - Text = "Summarizing the PDF document..." - }); - // Execute the following asynchronously await ExtractDetailsFromPDF(); - string summaryText = await SummarizePDF(); - - // Update chats on the UI thread Chats.Add(new TextMessage { - Author = new Author { Name = "AIAssistant" }, + Author = currentUser, DateTime = DateTime.Now, - Text = summaryText + Text = "Summarizing the PDF document..." }); - await AddSuggestions(summaryText); } /// /// Generate suggestion from the answers @@ -133,48 +122,20 @@ private async Task AddSuggestions(String text) /// private async Task ExtractDetailsFromPDF() { - StringBuilder extractedText = new StringBuilder(); + List extractedText = new List(); Syncfusion.Pdf.TextLines textLines = new Syncfusion.Pdf.TextLines(); //Extract the text from the PDF document for (int pageIndex = 0; pageIndex < pdfViewer.PageCount; pageIndex++) { string text = $"... Page {pageIndex + 1} ...\n"; text += pdfViewer.ExtractText(pageIndex, out textLines); - extractedText.AppendLine(text); + extractedText.Add(text); } - return ProcessExtractedText(extractedText.ToString()); - } - /// - /// Processes the extracted full text from a document by splitting it into pages. - /// - /// The complete extracted text from the document. - /// A formatted string containing the processed text. - private string ProcessExtractedText(string fullText) - { - string[] pages = fullText.Split(new string[] { "\f", "\n\nPage " }, StringSplitOptions.RemoveEmptyEntries); - for (int i = 0; i < pages.Length; i++) - { - processedText.AppendLine($"... Page {i + 1} ..."); - string[] lines = pages[i].Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries); - int maxLines = Math.Min(1000, lines.Length); + microsoftAIExtension.CreateEmbeddedPage(extractedText.ToArray()); - for (int j = 0; j < maxLines; j++) - { - processedText.AppendLine(lines[j]); - } - processedText.AppendLine(); - } - return processedText.ToString(); - } - /// - /// Summarizes the extracted text from the PDF using Extension AI. - /// - private async Task SummarizePDF() - { - //Summarize the text using the Semantic Kernel AI - string summary = await microsoftAIExtension.GetAnswerFromGPT(processedText.ToString()); - return summary; + return extractedText.ToString(); } + /// /// Handles the event when the chat collection changes. /// @@ -187,23 +148,18 @@ private async void Chats_CollectionChanged(object sender, System.Collections.Spe var item = e.NewItems[0] as ITextMessage; if (item != null) { - if (item.Text != "Summarizing the PDF document...") + if (item.Author.Name == currentUser.Name) { - if (item.Author.Name == currentUser.Name) + string answer = await microsoftAIExtension.AnswerQuestion(item.Text); + Chats.Add(new TextMessage { - string answer = await microsoftAIExtension.GetAnswerFromGPT("You are a helpful assistant. Your task is to analyze the provided question and answer the question based on the pdf", item.Text); - Chats.Add(new TextMessage - { - Author = new Author { Name = "AIAssistant" }, - DateTime = DateTime.Now, - Text = answer - }); - Suggestion.Clear(); - await AddSuggestions(answer); - } - + Author = new Author { Name = "AIAssistant" }, + DateTime = DateTime.Now, + Text = answer + }); + Suggestion.Clear(); + await AddSuggestions(answer); } - } } } diff --git a/Smart_Summarize/MicrosoftAIExtension.cs b/Smart_Summarize/MicrosoftAIExtension.cs index e5c58c2..18f5beb 100644 --- a/Smart_Summarize/MicrosoftAIExtension.cs +++ b/Smart_Summarize/MicrosoftAIExtension.cs @@ -1,5 +1,6 @@ -using Microsoft.Extensions.AI; -using Azure.AI.OpenAI; +using Azure.AI.OpenAI; +using Microsoft.Extensions.AI; +using SmartComponents.LocalEmbeddings; using System.Text; namespace Smart_Summarize { @@ -8,10 +9,11 @@ internal class MicrosoftAIExtension const string endpoint = "YOUR-AI-ENDPOINT"; const string deploymentName = "YOUR-DEPLOYMENT-NAME"; - internal string key = string.Empty; private IChatClient clientAI; + public Dictionary? PageEmbeddings { get; set; } + /// /// Initializes a new instance of the class. /// @@ -64,5 +66,33 @@ public async Task GetAnswerFromGPT(string systemPrompt, string userText) } return string.Empty; } + + /// + /// Create the embedded page from the extracted chunks in the PDF + /// + /// Extracted text from pdfViewer + /// + public async Task CreateEmbeddedPage(string[] chunks) + { + var embedder = new LocalEmbedder(); + PageEmbeddings = chunks.Select(x => KeyValuePair.Create(x, embedder.Embed(x))).ToDictionary(k => k.Key, v => v.Value); + } + + + public async Task AnswerQuestion(string question) + { + var embedder = new LocalEmbedder(); + var questionEmbedding = embedder.Embed(question); + var results = LocalEmbedder.FindClosestWithScore(questionEmbedding, PageEmbeddings.Select(x => (x.Key, x.Value)), 5, 0.5f); + StringBuilder builder = new StringBuilder(); + foreach (var result in results) + { + builder.AppendLine(result.Item); + } + string message = builder.ToString(); + var answer = await GetAnswerFromGPT("You are a helpful assistant. Use the provided PDF document pages and pick a precise page to answer the user question. Provide the answer in plain text without any special formatting or Markdown syntax. Pages: " + message, question); + + return answer; + } } -} +} \ No newline at end of file diff --git a/Smart_Summarize/Smart_Summarize.csproj b/Smart_Summarize/Smart_Summarize.csproj index 3aad4f5..204faa9 100644 --- a/Smart_Summarize/Smart_Summarize.csproj +++ b/Smart_Summarize/Smart_Summarize.csproj @@ -8,9 +8,6 @@ true - - - @@ -21,10 +18,9 @@ + - - - +