From cb20aee1cab810da52ea38cff3ba51a241cd6ffb Mon Sep 17 00:00:00 2001 From: Bohan Date: Tue, 11 Mar 2025 11:16:46 +0800 Subject: [PATCH 1/5] fix upload document for knowledgebase with file --- DifySharp.Demo.AspNet/Program.cs | 80 +++++- .../KnowledgeBaseApiTest/DocumentApiTest.cs | 263 ++++++++++-------- DifySharp/Apis/KnowledgeBaseApi.cs | 9 +- .../KnowledgeBase/Document/CreateByFile.cs | 7 +- DifySharp/DepedncyInjection.cs | 20 +- DifySharp/DifyClient/DifyClient.cs | 15 +- DifySharp/DifySharp.csproj | 9 + 7 files changed, 264 insertions(+), 139 deletions(-) diff --git a/DifySharp.Demo.AspNet/Program.cs b/DifySharp.Demo.AspNet/Program.cs index e7c610d..6d8feec 100644 --- a/DifySharp.Demo.AspNet/Program.cs +++ b/DifySharp.Demo.AspNet/Program.cs @@ -1,8 +1,11 @@ +using System.Text.Json; +using System.Text.Json.Serialization; using DifySharp; using DifySharp.Chat.ChatMessages; using DifySharp.Extensions; using DifySharp.KnowledgeBase; using DifySharp.KnowledgeBase.Document; +using WebApiClientCore.Parameters; var builder = WebApplication.CreateBuilder(args); @@ -17,13 +20,18 @@ var app = builder.Build(); -app.MapGet("/", async (IServiceProvider sp) => +var group = app.MapGroup("example"); +var knowledgeGroup = group.MapGroup("knowledge"); + +knowledgeGroup.MapGet("create_file_by_text", async (IServiceProvider sp) => { - var api = sp.GetRequiredKeyedService("knowledge"); // get client instance by name in configuration + var api = sp + .GetRequiredKeyedService("knowledge"); // get client instance by name in configuration var uuid = Guid.NewGuid().ToString("N")[..6]; - var response = await api.PostCreateDocumentByTextAsync("", // add a dataset id here + var response = await api.PostCreateDocumentByTextAsync( + "", // add a dataset id here new CreateByText.RequestBody( $"Test Document {uuid}", "Test Content", @@ -78,16 +86,74 @@ }; }); +knowledgeGroup.MapGet("create_file_by_file", async (IServiceProvider sp) => +{ + var api = sp + .GetRequiredKeyedService("knowledge"); // get client instance by name in configuration + + var uuid = Guid.NewGuid().ToString("N")[..6]; + + var tmpFile = Path.GetTempFileName(); + await File.WriteAllTextAsync(tmpFile, "Test Content"); + + var _defaultProcessRule = new ProcessRule( + "automatic", + new Rules( + [ + new PreProcessingRule( + "remove_extra_spaces", + true + ), + new PreProcessingRule( + "remove_urls_emails", + true + ) + ], + new Segmentation( + "\n\n", + 1000 + ), + "paragraph", + new SubChunkSegmentation( + "\n\n", + 1000, + 200 + ) + ) + ); + + // var dataJsonStr = + // "{\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}"; + + var data = new CreateByFile.Data( + null, + IndexingTechnique.Economy, + DocForm.TextModel, + "", + _defaultProcessRule + ); + + var response = await api.PostCreateDocumentByFileAsync( // add a dataset id here + "", data, new FormDataFile("")); + + var document = response.Document; + + return new + { + document + }; +}); + app.MapGet("/ChatApiDemo/ChatMessagesBlocking", async (IServiceProvider sp) => { // get chat client instance by name in configuration - var client = sp.GetRequiredKeyedService("chat"); - + var client = sp.GetRequiredKeyedService("chat"); + // send chat message in blocking mode var response = await client.PostChatMessageBlocking(new ChatMessage.RequestBody { - Query = "ping", - User = "test-user" + Query = "ping", + User = "test-user" }); return new diff --git a/DifySharp.Test/Apis/KnowledgeBaseApiTest/DocumentApiTest.cs b/DifySharp.Test/Apis/KnowledgeBaseApiTest/DocumentApiTest.cs index bcd046f..d6329d6 100644 --- a/DifySharp.Test/Apis/KnowledgeBaseApiTest/DocumentApiTest.cs +++ b/DifySharp.Test/Apis/KnowledgeBaseApiTest/DocumentApiTest.cs @@ -1,130 +1,169 @@ +using System.Text.Json; using DifySharp.KnowledgeBase; using DifySharp.KnowledgeBase.Dataset; using DifySharp.KnowledgeBase.Document; using JetBrains.Annotations; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; +using WebApiClientCore.Parameters; namespace DifySharp.Test.Apis.KnowledgeBaseApiTest; public class DocumentApiTestFixture : KnowledgeBaseApiTestFixture { - public Dataset Dataset { get; private set; } - public KnowledgeBaseClient Client { get; } - - public DocumentApiTestFixture() - { - Client = ServiceProvider.GetRequiredKeyedService("knowledge"); - - // create a dataset - var uuid = Guid.NewGuid().ToString("N")[..6]; - Dataset = - Client.PostCreateDatasetAsync(new Create.RequestBody(Name: $"Test Dataset {uuid}")) - .GetAwaiter() - .GetResult(); - } - - public override void Dispose() - { - Client.DeleteDataset(Dataset.Id).GetAwaiter().GetResult(); - Client.Dispose(); - base.Dispose(); - } + public Dataset Dataset { get; private set; } + public KnowledgeBaseClient Client { get; } + + public DocumentApiTestFixture() + { + Client = ServiceProvider.GetRequiredKeyedService("knowledge"); + + // create a dataset + var uuid = Guid.NewGuid().ToString("N")[..6]; + Dataset = + Client.PostCreateDatasetAsync(new Create.RequestBody(Name: $"Test Dataset {uuid}")) + .GetAwaiter() + .GetResult(); + } + + public override void Dispose() + { + Client.DeleteDataset(Dataset.Id).GetAwaiter().GetResult(); + Client.Dispose(); + base.Dispose(); + } } [TestSubject(typeof(IDocumentApi))] public class DocumentApiTest( - DocumentApiTestFixture fixture, - ILogger logger + DocumentApiTestFixture fixture, + ILogger logger ) : IClassFixture { - private static Document? Document { get; set; } - private Dataset Dataset => fixture.Dataset; - - private KnowledgeBaseClient Client => fixture.Client; - - [Fact, TestPriority(1)] - public async Task TestCreateDocumentByText_ShouldHaveDocumentInfoInResponse() - { - Assert.Null(Document); - - var uuid = Guid.NewGuid().ToString("N")[..6]; - - var response = await Client.PostCreateDocumentByTextAsync( - Dataset.Id, - new CreateByText.RequestBody( - $"Test Document {uuid}", - "Test Content", - IndexingTechnique.Economy, - DocForm.TextModel, - "", - new ProcessRule( - "automatic", - new Rules( - [ - new PreProcessingRule( - "remove_extra_spaces", - true - ), - new PreProcessingRule( - "remove_urls_emails", - true - ) - ], - new Segmentation( - "\n\n", - 1000 - ), - "paragraph", - new SubChunkSegmentation( - "\n\n", - 1000, - 200 - ) - ) - ), - new CreateByText.RetrievalModel( - CreateByText.SearchMethod.HybridSearch, - false, - new CreateByText.RerankingModel( - "", - "" - ), - 4, - false, - 0.9f - ), - "", - "" - )); - - - Assert.NotNull(response.Document); - Assert.NotEmpty(response.Document.Id); - Document = response.Document; - } - - [Fact, TestPriority(2)] - public async Task TestListDocument_ShouldContainsDocumentInDataset() - { - Assert.NotNull(Document); - - var response = await Client.GetDocuments(Dataset.Id); - - var documents = response.Data; - - Assert.Contains(documents, doc => doc.Id == Document.Id); - } - - [Fact, TestPriority(3)] - public async Task TestDeleteDocument_ShouldNotContainsDocumentInDataset() - { - Assert.NotNull(Document); - - var response = await Client.DeleteDocument(Dataset.Id, Document.Id); - Assert.Equal("success", response.Result); - - var documents = await Client.GetDocuments(Dataset.Id); - Assert.DoesNotContain(documents.Data, doc => doc.Id == Document.Id); - } + private static List Documents { get; set; } = []; + private Dataset Dataset => fixture.Dataset; + + private KnowledgeBaseClient Client => fixture.Client; + + private readonly ProcessRule _defaultProcessRule = new( + "automatic", + new Rules( + [ + new PreProcessingRule( + "remove_extra_spaces", + true + ), + new PreProcessingRule( + "remove_urls_emails", + true + ) + ], + new Segmentation( + "\n\n", + 1000 + ), + "paragraph", + new SubChunkSegmentation( + "\n\n", + 1000, + 200 + ) + ) + ); + + private readonly CreateByText.RetrievalModel _defaultRetrievalModel = new( + CreateByText.SearchMethod.HybridSearch, + false, + new CreateByText.RerankingModel( + "", + "" + ), + 4, + false, + 0.9f + ); + + [Fact, TestPriority(1)] + public async Task TestCreateDocumentByText_ShouldHaveDocumentInfoInResponse() + { + // Assert.Null(Document); + + var uuid = Guid.NewGuid().ToString("N")[..6]; + + var response = await Client.PostCreateDocumentByTextAsync( + Dataset.Id, + new CreateByText.RequestBody( + $"Test Document {uuid}", + "Test Content", + IndexingTechnique.Economy, + DocForm.TextModel, + "", + _defaultProcessRule, + _defaultRetrievalModel, + "", + "" + )); + + + Assert.NotNull(response.Document); + Assert.NotEmpty(response.Document.Id); + Documents.Add(response.Document); + } + + + [Fact, TestPriority(1)] + public async Task TestCreateDocumentByFile_shouldHaveDocumentInfoInResponse() + { + var uuid = Guid.NewGuid().ToString("N")[..6]; + + var tempFileName = $"Test Document {uuid}.md"; + var tempFilePath = Path.Combine(Path.GetTempPath(), tempFileName); + await File.WriteAllTextAsync(tempFilePath, "Test Content"); + await File.WriteAllTextAsync(tempFilePath, "Test Content2"); + await File.WriteAllTextAsync(tempFilePath, "Test Content3"); + + var data = new CreateByFile.Data( + null, + IndexingTechnique.Economy, + DocForm.TextModel, + "", + _defaultProcessRule + ); + var response = await Client.PostCreateDocumentByFileAsync( // tmpFilePath + Dataset.Id, data, new FormDataFile(tempFilePath)); + + Assert.NotNull(response.Document); + Assert.NotEmpty(response.Document.Id); + Documents.Add(response.Document); + } + + [Fact, TestPriority(2)] + public async Task TestListDocument_ShouldContainsDocumentInDataset() + { + var response = await Client.GetDocuments(Dataset.Id); + + var documents = response.Data; + + foreach (var doc in Documents) + { + Assert.Contains(documents, d => d.Id == doc.Id); + } + } + + [Fact, TestPriority(3)] + public async Task TestDeleteDocument_ShouldNotContainsDocumentInDataset() + { + foreach (var doc in Documents) + { + var response = await Client.DeleteDocument(Dataset.Id, doc.Id); + Assert.Equal("success", response.Result); + } + + var documents = await Client.GetDocuments(Dataset.Id); + + foreach (var doc in Documents) + { + Assert.DoesNotContain(documents.Data, d => d.Id == doc.Id); + } + } } \ No newline at end of file diff --git a/DifySharp/Apis/KnowledgeBaseApi.cs b/DifySharp/Apis/KnowledgeBaseApi.cs index e3a14bf..588229f 100644 --- a/DifySharp/Apis/KnowledgeBaseApi.cs +++ b/DifySharp/Apis/KnowledgeBaseApi.cs @@ -3,6 +3,7 @@ using DifySharp.KnowledgeBase.Dataset; using DifySharp.KnowledgeBase.Document; using WebApiClientCore.Attributes; +using WebApiClientCore.Parameters; namespace DifySharp.Apis { @@ -73,14 +74,14 @@ [JsonContent] CreateByText.RequestBody body /// 此接口基于已存在知识库,在此知识库的基础上通过文件创建新的文档 /// /// - /// + /// /// /// [HttpPost("/v1/datasets/{datasetId}/document/create_by_file")] public Task PostCreateDocumentByFileAsync( - string datasetId, - [FormContent] CreateByFile.RequestBody body, - FileInfo file + [PathQuery] string datasetId, + [JsonFormDataText] CreateByFile.Data data, + [Parameter(Kind.FormData)] FormDataFile file ); /// diff --git a/DifySharp/DTOs/KnowledgeBase/Document/CreateByFile.cs b/DifySharp/DTOs/KnowledgeBase/Document/CreateByFile.cs index 0e31fda..3604503 100644 --- a/DifySharp/DTOs/KnowledgeBase/Document/CreateByFile.cs +++ b/DifySharp/DTOs/KnowledgeBase/Document/CreateByFile.cs @@ -3,20 +3,21 @@ public record CreateByFile { public record RequestBody( - Data Data + Data Data, + string File ); /// /// /// /// - /// + /// /// /// /// public record Data( string? OriginalDocumentId, - IndexingTechnique IndexTechnique, + IndexingTechnique IndexingTechnique, DocForm DocForm, string DocLanguage, ProcessRule ProcessRule diff --git a/DifySharp/DepedncyInjection.cs b/DifySharp/DepedncyInjection.cs index d452cf3..4866b16 100644 --- a/DifySharp/DepedncyInjection.cs +++ b/DifySharp/DepedncyInjection.cs @@ -70,7 +70,7 @@ public static IServiceCollection AddDifySharp(this IServiceCollection services, return services; } - + private static IServiceCollection ConfigureDifyApi(this IServiceCollection services) where TApi : class { @@ -81,21 +81,27 @@ private static IServiceCollection ConfigureDifyApi(this IServiceCollection apiOptions.HttpHost = new Uri(difyOptions.BaseUrl); // 序列化配置 - apiOptions.JsonDeserializeOptions.PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower; + apiOptions.JsonDeserializeOptions.PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower; + apiOptions.JsonDeserializeOptions.DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull; apiOptions.JsonDeserializeOptions.Converters.Add( new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower)); - apiOptions.JsonSerializeOptions.PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower; + apiOptions.JsonSerializeOptions.PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower; + apiOptions.JsonSerializeOptions.DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull; apiOptions.JsonSerializeOptions.Converters.Add( new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower)); - apiOptions.KeyValueSerializeOptions.IgnoreNullValues = true; - apiOptions.UseLogging = true; + apiOptions.KeyValueSerializeOptions.IgnoreNullValues = true; + apiOptions.KeyValueSerializeOptions.PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower; + apiOptions.KeyValueSerializeOptions.Converters.Add( + new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower)); + + apiOptions.UseLogging = difyOptions.EnableLogging; }).Services ; } - - + + // private static IServiceCollection ConfigureDifyKnowledgeApi(this IServiceCollection services) // { // services diff --git a/DifySharp/DifyClient/DifyClient.cs b/DifySharp/DifyClient/DifyClient.cs index 0df6511..23bcdac 100644 --- a/DifySharp/DifyClient/DifyClient.cs +++ b/DifySharp/DifyClient/DifyClient.cs @@ -9,6 +9,7 @@ using DifySharp.KnowledgeBase.Document; using DifySharp.Workflow.Run; using Microsoft.Extensions.DependencyInjection; +using WebApiClientCore.Parameters; using Basic = DifySharp.Workflow.Application.Basic; using Create = DifySharp.KnowledgeBase.Dataset.Create; using Delete = DifySharp.KnowledgeBase.Document.Delete; @@ -118,17 +119,19 @@ public async Task DeleteDataset(string datasetId) await Api.DeleteDataset(datasetId); } - public async Task PostCreateDocumentByTextAsync(string datasetId, - CreateByText.RequestBody body) + public async Task PostCreateDocumentByTextAsync( + string datasetId, + CreateByText.RequestBody body) { return await Api.PostCreateDocumentByTextAsync(datasetId, body); } - public async Task PostCreateDocumentByFileAsync(string datasetId, - CreateByFile.RequestBody body, - FileInfo file) + public async Task PostCreateDocumentByFileAsync( + string datasetId, + CreateByFile.Data data, + FormDataFile file) { - return await Api.PostCreateDocumentByFileAsync(datasetId, body, file); + return await Api.PostCreateDocumentByFileAsync(datasetId, data, file); } public async Task PostUpdateDocumentByTextAsync( diff --git a/DifySharp/DifySharp.csproj b/DifySharp/DifySharp.csproj index 9fab833..ec6d031 100644 --- a/DifySharp/DifySharp.csproj +++ b/DifySharp/DifySharp.csproj @@ -14,6 +14,8 @@ LICENSE C# SDK for dify true + true + $(NoWarn);1591 @@ -21,6 +23,13 @@ + + + true + lib\$(TargetFramework)\ + + + From ddbd4cbe68afda282355ac4ea09dc1656f271e7a Mon Sep 17 00:00:00 2001 From: Bohan Feng <45181245+fengb3@users.noreply.github.com> Date: Tue, 11 Mar 2025 13:31:40 +0800 Subject: [PATCH 2/5] Update version to 0.0.1-alpha3 --- DifySharp/DifySharp.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DifySharp/DifySharp.csproj b/DifySharp/DifySharp.csproj index ec6d031..9b06739 100644 --- a/DifySharp/DifySharp.csproj +++ b/DifySharp/DifySharp.csproj @@ -4,7 +4,7 @@ net8.0 enable enable - 0.0.3-alpha2 + 0.0.3-alpha3 fengb3 https://github.com/fengb3/DifySharp.git https://github.com/fengb3/DifySharp From a5dd59f3ba5f478dcc1844c553a4aad2e1a181f5 Mon Sep 17 00:00:00 2001 From: Bohan Feng <45181245+fengb3@users.noreply.github.com> Date: Tue, 11 Mar 2025 13:39:05 +0800 Subject: [PATCH 3/5] merge into main From e561ff03ce7a914614a09da0ff922d3775b30fdd Mon Sep 17 00:00:00 2001 From: Bohan Date: Thu, 27 Mar 2025 00:44:01 +0800 Subject: [PATCH 4/5] fix chunking mode naming --- DifySharp/DTOs/KnowledgeBase/DocForm.cs | 2 +- DifySharp/DifySharp.csproj | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DifySharp/DTOs/KnowledgeBase/DocForm.cs b/DifySharp/DTOs/KnowledgeBase/DocForm.cs index 0035955..c4bbbb7 100644 --- a/DifySharp/DTOs/KnowledgeBase/DocForm.cs +++ b/DifySharp/DTOs/KnowledgeBase/DocForm.cs @@ -13,7 +13,7 @@ public enum DocForm /// /// Parent-child mode /// - HierarchicalMode, + HierarchicalModel, /// /// Q and A Mode: Generates Q and A pairs for segmented documents and then embeds the questions diff --git a/DifySharp/DifySharp.csproj b/DifySharp/DifySharp.csproj index 9b06739..4f6cbec 100644 --- a/DifySharp/DifySharp.csproj +++ b/DifySharp/DifySharp.csproj @@ -4,7 +4,7 @@ net8.0 enable enable - 0.0.3-alpha3 + 0.0.3-alpha4 fengb3 https://github.com/fengb3/DifySharp.git https://github.com/fengb3/DifySharp From b27e6fdfbdc22aaf10b298b4a9f898fe91acc480 Mon Sep 17 00:00:00 2001 From: Bohan Date: Thu, 27 Mar 2025 01:08:00 +0800 Subject: [PATCH 5/5] fix naming --- DifySharp.Demo.AspNet/Program.cs | 4 ++-- DifySharp.Test/Apis/KnowledgeBaseApiTest/ChunkApiTest.cs | 2 +- DifySharp.Test/Apis/KnowledgeBaseApiTest/DocumentApiTest.cs | 2 +- DifySharp/DTOs/KnowledgeBase/ProcessRule.cs | 6 +++--- DifySharp/DifySharp.csproj | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/DifySharp.Demo.AspNet/Program.cs b/DifySharp.Demo.AspNet/Program.cs index 6d8feec..b288a2b 100644 --- a/DifySharp.Demo.AspNet/Program.cs +++ b/DifySharp.Demo.AspNet/Program.cs @@ -56,7 +56,7 @@ 1000 ), "paragraph", - new SubChunkSegmentation( + new SubchunkSegmentation( "\n\n", 1000, 200 @@ -114,7 +114,7 @@ 1000 ), "paragraph", - new SubChunkSegmentation( + new SubchunkSegmentation( "\n\n", 1000, 200 diff --git a/DifySharp.Test/Apis/KnowledgeBaseApiTest/ChunkApiTest.cs b/DifySharp.Test/Apis/KnowledgeBaseApiTest/ChunkApiTest.cs index 2483a30..684094e 100644 --- a/DifySharp.Test/Apis/KnowledgeBaseApiTest/ChunkApiTest.cs +++ b/DifySharp.Test/Apis/KnowledgeBaseApiTest/ChunkApiTest.cs @@ -57,7 +57,7 @@ public ChunkApiTestFixture() 1000 ), "paragraph", - new SubChunkSegmentation( + new SubchunkSegmentation( "\n\n", 1000, 200 diff --git a/DifySharp.Test/Apis/KnowledgeBaseApiTest/DocumentApiTest.cs b/DifySharp.Test/Apis/KnowledgeBaseApiTest/DocumentApiTest.cs index d6329d6..e9805d9 100644 --- a/DifySharp.Test/Apis/KnowledgeBaseApiTest/DocumentApiTest.cs +++ b/DifySharp.Test/Apis/KnowledgeBaseApiTest/DocumentApiTest.cs @@ -63,7 +63,7 @@ ILogger logger 1000 ), "paragraph", - new SubChunkSegmentation( + new SubchunkSegmentation( "\n\n", 1000, 200 diff --git a/DifySharp/DTOs/KnowledgeBase/ProcessRule.cs b/DifySharp/DTOs/KnowledgeBase/ProcessRule.cs index 1a74659..8dd90dd 100644 --- a/DifySharp/DTOs/KnowledgeBase/ProcessRule.cs +++ b/DifySharp/DTOs/KnowledgeBase/ProcessRule.cs @@ -21,12 +21,12 @@ Rules Rules /// paragraph : paragraph retrieval /// /// -/// (object) Child chunk rules +/// (object) Child chunk rules public record Rules( ICollection PreProcessingRules, Segmentation Segmentation, string ParentMode, - SubChunkSegmentation SubChunkSegmentation + SubchunkSegmentation SubchunkSegmentation ); /// @@ -60,7 +60,7 @@ int MaxTokens /// Segmentation identifier. Currently, only one delimiter is allowed. The default is *** /// The maximum length (tokens) must be validated to be shorter than the length of the parent chunk /// Define the overlap between adjacent chunks (optional) -public record SubChunkSegmentation( +public record SubchunkSegmentation( string Separator, int MaxTokens, int? ChunkOverlap diff --git a/DifySharp/DifySharp.csproj b/DifySharp/DifySharp.csproj index 4f6cbec..e9cda08 100644 --- a/DifySharp/DifySharp.csproj +++ b/DifySharp/DifySharp.csproj @@ -4,7 +4,7 @@ net8.0 enable enable - 0.0.3-alpha4 + 0.0.3-alpha5 fengb3 https://github.com/fengb3/DifySharp.git https://github.com/fengb3/DifySharp