Skip to content

Commit

Permalink
Merge pull request #437 from solliancenet/cj-vectorization-content-so…
Browse files Browse the repository at this point in the history
…urces

Content source retrieval and text extraction handler
  • Loading branch information
joelhulen authored Jan 12, 2024
2 parents 5c48bf6 + f7e35b0 commit 238b9fd
Show file tree
Hide file tree
Showing 50 changed files with 1,295 additions and 206 deletions.
2 changes: 2 additions & 0 deletions src/dotnet/Common/Common.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Azure.Identity" Version="1.10.4" />
<PackageReference Include="Azure.Search.Documents" Version="11.5.0-alpha.20230531.1" />
<PackageReference Include="Azure.Storage.Files.DataLake" Version="12.17.1" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
<PackageReference Include="Asp.Versioning.Http" Version="7.1.0" />
<PackageReference Include="Asp.Versioning.Mvc.ApiExplorer" Version="7.1.0" />
Expand Down
24 changes: 16 additions & 8 deletions src/dotnet/Common/Constants/AppConfigurationKeys.cs
Original file line number Diff line number Diff line change
Expand Up @@ -630,9 +630,13 @@ public static class AppConfigurationKeys
/// </summary>
public const string FoundationaLLM_SemanticKernelAPI_OpenAI_ShortSummaryPromptName = "FoundationaLLM:SemanticKernelAPI:OpenAI.ShortSummaryPromptName";
/// <summary>
/// The key for the FoundationaLLM:Vectorization:WorkerSettings app configuration setting.
/// The key section for the FoundationaLLM:Vectorization:ContentSourceManagerService app configuration setting.
/// </summary>
public const string FoundationaLLM_Vectorization_WorkerSettings = "FoundationaLLM:Vectorization:WorkerSettings";
public const string FoundationaLLM_Vectorization_ContentSourceManagerService = "FoundationaLLM:Vectorization:ContentSourceManagerService";
/// <summary>
/// The key section for the FoundationaLLM:Vectorization:VectorizationWorker app configuration setting.
/// </summary>
public const string FoundationaLLM_Vectorization_VectorizationWorker = "FoundationaLLM:Vectorization:VectorizationWorker";
}

/// <summary>
Expand Down Expand Up @@ -780,16 +784,20 @@ public static class AppConfigurationKeySections
/// </summary>
public const string FoundationaLLM_BlobStorageMemorySource = "FoundationaLLM:BlobStorageMemorySource";
/// <summary>
/// The key section for the FoundationaLLM:Vectorization:Queues app configuration settings.
/// The key section for the FoundationaLLM:Vectorization:ContentSources app configuration settings.
/// </summary>
public const string FoundationaLLM_Vectorization_Queues = "FoundationaLLM:Vectorization:Queues";
public const string FoundationaLLM_Vectorization_ContentSources = "FoundationaLLM:Vectorization:ContentSources";
/// <summary>
/// The key section for the FoundationaLLM:Vectorization:StateServiceSettings app configuration settings.
/// The key section for the FoundationaLLM:Vectorization:Steps app configuration settings.
/// </summary>
public const string FoundationaLLM_Vectorization_StateServiceSettings = "FoundationaLLM:Vectorization:StateServiceSettings";
public const string FoundationaLLM_Vectorization_Steps = "FoundationaLLM:Vectorization:Steps";
/// <summary>
/// The key section for the FoundationaLLM:Vectorization:Queues app configuration settings.
/// </summary>
public const string FoundationaLLM_Vectorization_Queues = "FoundationaLLM:Vectorization:Queues";
/// <summary>
/// The key section for the FoundationaLLM:Vectorization:WorkerSettings app configuration settings.
/// The key section for the FoundationaLLM:Vectorization:StateService app configuration settings.
/// </summary>
public const string FoundationaLLM_Vectorization_WorkerSettings = "FoundationaLLM:Vectorization:WorkerSettings";
public const string FoundationaLLM_Vectorization_StateService = "FoundationaLLM:Vectorization:StateService";
}
}
43 changes: 43 additions & 0 deletions src/dotnet/Common/Constants/FileExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace FoundationaLLM.Common.Constants
{
/// <summary>
/// Well-known file extensions, exposed as string constants.
/// Every value is lowercase and includes the leading dot.
/// </summary>
public static class FileExtensions
{
    /// <summary>
    /// Extension used by plain text files (<c>.txt</c>).
    /// </summary>
    public const string Text = ".txt";

    /// <summary>
    /// Extension used by JSON files (<c>.json</c>).
    /// </summary>
    public const string JSON = ".json";

    /// <summary>
    /// Extension used by Markdown files (<c>.md</c>).
    /// </summary>
    public const string Markdown = ".md";

    /// <summary>
    /// Extension used by Microsoft Office Word files (<c>.docx</c>).
    /// </summary>
    public const string Word = ".docx";

    /// <summary>
    /// Extension used by Microsoft Office PowerPoint files (<c>.pptx</c>).
    /// </summary>
    public const string PowerPoint = ".pptx";

    /// <summary>
    /// Extension used by Microsoft Office Excel files (<c>.xlsx</c>).
    /// </summary>
    public const string Excel = ".xlsx";

    /// <summary>
    /// Extension used by PDF files (<c>.pdf</c>).
    /// </summary>
    public const string PDF = ".pdf";
}
}
38 changes: 38 additions & 0 deletions src/dotnet/Common/Exceptions/ConfigurationValueException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace FoundationaLLM.Common.Exceptions
{
/// <summary>
/// Exception type used to signal a problem with a configuration value.
/// </summary>
public class ConfigurationValueException : Exception
{
    /// <summary>
    /// Creates a <see cref="ConfigurationValueException"/> without an explicit message;
    /// the message provided by the base <see cref="Exception"/> class is used.
    /// </summary>
    public ConfigurationValueException() : base() { }

    /// <summary>
    /// Creates a <see cref="ConfigurationValueException"/> carrying the given <paramref name="message"/>.
    /// </summary>
    /// <param name="message">Text describing the error.</param>
    public ConfigurationValueException(string? message) : base(message) { }

    /// <summary>
    /// Creates a <see cref="ConfigurationValueException"/> carrying the given <paramref name="message"/>
    /// and wrapping <paramref name="innerException"/>.
    /// </summary>
    /// <param name="message">Text describing the error.</param>
    /// <param name="innerException">The exception that caused this exception.</param>
    public ConfigurationValueException(string? message, Exception? innerException) : base(message, innerException) { }
}
}
38 changes: 38 additions & 0 deletions src/dotnet/Common/Exceptions/ContentException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace FoundationaLLM.Common.Exceptions
{
/// <summary>
/// Exception type used to signal a problem with accessing content.
/// </summary>
public class ContentException : Exception
{
    /// <summary>
    /// Creates a <see cref="ContentException"/> without an explicit message;
    /// the message provided by the base <see cref="Exception"/> class is used.
    /// </summary>
    public ContentException() : base() { }

    /// <summary>
    /// Creates a <see cref="ContentException"/> carrying the given <paramref name="message"/>.
    /// </summary>
    /// <param name="message">Text describing the error.</param>
    public ContentException(string? message) : base(message) { }

    /// <summary>
    /// Creates a <see cref="ContentException"/> carrying the given <paramref name="message"/>
    /// and wrapping <paramref name="innerException"/>.
    /// </summary>
    /// <param name="message">Text describing the error.</param>
    /// <param name="innerException">The exception that caused this exception.</param>
    public ContentException(string? message, Exception? innerException) : base(message, innerException) { }
}
}
37 changes: 37 additions & 0 deletions src/dotnet/Common/Interfaces/IStorageService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,42 @@ namespace FoundationaLLM.Common.Interfaces
/// </summary>
public interface IStorageService
{
/// <summary>
/// Reads the binary content of a specified file from the storage.
/// </summary>
/// <param name="containerName">The name of the container where the file is located.</param>
/// <param name="filePath">The path of the file to read.</param>
/// <param name="cancellationToken">The cancellation token that signals that operations should be cancelled.</param>
/// <returns>The binary content of the file.</returns>
Task<BinaryData> ReadFileAsync(string containerName, string filePath, CancellationToken cancellationToken);

/// <summary>
/// Writes binary content, supplied as a stream, to a specified file in the storage.
/// </summary>
/// <param name="containerName">The name of the container where the file is located.</param>
/// <param name="filePath">The path of the file to write.</param>
/// <param name="fileContent">The stream holding the binary content to write to the file.</param>
/// <param name="cancellationToken">The cancellation token that signals that operations should be cancelled.</param>
/// <returns>A task that represents the asynchronous write operation.</returns>
Task WriteFileAsync(string containerName, string filePath, Stream fileContent, CancellationToken cancellationToken);

/// <summary>
/// Writes string content to a specified file in the storage.
/// </summary>
/// <param name="containerName">The name of the container where the file is located.</param>
/// <param name="filePath">The path of the file to write.</param>
/// <param name="fileContent">The string content to write to the file.</param>
/// <param name="cancellationToken">The cancellation token that signals that operations should be cancelled.</param>
/// <returns>A task that represents the asynchronous write operation.</returns>
Task WriteFileAsync(string containerName, string filePath, string fileContent, CancellationToken cancellationToken);

/// <summary>
/// Checks if a file exists on the storage.
/// </summary>
/// <param name="containerName">The name of the container where the file is located.</param>
/// <param name="filePath">The path of the file to check.</param>
/// <param name="cancellationToken">The cancellation token that signals that operations should be cancelled.</param>
/// <returns>A task whose result is <c>true</c> if the file exists and <c>false</c> otherwise.</returns>
Task<bool> FileExistsAsync(string containerName, string filePath, CancellationToken cancellationToken);
}
}
22 changes: 11 additions & 11 deletions src/dotnet/Common/Models/ModelRegistry.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,42 +11,42 @@ public class ModelRegistry
/// <summary>
/// Dictionary of model names and their corresponding entries in the registry.
/// </summary>
public static Dictionary<string, ModelRegistryEntry> Models = new Dictionary<string, ModelRegistryEntry>
{
public static readonly Dictionary<string, ModelRegistryEntry> Models = new()
{
{
nameof(Customer),
new ModelRegistryEntry
{
Type = typeof(Customer),
TypeMatchingProperties = new List<string> { "customerId", "firstName" },
NamingProperties = new List<string> { "firstName", "lastName" },
TypeMatchingProperties = ["customerId", "firstName"],
NamingProperties = ["firstName", "lastName"],
}
},
{
nameof(Product),
new ModelRegistryEntry
{
Type = typeof(Product),
TypeMatchingProperties = new List<string> { "sku" },
NamingProperties = new List<string> { "name" }
TypeMatchingProperties = ["sku"],
NamingProperties = ["name"]
}
},
{
nameof(SalesOrder),
new ModelRegistryEntry
{
Type = typeof(SalesOrder),
TypeMatchingProperties = new List<string> { "orderDate", "shipDate" },
NamingProperties = new List<string> { "id" }
TypeMatchingProperties = ["orderDate", "shipDate"],
NamingProperties = ["id"]
}
},
{
nameof(ShortTermMemory),
new ModelRegistryEntry
{
Type = typeof(ShortTermMemory),
TypeMatchingProperties = new List<string> { "memory__" },
NamingProperties = new List<string>()
TypeMatchingProperties = ["memory__"],
NamingProperties = []
}
}
};
Expand All @@ -61,7 +61,7 @@ public class ModelRegistry
var result = ModelRegistry
.Models
.Select(m => m.Value)
.SingleOrDefault(x => objProps.Intersect(x.TypeMatchingProperties!).Count() == x.TypeMatchingProperties!.Count());
.SingleOrDefault(x => objProps.Intersect(x.TypeMatchingProperties!).Count() == x.TypeMatchingProperties!.Count);

return result;
}
Expand Down
114 changes: 114 additions & 0 deletions src/dotnet/Common/Services/BlobStorageService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
using Azure;
using Azure.Identity;
using Azure.Storage;
using Azure.Storage.Blobs;
using Azure.Storage.Blobs.Models;
using FoundationaLLM.Common.Exceptions;
using FoundationaLLM.Common.Interfaces;
using FoundationaLLM.Common.Settings;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using System.Text;

namespace FoundationaLLM.Common.Services
{
/// <summary>
/// Provides access to Azure blob storage.
/// </summary>
/// <remarks>
/// Initializes a new instance of the <see cref="BlobStorageService"/> with the specified options and logger.
/// The underlying <see cref="BlobServiceClient"/> is assigned by one of the <c>CreateClientFrom*</c>
/// overrides in this class (presumably invoked by <see cref="StorageServiceBase"/> while it is being
/// constructed - confirm against the base class).
/// </remarks>
/// <param name="options">The options object containing the <see cref="BlobStorageServiceSettings"/> object with the settings.</param>
/// <param name="logger">The logger used for logging.</param>
#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable.
public class BlobStorageService(
#pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable.
    IOptions<BlobStorageServiceSettings> options,
    ILogger<BlobStorageService> logger) : StorageServiceBase(options, logger), IStorageService
{
    // Assigned by the CreateClientFrom* overrides, not by a field initializer.
    private BlobServiceClient _blobServiceClient;

    /// <inheritdoc/>
    /// <exception cref="ContentException">
    /// Thrown when the service answers with HTTP 404 or when the download response carries no value.
    /// </exception>
    public async Task<BinaryData> ReadFileAsync(
        string containerName,
        string filePath,
        CancellationToken cancellationToken = default)
    {
        var blobClient = GetBlobClient(containerName, filePath);

        try
        {
            Response<BlobDownloadResult>? content = await blobClient.DownloadContentAsync(cancellationToken).ConfigureAwait(false);

            if (content is { HasValue: true })
            {
                return content.Value.Content;
            }

            throw new ContentException($"Cannot read file {filePath} from container {containerName}.");
        }
        catch (RequestFailedException e) when (e.Status == 404)
        {
            // Only "not found" is translated; every other request failure propagates unchanged.
            _logger.LogWarning("File not found: {FilePath}", filePath);
            throw new ContentException("File not found.", e);
        }
    }

    /// <inheritdoc/>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="fileContent"/> is <c>null</c>.</exception>
    public async Task WriteFileAsync(
        string containerName,
        string filePath,
        Stream fileContent,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(fileContent);

        var blobClient = GetBlobClient(containerName, filePath);

        // Upload from the start of the stream whenever it can be rewound.
        // A non-seekable stream is uploaded from its current position instead of failing in Seek.
        if (fileContent.CanSeek)
        {
            fileContent.Seek(0, SeekOrigin.Begin);
        }

        // Named so that it does not shadow the "options" primary-constructor parameter.
        BlobUploadOptions uploadOptions = new();
        await blobClient.UploadAsync(fileContent, uploadOptions, cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc/>
    public async Task WriteFileAsync(
        string containerName,
        string filePath,
        string fileContent,
        CancellationToken cancellationToken = default)
    {
        // The content is encoded as UTF-8; the temporary stream is disposed once the upload completes.
        using var contentStream = new MemoryStream(Encoding.UTF8.GetBytes(fileContent));

        await WriteFileAsync(
            containerName,
            filePath,
            contentStream,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc/>
    public async Task<bool> FileExistsAsync(
        string containerName,
        string filePath,
        CancellationToken cancellationToken = default)
    {
        var blobClient = GetBlobClient(containerName, filePath);

        return await blobClient.ExistsAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc/>
    protected override void CreateClientFromAccountKey(string accountName, string accountKey) =>
        _blobServiceClient = new BlobServiceClient(
            BuildBlobEndpoint(accountName),
            new StorageSharedKeyCredential(accountName, accountKey));

    /// <inheritdoc/>
    protected override void CreateClientFromConnectionString(string connectionString) =>
        _blobServiceClient = new BlobServiceClient(connectionString);

    /// <inheritdoc/>
    protected override void CreateClientFromIdentity(string accountName) =>
        _blobServiceClient = new BlobServiceClient(
            BuildBlobEndpoint(accountName),
            new DefaultAzureCredential());

    /// <summary>
    /// Resolves the client for the blob located at <paramref name="filePath"/> inside <paramref name="containerName"/>.
    /// </summary>
    private BlobClient GetBlobClient(string containerName, string filePath) =>
        _blobServiceClient
            .GetBlobContainerClient(containerName)
            .GetBlobClient(filePath);

    /// <summary>
    /// Builds the Blob service endpoint of a storage account.
    /// </summary>
    /// <remarks>
    /// The Blob service lives on the <c>blob</c> endpoint; the <c>dfs</c> endpoint belongs to the
    /// Data Lake Storage API and must not be handed to a <see cref="BlobServiceClient"/>.
    /// NOTE(review): the public-cloud suffix is hard-coded, as it was before - confirm whether
    /// sovereign clouds need to be supported.
    /// </remarks>
    private static Uri BuildBlobEndpoint(string accountName) =>
        new($"https://{accountName}.blob.core.windows.net");
}
}
Loading

0 comments on commit 238b9fd

Please sign in to comment.