-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #437 from solliancenet/cj-vectorization-content-sources
Content source retrieval and text extraction handler
- Loading branch information
Showing
50 changed files
with
1,295 additions
and
206 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
|
||
namespace FoundationaLLM.Common.Constants | ||
{ | ||
/// <summary>
/// Well-known file extensions, each expressed as a lowercase string that includes the leading dot.
/// </summary>
public static class FileExtensions
{
    /// <summary>Extension of plain text files (<c>.txt</c>).</summary>
    public const string Text = ".txt";

    /// <summary>Extension of JSON files (<c>.json</c>).</summary>
    public const string JSON = ".json";

    /// <summary>Extension of Markdown files (<c>.md</c>).</summary>
    public const string Markdown = ".md";

    /// <summary>Extension of Microsoft Office Word files (<c>.docx</c>).</summary>
    public const string Word = ".docx";

    /// <summary>Extension of Microsoft Office PowerPoint files (<c>.pptx</c>).</summary>
    public const string PowerPoint = ".pptx";

    /// <summary>Extension of Microsoft Office Excel files (<c>.xlsx</c>).</summary>
    public const string Excel = ".xlsx";

    /// <summary>Extension of PDF files (<c>.pdf</c>).</summary>
    public const string PDF = ".pdf";
}
} |
38 changes: 38 additions & 0 deletions
38
src/dotnet/Common/Exceptions/ConfigurationValueException.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
|
||
namespace FoundationaLLM.Common.Exceptions | ||
{ | ||
/// <summary>
/// Exception type used to signal an error with a configuration value.
/// </summary>
public class ConfigurationValueException : Exception
{
    /// <summary>
    /// Creates a <see cref="ConfigurationValueException"/> that carries the default exception message.
    /// </summary>
    public ConfigurationValueException()
        : base()
    {
    }

    /// <summary>
    /// Creates a <see cref="ConfigurationValueException"/> whose message is <paramref name="message"/>.
    /// </summary>
    /// <param name="message">Text describing the error.</param>
    public ConfigurationValueException(string? message)
        : base(message)
    {
    }

    /// <summary>
    /// Creates a <see cref="ConfigurationValueException"/> whose message is <paramref name="message"/>
    /// and whose cause is <paramref name="innerException"/>.
    /// </summary>
    /// <param name="message">Text describing the error.</param>
    /// <param name="innerException">The exception that caused this one.</param>
    public ConfigurationValueException(string? message, Exception? innerException)
        : base(message, innerException)
    {
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
|
||
namespace FoundationaLLM.Common.Exceptions | ||
{ | ||
/// <summary>
/// Exception type used to signal an error with accessing content.
/// </summary>
public class ContentException : Exception
{
    /// <summary>
    /// Creates a <see cref="ContentException"/> that carries the default exception message.
    /// </summary>
    public ContentException()
        : base()
    {
    }

    /// <summary>
    /// Creates a <see cref="ContentException"/> whose message is <paramref name="message"/>.
    /// </summary>
    /// <param name="message">Text describing the error.</param>
    public ContentException(string? message)
        : base(message)
    {
    }

    /// <summary>
    /// Creates a <see cref="ContentException"/> whose message is <paramref name="message"/>
    /// and whose cause is <paramref name="innerException"/>.
    /// </summary>
    /// <param name="message">Text describing the error.</param>
    /// <param name="innerException">The exception that caused this one.</param>
    public ContentException(string? message, Exception? innerException)
        : base(message, innerException)
    {
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
using Azure; | ||
using Azure.Identity; | ||
using Azure.Storage; | ||
using Azure.Storage.Blobs; | ||
using Azure.Storage.Blobs.Models; | ||
using FoundationaLLM.Common.Exceptions; | ||
using FoundationaLLM.Common.Interfaces; | ||
using FoundationaLLM.Common.Settings; | ||
using Microsoft.Extensions.Logging; | ||
using Microsoft.Extensions.Options; | ||
using System.Text; | ||
|
||
namespace FoundationaLLM.Common.Services | ||
{ | ||
/// <summary>
/// Provides access to Azure blob storage.
/// </summary>
/// <remarks>
/// Initializes a new instance of the <see cref="BlobStorageService"/> with the specified options and logger.
/// </remarks>
/// <param name="options">The options object containing the <see cref="BlobStorageServiceSettings"/> object with the settings.</param>
/// <param name="logger">The logger used for logging.</param>
#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable.
public class BlobStorageService(
#pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable.
    IOptions<BlobStorageServiceSettings> options,
    ILogger<BlobStorageService> logger) : StorageServiceBase(options, logger), IStorageService
{
    // Assigned only by the CreateClientFrom* overrides below.
    // NOTE(review): presumably StorageServiceBase invokes one of them during construction,
    // which is why CS8618 is suppressed on the primary constructor - confirm against the base class.
    private BlobServiceClient _blobServiceClient;

    /// <inheritdoc/>
    /// <exception cref="ContentException">
    /// Thrown when the service answers with HTTP 404, or when the download response carries no value.
    /// </exception>
    public async Task<BinaryData> ReadFileAsync(
        string containerName,
        string filePath,
        CancellationToken cancellationToken = default)
    {
        var containerClient = _blobServiceClient.GetBlobContainerClient(containerName);
        var blobClient = containerClient.GetBlobClient(filePath);

        try
        {
            Response<BlobDownloadResult>? content = await blobClient.DownloadContentAsync(cancellationToken).ConfigureAwait(false);

            if (content is { HasValue: true })
            {
                return content.Value.Content;
            }

            throw new ContentException($"Cannot read file {filePath} from container {containerName}.");
        }
        catch (RequestFailedException e) when (e.Status == 404)
        {
            // Only "not found" is translated; any other request failure propagates unchanged.
            _logger.LogWarning("File not found: {FilePath}", filePath);
            throw new ContentException("File not found.", e);
        }
    }

    /// <inheritdoc/>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="fileContent"/> is <c>null</c>.</exception>
    public async Task WriteFileAsync(
        string containerName,
        string filePath,
        Stream fileContent,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(fileContent);

        var containerClient = _blobServiceClient.GetBlobContainerClient(containerName);
        var blobClient = containerClient.GetBlobClient(filePath);

        // Rewind so the whole stream is uploaded. Non-seekable streams cannot be rewound
        // (Seek would throw NotSupportedException), so they are uploaded from their current position.
        if (fileContent.CanSeek)
        {
            fileContent.Seek(0, SeekOrigin.Begin);
        }

        BlobUploadOptions uploadOptions = new();
        await blobClient.UploadAsync(fileContent, uploadOptions, cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc/>
    public async Task WriteFileAsync(
        string containerName,
        string filePath,
        string fileContent,
        CancellationToken cancellationToken = default)
    {
        // The content is uploaded as UTF-8 bytes; the temporary stream is disposed once the upload completes.
        using var contentStream = new MemoryStream(Encoding.UTF8.GetBytes(fileContent));

        await WriteFileAsync(
            containerName,
            filePath,
            contentStream,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc/>
    public async Task<bool> FileExistsAsync(
        string containerName,
        string filePath,
        CancellationToken cancellationToken = default)
    {
        var containerClient = _blobServiceClient.GetBlobContainerClient(containerName);
        var blobClient = containerClient.GetBlobClient(filePath);

        return await blobClient.ExistsAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc/>
    protected override void CreateClientFromAccountKey(string accountName, string accountKey) =>
        _blobServiceClient = new BlobServiceClient(
            BuildBlobEndpoint(accountName),
            new StorageSharedKeyCredential(accountName, accountKey));

    /// <inheritdoc/>
    protected override void CreateClientFromConnectionString(string connectionString) =>
        _blobServiceClient = new BlobServiceClient(connectionString);

    /// <inheritdoc/>
    protected override void CreateClientFromIdentity(string accountName) =>
        _blobServiceClient = new BlobServiceClient(
            BuildBlobEndpoint(accountName),
            new DefaultAzureCredential());

    /// <summary>
    /// Builds the Blob service endpoint URI for a storage account.
    /// </summary>
    /// <remarks>
    /// The Blob client must target the <c>blob</c> endpoint; the <c>dfs</c> endpoint belongs to the
    /// Data Lake Storage API and is not the endpoint the Blob service API is served from.
    /// </remarks>
    /// <param name="accountName">The name of the storage account.</param>
    /// <returns>The <c>https://{accountName}.blob.core.windows.net</c> URI.</returns>
    private static Uri BuildBlobEndpoint(string accountName) =>
        new($"https://{accountName}.blob.core.windows.net");
}
} |
Oops, something went wrong.