Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Adapter based on OpenAI and PostgreSQL pgvector extension #137

Draft
wants to merge 1 commit into
base: 0.6
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions packages/seal-openai-postgresql-adapter/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
.gitattributes export-ignore
.gitignore export-ignore
composer.lock export-ignore
/Tests export-ignore
phpunit.xml.dist export-ignore
.php-cs-fixer.dist.php export-ignore
phpstan.neon export-ignore
rector.php export-ignore
2 changes: 2 additions & 0 deletions packages/seal-openai-postgresql-adapter/.github/FUNDING.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
github: [alexander-schranz]
custom: ["https://paypal.me/L91"]
8 changes: 8 additions & 0 deletions packages/seal-openai-postgresql-adapter/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/vendor/
/composer.phar
/phpunit.xml
/.phpunit.result.cache
/Tests/var
/docker-compose.override.yml
/.php-cs-fixer.php
/.php-cs-fixer.cache
13 changes: 13 additions & 0 deletions packages/seal-openai-postgresql-adapter/.php-cs-fixer.dist.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?php

declare(strict_types=1);

// Reuse the shared PHP-CS-Fixer configuration that lives two directories above this package.
$phpCsConfig = require \dirname(__DIR__, 2) . '/.php-cs-fixer.dist.php';

// Limit the fixer to this package directory and skip everything the VCS ignores.
$finder = (new PhpCsFixer\Finder())
    ->in(__DIR__)
    ->ignoreVCSIgnored(true);

// Attach the finder exactly once and hand the resulting configuration back to the tool.
return $phpCsConfig->setFinder($finder);
21 changes: 21 additions & 0 deletions packages/seal-openai-postgresql-adapter/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 Alexander Schranz

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<?php

declare(strict_types=1);

namespace Schranz\Search\SEAL\Adapter\OpenAIPostgreSQL;

use OpenAI\Client;
use Schranz\Search\SEAL\Adapter\AdapterInterface;
use Schranz\Search\SEAL\Adapter\IndexerInterface;
use Schranz\Search\SEAL\Adapter\SchemaManagerInterface;
use Schranz\Search\SEAL\Adapter\SearcherInterface;

/**
 * Adapter that bundles the schema manager, indexer and searcher which share
 * one OpenAI client and one PDO connection.
 *
 * Each collaborator can be injected; any collaborator left as null is
 * created from the given OpenAI client and PDO connection.
 */
final class OpenAIPostgreSQLAdapter implements AdapterInterface
{
    private readonly SearcherInterface $searcher;

    private readonly IndexerInterface $indexer;

    private readonly SchemaManagerInterface $schemaManager;

    /**
     * @param Client $openAiClient client handed to every default collaborator
     * @param \PDO $pdoClient connection handed to every default collaborator
     * @param SchemaManagerInterface|null $schemaManager optional replacement for the default schema manager
     * @param IndexerInterface|null $indexer optional replacement for the default indexer
     * @param SearcherInterface|null $searcher optional replacement for the default searcher
     */
    public function __construct(
        Client $openAiClient,
        \PDO $pdoClient,
        ?SchemaManagerInterface $schemaManager = null,
        ?IndexerInterface $indexer = null,
        ?SearcherInterface $searcher = null,
    ) {
        // Defaults are only instantiated when no custom implementation was passed in.
        if (null === $schemaManager) {
            $schemaManager = new OpenAIPostgreSQLSchemaManager($openAiClient, $pdoClient);
        }

        if (null === $indexer) {
            $indexer = new OpenAIPostgreSQLIndexer($openAiClient, $pdoClient);
        }

        if (null === $searcher) {
            $searcher = new OpenAIPostgreSQLSearcher($openAiClient, $pdoClient);
        }

        $this->schemaManager = $schemaManager;
        $this->indexer = $indexer;
        $this->searcher = $searcher;
    }

    /**
     * Returns the schema manager chosen at construction time.
     */
    public function getSchemaManager(): SchemaManagerInterface
    {
        return $this->schemaManager;
    }

    /**
     * Returns the indexer chosen at construction time.
     */
    public function getIndexer(): IndexerInterface
    {
        return $this->indexer;
    }

    /**
     * Returns the searcher chosen at construction time.
     */
    public function getSearcher(): SearcherInterface
    {
        return $this->searcher;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
<?php

declare(strict_types=1);

namespace Schranz\Search\SEAL\Adapter\OpenAIPostgreSQL;

use OpenAI;
use OpenAI\Client;
use Psr\Container\ContainerInterface;
use Schranz\Search\SEAL\Adapter\AdapterFactoryInterface;
use Schranz\Search\SEAL\Adapter\AdapterInterface;

/**
 * Builds an OpenAIPostgreSQLAdapter (OpenAI client + PDO PostgreSQL connection)
 * from a DSN that has already been split into its components.
 *
 * @experimental
 */
class OpenAIPostgreSQLAdapterFactory implements AdapterFactoryInterface
{
    /**
     * @param ContainerInterface|null $container optional service container; not read by the methods of this class
     */
    public function __construct(
        private readonly ?ContainerInterface $container = null,
    ) {
    }

    /**
     * Creates both clients from the DSN parts and wires them into a new adapter.
     */
    public function createAdapter(array $dsn): AdapterInterface
    {
        $openAIClient = $this->createOpenAiClient($dsn);
        $pdoClient = $this->createPdoClient($dsn);

        return new OpenAIPostgreSQLAdapter($openAIClient, $pdoClient);
    }

    /**
     * Creates the OpenAI client from the "openai-api-key" and optional
     * "openai-organisation" query parameters of the DSN.
     *
     * A missing API key falls back to an empty string.
     *
     * @internal
     *
     * @param array{
     *     query: array<string, string>,
     * } $dsn
     */
    public function createOpenAiClient(array $dsn): Client
    {
        return OpenAI::client($dsn['query']['openai-api-key'] ?? '', $dsn['query']['openai-organisation'] ?? null);
    }

    /**
     * Opens the PDO connection to PostgreSQL described by the DSN parts.
     *
     * Defaults: port 5432, empty user and password, database "search".
     * The connection is configured to throw exceptions on errors.
     *
     * @internal
     *
     * @param array{
     *     host: string,
     *     port?: int,
     *     user?: string,
     *     pass?: string,
     *     path?: string,
     * } $dsn
     */
    public function createPdoClient(array $dsn): \PDO
    {
        $host = $dsn['host'];
        $port = $dsn['port'] ?? 5432;
        $user = $dsn['user'] ?? '';
        $pass = $dsn['pass'] ?? '';

        // When the DSN parts come from a URL parser such as parse_url(), the path
        // carries a leading slash ("/search"), which is not part of the database name.
        $db = \ltrim($dsn['path'] ?? '', '/');
        if ('' === $db) {
            $db = 'search';
        }

        return new \PDO(
            'pgsql:host=' . $host . ';port=' . $port . ';dbname=' . $db . ';',
            $user,
            $pass,
            [\PDO::ATTR_ERRMODE => \PDO::ERRMODE_EXCEPTION],
        );
    }

    /**
     * Name under which this adapter factory is identified.
     */
    public static function getName(): string
    {
        return 'openai-postgresql';
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<?php

declare(strict_types=1);

namespace Schranz\Search\SEAL\Adapter\OpenAIPostgreSQL;

use OpenAI\Client;
use Schranz\Search\SEAL\Adapter\IndexerInterface;
use Schranz\Search\SEAL\Marshaller\FlattenMarshaller;
use Schranz\Search\SEAL\Schema\Index;
use Schranz\Search\SEAL\Task\SyncTask;
use Schranz\Search\SEAL\Task\TaskInterface;

/**
 * Writes and removes documents in the PostgreSQL table named after the index.
 *
 * Documents are flattened with the FlattenMarshaller and stored as JSON in the
 * "document" column, keyed by the "identifier" column.
 */
final class OpenAIPostgreSQLIndexer implements IndexerInterface
{
    private readonly FlattenMarshaller $marshaller;

    public function __construct(
        private readonly Client $openAiClient,
        private readonly \PDO $pdoClient,
    ) {
        $this->marshaller = new FlattenMarshaller();
    }

    /**
     * Inserts the document, or replaces the stored one when a row with the same
     * identifier already exists.
     *
     * @return TaskInterface|null null unless $options['return_slow_promise_result'] is true;
     *                            otherwise a task carrying the document as passed in by the caller
     *
     * @throws \JsonException when the flattened document cannot be encoded as JSON
     */
    public function save(Index $index, array $document, array $options = []): ?TaskInterface
    {
        $identifierField = $index->getIdentifierField();

        /** @var string|int|null $identifier */
        $identifier = $document[$identifierField->name] ?? null;

        // Keep the caller's document untouched; only the flattened copy is persisted.
        $marshalledDocument = $this->marshaller->marshall($index->fields, $document);

        // Upsert: a plain INSERT would fail on the primary key when a document is saved again.
        // NOTE(review): the embedding column is not written by this statement.
        /** @var \PDOStatement $statement */
        $statement = $this->pdoClient->prepare(
            'INSERT INTO ' . $index->name . ' (identifier, document) VALUES (:identifier, :document)'
            . ' ON CONFLICT (identifier) DO UPDATE SET document = EXCLUDED.document',
        );
        $statement->execute([
            'identifier' => $identifier,
            'document' => \json_encode($marshalledDocument, \JSON_THROW_ON_ERROR),
        ]);

        if (true !== ($options['return_slow_promise_result'] ?? false)) {
            return null;
        }

        return new SyncTask($document);
    }

    /**
     * Deletes the row with the given identifier from the index table.
     *
     * @return TaskInterface|null null unless $options['return_slow_promise_result'] is true;
     *                            otherwise a task carrying null
     */
    public function delete(Index $index, string $identifier, array $options = []): ?TaskInterface
    {
        /** @var \PDOStatement $statement */
        $statement = $this->pdoClient->prepare('DELETE FROM ' . $index->name . ' WHERE identifier = :identifier');
        $statement->execute([
            'identifier' => $identifier,
        ]);

        if (true !== ($options['return_slow_promise_result'] ?? false)) {
            return null;
        }

        return new SyncTask(null);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
<?php

declare(strict_types=1);

namespace Schranz\Search\SEAL\Adapter\OpenAIPostgreSQL;

use OpenAI\Client;
use Schranz\Search\SEAL\Adapter\SchemaManagerInterface;
use Schranz\Search\SEAL\Schema\Index;
use Schranz\Search\SEAL\Task\SyncTask;
use Schranz\Search\SEAL\Task\TaskInterface;

/**
 * Manages the PostgreSQL table that backs an index: existence check, creation
 * (including the pgvector extension) and removal.
 */
final class OpenAIPostgreSQLSchemaManager implements SchemaManagerInterface
{
    public function __construct(
        private readonly Client $openAiClient,
        private readonly \PDO $pdoClient,
    ) {
    }

    /**
     * Checks whether a table named after the index exists in the "public" schema.
     */
    public function existIndex(Index $index): bool
    {
        // The index name is bound as a parameter instead of being interpolated into
        // the SQL string literal, so quotes in the name cannot break the query.
        /** @var \PDOStatement $statement */
        $statement = $this->pdoClient->prepare(
            <<<'SQL'
                SELECT EXISTS (
                    SELECT FROM
                        pg_tables
                    WHERE
                        schemaname = 'public' AND
                        tablename = :name
                )
                SQL
        );
        $statement->execute(['name' => $index->name]);

        return (bool) $statement->fetchColumn();
    }

    /**
     * Drops the table named after the index.
     *
     * @return TaskInterface|null null unless $options['return_slow_promise_result'] is true;
     *                            otherwise a task carrying null
     */
    public function dropIndex(Index $index, array $options = []): ?TaskInterface
    {
        $this->pdoClient->exec(
            <<<SQL
                DROP TABLE {$index->name}
                SQL
        );

        if (true !== ($options['return_slow_promise_result'] ?? false)) {
            return null;
        }

        return new SyncTask(null);
    }

    /**
     * Enables the pgvector extension if needed and creates the index table
     * (identifier, JSONB document, embedding vector) when it does not exist yet.
     *
     * @return TaskInterface|null null unless $options['return_slow_promise_result'] is true;
     *                            otherwise a task carrying null
     */
    public function createIndex(Index $index, array $options = []): ?TaskInterface
    {
        $this->pdoClient->exec(
            <<<'SQL'
                CREATE EXTENSION IF NOT EXISTS vector;
                SQL
        );

        // OpenAI's text-embedding-ada-002 model outputs 1536 dimensions, so we will use that for our vector size.
        $this->pdoClient->exec(
            <<<SQL
                CREATE TABLE IF NOT EXISTS {$index->name} (
                    identifier VARCHAR(48) PRIMARY KEY,
                    document JSONB,
                    embedding vector(1536)
                );
                SQL
        );

        // TODO optimize index: https://github.com/pgvector/pgvector/tree/v0.4.1#indexing
        // TODO make a filterable columns

        if (true !== ($options['return_slow_promise_result'] ?? false)) {
            return null;
        }

        return new SyncTask(null);
    }
}
Loading