From 68bf89b01b1c5123e37a42f34c537f7280b22bb0 Mon Sep 17 00:00:00 2001 From: Mateusz Charytoniuk <mateusz.charytoniuk@protonmail.com> Date: Thu, 18 Jan 2024 22:02:15 +0100 Subject: [PATCH] feat: llama.cpp integration --- config.ini.example | 8 +- docs/pages/docs/changelog/index.md | 4 + docs/pages/docs/features/ai/index.md | 11 + .../pages/docs/features/ai/llama-cpp/index.md | 73 ++++++ .../tutorials/basic-graphql-schema/index.md | 2 +- .../tutorials/connect-to-llama-cpp/index.md | 144 +++++++++++ docs/pages/tutorials/hello-world/index.md | 2 +- ...llamaGenerate.php => LlamaCppGenerate.php} | 25 +- src/Command/LlamaCppGenerate/Completion.php | 32 +++ src/Command/LlamaCppGenerate/Embedding.php | 41 +++ src/Command/LlamaCppHealth.php | 34 +++ src/Command/LlamaCppInfill.php | 44 ++++ src/Command/OllamaChat.php | 71 ------ src/Command/OllamaGenerate/Completion.php | 33 --- src/Command/OllamaGenerate/Embedding.php | 51 ---- src/JsonSerializer.php | 19 +- src/LlamaCppClient.php | 233 ++++++++++++++++++ src/LlamaCppCompletionRequest.php | 26 ++ src/LlamaCppCompletionToken.php | 20 ++ ...guration.php => LlamaCppConfiguration.php} | 3 +- src/LlamaCppEmbedding.php | 15 ++ ...ssage.php => LlamaCppEmbeddingRequest.php} | 10 +- src/LlamaCppHealthStatus.php | 12 + src/LlamaCppInfill.php | 21 ++ src/LlamaCppInfillRequest.php | 27 ++ ...inkBuilder.php => LlamaCppLinkBuilder.php} | 4 +- src/OllamaChatRequest.php | 30 --- src/OllamaChatRole.php | 12 - src/OllamaChatSession.php | 41 --- src/OllamaChatToken.php | 21 -- src/OllamaClient.php | 184 -------------- src/OllamaCompletionRequest.php | 32 --- src/OllamaCompletionToken.php | 21 -- src/OllamaEmbeddingRequest.php | 27 -- src/OllamaEmbeddingResponse.php | 20 -- src/OllamaRequestOptions.php | 27 -- src/OllamaRequestStopDelimiter.php | 23 -- ....php => LlamaCppConfigurationProvider.php} | 22 +- 38 files changed, 786 insertions(+), 639 deletions(-) create mode 100644 docs/pages/docs/features/ai/index.md create mode 100644 docs/pages/docs/features/ai/llama-cpp/index.md create mode 100644 docs/pages/tutorials/connect-to-llama-cpp/index.md rename src/Command/{OllamaGenerate.php => LlamaCppGenerate.php} (52%) create mode 100644 src/Command/LlamaCppGenerate/Completion.php create mode 100644 src/Command/LlamaCppGenerate/Embedding.php create mode 100644 src/Command/LlamaCppHealth.php create mode 100644 src/Command/LlamaCppInfill.php delete mode 100644 src/Command/OllamaChat.php delete mode 100644 src/Command/OllamaGenerate/Completion.php delete mode 100644 src/Command/OllamaGenerate/Embedding.php create mode 100644 src/LlamaCppClient.php create mode 100644 src/LlamaCppCompletionRequest.php create mode 100644 src/LlamaCppCompletionToken.php rename src/{OllamaConfiguration.php => LlamaCppConfiguration.php} (74%) create mode 100644 src/LlamaCppEmbedding.php rename src/{OllamaChatMessage.php => LlamaCppEmbeddingRequest.php} (53%) create mode 100644 src/LlamaCppHealthStatus.php create mode 100644 src/LlamaCppInfill.php create mode 100644 src/LlamaCppInfillRequest.php rename src/{OllamaLinkBuilder.php => LlamaCppLinkBuilder.php} (82%) delete mode 100644 src/OllamaChatRequest.php delete mode 100644 src/OllamaChatRole.php delete mode 100644 src/OllamaChatSession.php delete mode 100644 src/OllamaChatToken.php delete mode 100644 src/OllamaClient.php delete mode 100644 src/OllamaCompletionRequest.php delete mode 100644 src/OllamaCompletionToken.php delete mode 100644 src/OllamaEmbeddingRequest.php delete mode 100644 src/OllamaEmbeddingResponse.php delete mode 100644 
src/OllamaRequestOptions.php delete mode 100644 src/OllamaRequestStopDelimiter.php rename src/SingletonProvider/ConfigurationProvider/{OllamaConfigurationProvider.php => LlamaCppConfigurationProvider.php} (64%) diff --git a/config.ini.example b/config.ini.example index cdb398fb..61f30836 100644 --- a/config.ini.example +++ b/config.ini.example @@ -15,14 +15,14 @@ default[log_queries] = false default[pool_prefill] = false default[pool_size] = 8 +[llamacpp] +host = 127.0.0.1 +port = 8081 + [manifest] background_color = "#ffffff" theme_color = "#ffffff" -[ollama] -host = 127.0.0.1 -port = 11434 - [redis] default[db_index] = 0 default[host] = 127.0.0.1 diff --git a/docs/pages/docs/changelog/index.md b/docs/pages/docs/changelog/index.md index b702a1cb..a7445a8c 100644 --- a/docs/pages/docs/changelog/index.md +++ b/docs/pages/docs/changelog/index.md @@ -10,6 +10,10 @@ title: Changelog # Changelog +## v0.14.0 + +- Feature: added {{docs/features/ai/llama-cpp/index}} to integrate with LLMs + ## v0.11.1 - Fix: translation files were incorrectly loaded diff --git a/docs/pages/docs/features/ai/index.md b/docs/pages/docs/features/ai/index.md new file mode 100644 index 00000000..4413ef18 --- /dev/null +++ b/docs/pages/docs/features/ai/index.md @@ -0,0 +1,11 @@ +--- +collections: + - documents +layout: dm:document +parent: docs/features/index +title: AI +description: > + Use integration features to serve or use AI models. +--- + +{{docs/features/ai/*/index}} diff --git a/docs/pages/docs/features/ai/llama-cpp/index.md b/docs/pages/docs/features/ai/llama-cpp/index.md new file mode 100644 index 00000000..faa10672 --- /dev/null +++ b/docs/pages/docs/features/ai/llama-cpp/index.md @@ -0,0 +1,73 @@ +--- +collections: + - documents +layout: dm:document +parent: docs/features/ai/index +title: llama.cpp +description: > + Use Resonance to connect with the llama.cpp server. +--- + +## llama.cpp + +[llama.cpp](https://github.com/ggerganov/llama.cpp) is an open-source framework +capable of running various LLMs. + +It has a built-in HTTP server that supports continuous batching and parallel +requests, and is optimized for resource usage. + +You can use Resonance to connect with it and process LLM responses. 
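Before wiring the client into your application, you can verify that the server is reachable. A minimal sketch using the `getHealth()` method this patch adds to `LlamaCppClient` (the `App\LlamaCppHealthCheck` class name is illustrative, not part of the patch):

```php
<?php

namespace App;

use Distantmagic\Resonance\Attribute\Singleton;
use Distantmagic\Resonance\LlamaCppClient;
use Distantmagic\Resonance\LlamaCppHealthStatus;

#[Singleton]
readonly class LlamaCppHealthCheck
{
    public function __construct(private LlamaCppClient $llamaCppClient) {}

    public function isReady(): bool
    {
        // getHealth() issues a GET request to llama.cpp's /health endpoint
        // and maps the reported status onto the LlamaCppHealthStatus enum
        // (ok / loading model / error)
        return LlamaCppHealthStatus::Ok === $this->llamaCppClient->getHealth();
    }
}
```

The same check is available from the command line through the `llamacpp:health` command that this patch also adds.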
+ +## Usage + +You can also check the tutorial: {{tutorials/connect-to-llama-cpp/index}} + +## Configuration + +All you need to do is add a configuration section that specifies the llama.cpp +server location: + +```ini +[llamacpp] +host = 127.0.0.1 +port = 8081 +``` + +## Programmatic Use + +In your class, you need to use {{docs/features/dependency-injection/index}} to +inject `LlamaCppClient`: + +```php +<?php + +namespace App; + +use Distantmagic\Resonance\Attribute\Singleton; +use Distantmagic\Resonance\LlamaCppClient; +use Distantmagic\Resonance\LlamaCppCompletionRequest; + +#[Singleton] +class LlamaCppGenerate +{ + public function __construct(protected LlamaCppClient $llamaCppClient) + { + } + + public function doSomething(): void + { + $request = new LlamaCppCompletionRequest('How to make a cat happy?'); + + $completion = $this->llamaCppClient->generateCompletion($request); + + // each token is a chunk of text, usually a few characters, returned + // from the model you are using + foreach ($completion as $token) { + swoole_error_log(SWOOLE_LOG_DEBUG, (string) $token); + + if ($token->isLast) { + // ...do something else + } + } + } +} +``` diff --git a/docs/pages/tutorials/basic-graphql-schema/index.md b/docs/pages/tutorials/basic-graphql-schema/index.md index 12538ef4..d6042c24 100644 --- a/docs/pages/tutorials/basic-graphql-schema/index.md +++ b/docs/pages/tutorials/basic-graphql-schema/index.md @@ -3,7 +3,7 @@ collections: - tutorials layout: dm:tutorial parent: tutorials/index -title: Basic GraphQL Schema +title: Building a Basic GraphQL Schema description: > Learn How to Build a Basic GraphQL Schema --- diff --git a/docs/pages/tutorials/connect-to-llama-cpp/index.md b/docs/pages/tutorials/connect-to-llama-cpp/index.md new file mode 100644 index 00000000..26041365 --- /dev/null +++ b/docs/pages/tutorials/connect-to-llama-cpp/index.md @@ -0,0 +1,144 @@ +--- +collections: + - tutorials +layout: dm:tutorial +parent: tutorials/index +title: How to Serve LLM Completions (With llama.cpp) +description: > + How to connect with llama.cpp and issue parallel requests for LLM + completions and embeddings with Resonance. +--- + +## Preparations + +To start, you need to compile +[llama.cpp](https://github.com/ggerganov/llama.cpp). You can follow their +[README](https://github.com/ggerganov/llama.cpp/blob/master/README.md) for +instructions. + +The server is compiled alongside the other targets by default. + +Once you have the server running, we can continue. + +## Troubleshooting + +### Obtaining an Open-Source LLM + +I recommend starting with either [llama2](https://ai.meta.com/llama/) or +[Mistral](https://mistral.ai/). You need to download the pretrained weights +and convert them into GGUF format before they can be used with +[llama.cpp](https://github.com/ggerganov/llama.cpp). + +### Starting the Server Without a GPU + +[llama.cpp](https://github.com/ggerganov/llama.cpp) supports CPU-only setups, +so you don't have to do any additional configuration. It will be slow, but it +will still generate tokens. + +### Running With Low VRAM + +You can try quantization if you don't have enough VRAM on your GPU to run a +specific model. Quantization lowers the response quality, but it also lowers +the amount of memory the model needs. Llama.cpp has a utility to quantize +models: + +```shell +$ ./quantize ./models/7B/ggml-model-f16.gguf ./models/7B/ggml-model-q4_0.gguf q4_0 +``` + +10 GB of VRAM is enough to run most quantized models. 
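The tutorial description mentions embeddings as well; they follow the same request pattern as the completions shown below, but return a vector instead of a token stream. A minimal sketch using the `generateEmbedding()` method this patch adds (the `App\LlamaCppEmbedText` class name is illustrative):

```php
<?php

namespace App;

use Distantmagic\Resonance\Attribute\Singleton;
use Distantmagic\Resonance\LlamaCppClient;
use Distantmagic\Resonance\LlamaCppEmbeddingRequest;

#[Singleton]
readonly class LlamaCppEmbedText
{
    public function __construct(private LlamaCppClient $llamaCppClient) {}

    /**
     * @return array<float>
     */
    public function embed(string $content): array
    {
        // generateEmbedding() sends a blocking POST request to llama.cpp's
        // /embedding endpoint and wraps the returned vector in a
        // LlamaCppEmbedding value object
        $embedding = $this->llamaCppClient->generateEmbedding(
            new LlamaCppEmbeddingRequest($content),
        );

        return $embedding->embedding;
    }
}
```

The `llamacpp:embedding` console command added in this patch wraps the same call, so you can also try it from the CLI.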
+ +## Starting llama.cpp Server + +While writing this tutorial, I started the server with the following command: + +```shell +$ ./server \ +    --model ~/llama-2-7b-chat/ggml-model-q4_0.gguf \ +    --n-gpu-layers 200000 \ +    --ctx-size 2048 \ +    --parallel 8 \ +    --cont-batching \ +    --mlock \ +    --port 8081 +``` + +The `--cont-batching` parameter is essential because it enables continuous +batching, an optimization technique that lets the server respond to multiple +completion requests in parallel. + +Without it, even with multiple `--parallel` slots, the server could answer +only one request at a time. + +## Configuring Resonance + +All you need to do is add a configuration section that specifies the llama.cpp +server location: + +```ini +[llamacpp] +host = 127.0.0.1 +port = 8081 +``` + +## Testing + +Resonance has built-in commands that connect to llama.cpp and issue requests. +You can send a sample prompt through `llamacpp:completion`: + +```shell +$ php ./bin/resonance.php llamacpp:completion "How to write a 'Hello, world' in PHP?" +To write a "Hello, world" in PHP, you can use the following code: + +<?php + echo "Hello, world!"; +?> + +This will produce a simple "Hello, world!" message when executed. +``` + +## Programmatic Use + +In your class, you need to use {{docs/features/dependency-injection/index}} to +inject `LlamaCppClient`: + +```php +<?php + +namespace App; + +use Distantmagic\Resonance\Attribute\Singleton; +use Distantmagic\Resonance\LlamaCppClient; +use Distantmagic\Resonance\LlamaCppCompletionRequest; + +#[Singleton] +class LlamaCppGenerate +{ + public function __construct(protected LlamaCppClient $llamaCppClient) + { + } + + public function doSomething(): void + { + $request = new LlamaCppCompletionRequest('How to make a cat happy?'); + + $completion = $this->llamaCppClient->generateCompletion($request); + + // each token is a chunk of text, usually a few characters, returned + // from the model you are using + foreach ($completion as $token) { + swoole_error_log(SWOOLE_LOG_DEBUG, (string) $token); + + if ($token->isLast) { + // ...do something else + } + } + } +} +``` + +## Summary + +In this tutorial, we went through how to start the +[llama.cpp](https://github.com/ggerganov/llama.cpp) server and connect to it +with Resonance. diff --git a/docs/pages/tutorials/hello-world/index.md b/docs/pages/tutorials/hello-world/index.md index 268fc59b..696d334e 100644 --- a/docs/pages/tutorials/hello-world/index.md +++ b/docs/pages/tutorials/hello-world/index.md @@ -3,7 +3,7 @@ collections: - tutorials layout: dm:tutorial parent: tutorials/index -title: Hello, World! +title: "'Hello, World' with Resonance" description: > Let's walk step by step through the basic Resonance project. 
--- diff --git a/src/Command/OllamaGenerate.php b/src/Command/LlamaCppGenerate.php similarity index 52% rename from src/Command/OllamaGenerate.php rename to src/Command/LlamaCppGenerate.php index 9cbc8bb1..681b789e 100644 --- a/src/Command/OllamaGenerate.php +++ b/src/Command/LlamaCppGenerate.php @@ -5,19 +5,18 @@ declare(strict_types=1); namespace Distantmagic\Resonance\Command; use Distantmagic\Resonance\CoroutineCommand; -use Distantmagic\Resonance\OllamaClient; +use Distantmagic\Resonance\LlamaCppClient; use Distantmagic\Resonance\SwooleConfiguration; use Symfony\Component\Console\Input\InputArgument; use Symfony\Component\Console\Input\InputInterface; -use Symfony\Component\Console\Input\InputOption; use Symfony\Component\Console\Output\OutputInterface; -abstract class OllamaGenerate extends CoroutineCommand +abstract class LlamaCppGenerate extends CoroutineCommand { - abstract protected function executeOllamaCommand(InputInterface $input, OutputInterface $output, string $model, string $prompt): int; + abstract protected function executeLlamaCppCommand(InputInterface $input, OutputInterface $output, string $prompt): int; public function __construct( - protected OllamaClient $ollamaClient, + protected LlamaCppClient $llamaCppClient, SwooleConfiguration $swooleConfiguration, ) { parent::__construct($swooleConfiguration); @@ -25,26 +24,20 @@ abstract class OllamaGenerate extends CoroutineCommand protected function configure(): void { - $this->addArgument('prompt', InputArgument::REQUIRED); - $this->addOption( - default: 'mistral', - mode: InputOption::VALUE_REQUIRED, - name: 'model', + $this->addArgument( + name: 'prompt', + mode: InputArgument::OPTIONAL, + default: 'How to make a cat happy? Be brief, respond in 1 sentence.', ); } protected function executeInCoroutine(InputInterface $input, OutputInterface $output): int { - /** - * @var string $model - */ - $model = $input->getOption('model'); - /** * @var string $prompt */ $prompt = $input->getArgument('prompt'); - return $this->executeOllamaCommand($input, $output, $model, $prompt); + return $this->executeLlamaCppCommand($input, $output, $prompt); } } diff --git a/src/Command/LlamaCppGenerate/Completion.php b/src/Command/LlamaCppGenerate/Completion.php new file mode 100644 index 00000000..6ae0b135 --- /dev/null +++ b/src/Command/LlamaCppGenerate/Completion.php @@ -0,0 +1,32 @@ +<?php + +declare(strict_types=1); + +namespace Distantmagic\Resonance\Command\LlamaCppGenerate; + +use Distantmagic\Resonance\Attribute\ConsoleCommand; +use Distantmagic\Resonance\Command; +use Distantmagic\Resonance\Command\LlamaCppGenerate; +use Distantmagic\Resonance\LlamaCppCompletionRequest; +use Symfony\Component\Console\Input\InputInterface; +use Symfony\Component\Console\Output\OutputInterface; + +#[ConsoleCommand( + name: 'llamacpp:completion', + description: 'Generate completion based on a prompt' +)] +final class Completion extends LlamaCppGenerate +{ + protected function executeLlamaCppCommand(InputInterface $input, OutputInterface $output, string $prompt): int + { + $request = new LlamaCppCompletionRequest($prompt); + + $completion = $this->llamaCppClient->generateCompletion($request); + + foreach ($completion as $token) { + $output->write((string) $token); + } + + return Command::SUCCESS; + } +} diff --git a/src/Command/LlamaCppGenerate/Embedding.php b/src/Command/LlamaCppGenerate/Embedding.php new file mode 100644 index 00000000..d1e76f8d --- /dev/null +++ b/src/Command/LlamaCppGenerate/Embedding.php @@ -0,0 +1,41 @@ +<?php + 
+declare(strict_types=1); + +namespace Distantmagic\Resonance\Command\LlamaCppGenerate; + +use Distantmagic\Resonance\Attribute\ConsoleCommand; +use Distantmagic\Resonance\Command; +use Distantmagic\Resonance\Command\LlamaCppGenerate; +use Distantmagic\Resonance\JsonSerializer; +use Distantmagic\Resonance\LlamaCppClient; +use Distantmagic\Resonance\LlamaCppEmbeddingRequest; +use Distantmagic\Resonance\SwooleConfiguration; +use Symfony\Component\Console\Input\InputInterface; +use Symfony\Component\Console\Output\OutputInterface; + +#[ConsoleCommand( + name: 'llamacpp:embedding', + description: 'Generate embedding based on a prompt' +)] +final class Embedding extends LlamaCppGenerate +{ + public function __construct( + private JsonSerializer $jsonSerializer, + LlamaCppClient $llamaCppClient, + SwooleConfiguration $swooleConfiguration, + ) { + parent::__construct($llamaCppClient, $swooleConfiguration); + } + + protected function executeLlamaCppCommand(InputInterface $input, OutputInterface $output, string $prompt): int + { + $request = new LlamaCppEmbeddingRequest($prompt); + + $embedding = $this->llamaCppClient->generateEmbedding($request); + + $output->writeln($this->jsonSerializer->serialize($embedding->embedding)); + + return Command::SUCCESS; + } +} diff --git a/src/Command/LlamaCppHealth.php b/src/Command/LlamaCppHealth.php new file mode 100644 index 00000000..4c887b1d --- /dev/null +++ b/src/Command/LlamaCppHealth.php @@ -0,0 +1,34 @@ +<?php + +declare(strict_types=1); + +namespace Distantmagic\Resonance\Command; + +use Distantmagic\Resonance\Attribute\ConsoleCommand; +use Distantmagic\Resonance\Command; +use Distantmagic\Resonance\CoroutineCommand; +use Distantmagic\Resonance\LlamaCppClient; +use Distantmagic\Resonance\SwooleConfiguration; +use Symfony\Component\Console\Input\InputInterface; +use Symfony\Component\Console\Output\OutputInterface; + +#[ConsoleCommand( + name: 'llamacpp:health', + description: 'Get server\'s health status' +)] +final class LlamaCppHealth extends CoroutineCommand +{ + public function __construct( + private LlamaCppClient $llamaCppClient, + SwooleConfiguration $swooleConfiguration, + ) { + parent::__construct($swooleConfiguration); + } + + protected function executeInCoroutine(InputInterface $input, OutputInterface $output): int + { + $output->writeln($this->llamaCppClient->getHealth()->value); + + return Command::SUCCESS; + } +} diff --git a/src/Command/LlamaCppInfill.php b/src/Command/LlamaCppInfill.php new file mode 100644 index 00000000..30dbf0d1 --- /dev/null +++ b/src/Command/LlamaCppInfill.php @@ -0,0 +1,44 @@ +<?php + +declare(strict_types=1); + +namespace Distantmagic\Resonance\Command; + +use Distantmagic\Resonance\Attribute\ConsoleCommand; +use Distantmagic\Resonance\Command; +use Distantmagic\Resonance\CoroutineCommand; +use Distantmagic\Resonance\JsonSerializer; +use Distantmagic\Resonance\LlamaCppClient; +use Distantmagic\Resonance\LlamaCppInfillRequest; +use Distantmagic\Resonance\SwooleConfiguration; +use Symfony\Component\Console\Input\InputInterface; +use Symfony\Component\Console\Output\OutputInterface; + +#[ConsoleCommand( + name: 'llamacpp:infill', + description: 'Generate code infill' +)] +final class LlamaCppInfill extends CoroutineCommand +{ + public function __construct( + private JsonSerializer $jsonSerializer, + private LlamaCppClient $llamaCppClient, + SwooleConfiguration $swooleConfiguration, + ) { + parent::__construct($swooleConfiguration); + } + + protected function executeInCoroutine(InputInterface $input, OutputInterface 
$output): int + { + $request = new LlamaCppInfillRequest( + before: '<?php // hello world', + after: '?>', + ); + + foreach ($this->llamaCppClient->generateInfill($request) as $token) { + $output->write((string) $token); + } + + return Command::SUCCESS; + } +} diff --git a/src/Command/OllamaChat.php b/src/Command/OllamaChat.php deleted file mode 100644 index 656210f4..00000000 --- a/src/Command/OllamaChat.php +++ /dev/null @@ -1,71 +0,0 @@ -<?php - -declare(strict_types=1); - -namespace Distantmagic\Resonance\Command; - -use Distantmagic\Resonance\Attribute\ConsoleCommand; -use Distantmagic\Resonance\CoroutineCommand; -use Distantmagic\Resonance\OllamaChatSession; -use Distantmagic\Resonance\OllamaClient; -use Distantmagic\Resonance\SwooleConfiguration; -use Symfony\Component\Console\Command\Command; -use Symfony\Component\Console\Helper\QuestionHelper; -use Symfony\Component\Console\Input\InputInterface; -use Symfony\Component\Console\Input\InputOption; -use Symfony\Component\Console\Output\OutputInterface; -use Symfony\Component\Console\Question\Question; - -#[ConsoleCommand( - name: 'ollama:chat', - description: 'Chat with LLM model through Ollama' -)] -final class OllamaChat extends CoroutineCommand -{ - public function __construct( - protected OllamaClient $ollamaClient, - SwooleConfiguration $swooleConfiguration, - ) { - parent::__construct($swooleConfiguration); - } - - protected function configure(): void - { - $this->addOption( - default: 'mistral', - mode: InputOption::VALUE_REQUIRED, - name: 'model', - ); - } - - protected function executeInCoroutine(InputInterface $input, OutputInterface $output): int - { - /** - * @var string $model - */ - $model = $input->getOption('model'); - - /** - * @var QuestionHelper $helper - */ - $helper = $this->getHelper('question'); - $userInputQuestion = new Question('> '); - - $chatSession = new OllamaChatSession( - model: $model, - ollamaClient: $this->ollamaClient, - ); - - while (true) { - $userMessageContent = $helper->ask($input, $output, $userInputQuestion); - - foreach ($chatSession->respond($userMessageContent) as $value) { - $output->write((string) $value); - } - - $output->writeln(''); - } - - return Command::SUCCESS; - } -} diff --git a/src/Command/OllamaGenerate/Completion.php b/src/Command/OllamaGenerate/Completion.php deleted file mode 100644 index 9d9d523b..00000000 --- a/src/Command/OllamaGenerate/Completion.php +++ /dev/null @@ -1,33 +0,0 @@ -<?php - -declare(strict_types=1); - -namespace Distantmagic\Resonance\Command\OllamaGenerate; - -use Distantmagic\Resonance\Attribute\ConsoleCommand; -use Distantmagic\Resonance\Command; -use Distantmagic\Resonance\Command\OllamaGenerate; -use Distantmagic\Resonance\OllamaCompletionRequest; -use Symfony\Component\Console\Input\InputInterface; -use Symfony\Component\Console\Output\OutputInterface; - -#[ConsoleCommand( - name: 'ollama:completion', - description: 'Generate LLM completion' -)] -final class Completion extends OllamaGenerate -{ - protected function executeOllamaCommand(InputInterface $input, OutputInterface $output, string $model, string $prompt): int - { - $completionRequest = new OllamaCompletionRequest( - model: $model, - prompt: $prompt, - ); - - foreach ($this->ollamaClient->generateCompletion($completionRequest) as $token) { - $output->write((string) $token); - } - - return Command::SUCCESS; - } -} diff --git a/src/Command/OllamaGenerate/Embedding.php b/src/Command/OllamaGenerate/Embedding.php deleted file mode 100644 index 9c5ec24e..00000000 --- 
a/src/Command/OllamaGenerate/Embedding.php +++ /dev/null @@ -1,51 +0,0 @@ -<?php - -declare(strict_types=1); - -namespace Distantmagic\Resonance\Command\OllamaGenerate; - -use Distantmagic\Resonance\Attribute\ConsoleCommand; -use Distantmagic\Resonance\Command; -use Distantmagic\Resonance\Command\OllamaGenerate; -use Distantmagic\Resonance\JsonSerializer; -use Distantmagic\Resonance\OllamaClient; -use Distantmagic\Resonance\OllamaEmbeddingRequest; -use Distantmagic\Resonance\SwooleConfiguration; -use Symfony\Component\Console\Input\InputInterface; -use Symfony\Component\Console\Output\OutputInterface; - -#[ConsoleCommand( - name: 'ollama:embedding', - description: 'Generate LLM embedding' -)] -final class Embedding extends OllamaGenerate -{ - public function __construct( - private JsonSerializer $jsonSerializer, - OllamaClient $ollamaClient, - SwooleConfiguration $swooleConfiguration, - ) { - parent::__construct($ollamaClient, $swooleConfiguration); - } - - protected function executeOllamaCommand(InputInterface $input, OutputInterface $output, string $model, string $prompt): int - { - $embeddingRequest = new OllamaEmbeddingRequest( - model: $model, - prompt: $prompt, - ); - - $embeddingResponse = $this - ->ollamaClient - ->generateEmbedding($embeddingRequest) - ; - - $output->writeln( - $this - ->jsonSerializer - ->serialize($embeddingResponse) - ); - - return Command::SUCCESS; - } -} diff --git a/src/JsonSerializer.php b/src/JsonSerializer.php index 7fb69fa9..be467ddf 100644 --- a/src/JsonSerializer.php +++ b/src/JsonSerializer.php @@ -5,11 +5,17 @@ declare(strict_types=1); namespace Distantmagic\Resonance; use Distantmagic\Resonance\Attribute\Singleton; +use RuntimeException; #[Singleton] readonly class JsonSerializer { - public function __construct(private ApplicationConfiguration $applicationConfiguration) {} + public function __construct(private ApplicationConfiguration $applicationConfiguration) + { + if (!function_exists('swoole_substr_json_decode')) { + throw new RuntimeException('You need to compile Swoole with JSON support'); + } + } public function serialize(mixed $data): string { @@ -21,11 +27,14 @@ readonly class JsonSerializer ); } - public function unserialize(string $data): mixed - { - return json_decode( - json: $data, + public function unserialize( + string $json, + int $offset = 0, + ): mixed { + return swoole_substr_json_decode( flags: JSON_THROW_ON_ERROR, + offset: $offset, + str: $json, ); } } diff --git a/src/LlamaCppClient.php b/src/LlamaCppClient.php new file mode 100644 index 00000000..b0f88345 --- /dev/null +++ b/src/LlamaCppClient.php @@ -0,0 +1,233 @@ +<?php + +declare(strict_types=1); + +namespace Distantmagic\Resonance; + +use CurlHandle; +use Distantmagic\Resonance\Attribute\Singleton; +use Generator; +use JsonSerializable; +use RuntimeException; +use Swoole\Coroutine\Channel; + +#[Singleton] +readonly class LlamaCppClient +{ + // strlen('data: ') + public const COMPLETION_CHUNKED_DATA_PREFIX_LENGTH = 6; + + public function __construct( + private JsonSerializer $jsonSerializer, + private LlamaCppConfiguration $llamaCppConfiguration, + private LlamaCppLinkBuilder $llamaCppLinkBuilder, + ) {} + + /** + * @return Generator<LlamaCppCompletionToken> + */ + public function generateCompletion(LlamaCppCompletionRequest $request): Generator + { + $curlHandle = $this->createCurlHandle(); + + curl_setopt($curlHandle, CURLOPT_POST, true); + + $responseChunks = $this->streamResponse($curlHandle, $request, '/completion'); + + /** + * @var null|string + */ + 
$previousContent = null; + + foreach ($responseChunks as $responseChunk) { + /** + * @var object{ + * content: string, + * stop: boolean, + * } + */ + $unserializedToken = $this->jsonSerializer->unserialize( + json: $responseChunk, + offset: self::COMPLETION_CHUNKED_DATA_PREFIX_LENGTH, + ); + + if (is_string($previousContent)) { + yield new LlamaCppCompletionToken( + content: $previousContent, + isLast: $unserializedToken->stop, + ); + + $previousContent = null; + } + + if (!$unserializedToken->stop) { + $previousContent = $unserializedToken->content; + } + } + } + + public function generateEmbedding(LlamaCppEmbeddingRequest $request): LlamaCppEmbedding + { + $curlHandle = $this->createCurlHandle(); + + $requestData = json_encode($request); + + curl_setopt($curlHandle, CURLOPT_POST, true); + curl_setopt($curlHandle, CURLOPT_POSTFIELDS, $requestData); + curl_setopt($curlHandle, CURLOPT_RETURNTRANSFER, true); + curl_setopt($curlHandle, CURLOPT_URL, $this->llamaCppLinkBuilder->build('/embedding')); + + /** + * @var false|string $responseContent + */ + $responseContent = curl_exec($curlHandle); + + if (false === $responseContent) { + throw new CurlException($curlHandle); + } + + $this->assertStatusCode($curlHandle, 200); + + /** + * @var object{ embedding: array<float> } $responseData + */ + $responseData = $this + ->jsonSerializer + ->unserialize($responseContent) + ; + + return new LlamaCppEmbedding($responseData->embedding); + } + + /** + * @return Generator<LlamaCppInfill> + */ + public function generateInfill(LlamaCppInfillRequest $request): Generator + { + $curlHandle = $this->createCurlHandle(); + + curl_setopt($curlHandle, CURLOPT_POST, true); + + $responseChunks = $this->streamResponse($curlHandle, $request, '/infill'); + + foreach ($responseChunks as $responseChunk) { + /** + * @var object{ content: string } + */ + $token = $this->jsonSerializer->unserialize($responseChunk); + + yield new LlamaCppInfill( + after: $request->after, + before: $request->before, + content: $token->content, + ); + } + } + + public function getHealth(): LlamaCppHealthStatus + { + $curlHandle = $this->createCurlHandle(); + + curl_setopt($curlHandle, CURLOPT_RETURNTRANSFER, true); + curl_setopt($curlHandle, CURLOPT_URL, $this->llamaCppLinkBuilder->build('/health')); + + /** + * @var false|string $responseContent + */ + $responseContent = curl_exec($curlHandle); + + if (false === $responseContent) { + throw new CurlException($curlHandle); + } + + $this->assertStatusCode($curlHandle, 200); + + /** + * @var object{ status: string } $responseData + */ + $responseData = $this + ->jsonSerializer + ->unserialize($responseContent) + ; + + return LlamaCppHealthStatus::from($responseData->status); + } + + private function assertStatusCode(CurlHandle $curlHandle, int $expectedStatusCode): void + { + /** + * @var int $statusCode + */ + $statusCode = curl_getinfo($curlHandle, CURLINFO_RESPONSE_CODE); + + if ($expectedStatusCode === $statusCode) { + return; + } + + throw new RuntimeException(sprintf( + 'curl request finished with unexpected status code: "%s"', + $statusCode, + )); + } + + private function createCurlHandle(): CurlHandle + { + $curlHandle = curl_init(); + + /** + * @var array<string> + */ + $headers = [ + 'Content-Type: application/json', + ]; + + if ($this->llamaCppConfiguration->apiKey) { + $headers[] = sprintf('Authorization: Bearer %s', $this->llamaCppConfiguration->apiKey); + } + + curl_setopt($curlHandle, CURLOPT_HTTPHEADER, $headers); + + return $curlHandle; + } + + /** + * @return 
SwooleChannelIterator<string> + */ + private function streamResponse(CurlHandle $curlHandle, JsonSerializable $request, string $path): SwooleChannelIterator + { + $channel = new Channel(1); + $requestData = json_encode($request); + + $cid = go(function () use ($channel, $curlHandle, $path, $requestData) { + try { + curl_setopt($curlHandle, CURLOPT_POSTFIELDS, $requestData); + curl_setopt($curlHandle, CURLOPT_RETURNTRANSFER, false); + curl_setopt($curlHandle, CURLOPT_URL, $this->llamaCppLinkBuilder->build($path)); + curl_setopt($curlHandle, CURLOPT_WRITEFUNCTION, static function (CurlHandle $curlHandle, string $data) use ($channel) { + $channel->push($data); + + return strlen($data); + }); + + if (!curl_exec($curlHandle)) { + throw new CurlException($curlHandle); + } + + $this->assertStatusCode($curlHandle, 200); + } finally { + curl_setopt($curlHandle, CURLOPT_WRITEFUNCTION, null); + + $channel->close(); + } + }); + + if (!is_int($cid)) { + throw new RuntimeException('Unable to start a coroutine'); + } + + /** + * @var SwooleChannelIterator<string> + */ + return new SwooleChannelIterator($channel); + } +} diff --git a/src/LlamaCppCompletionRequest.php b/src/LlamaCppCompletionRequest.php new file mode 100644 index 00000000..53c54b86 --- /dev/null +++ b/src/LlamaCppCompletionRequest.php @@ -0,0 +1,26 @@ +<?php + +declare(strict_types=1); + +namespace Distantmagic\Resonance; + +use JsonSerializable; + +readonly class LlamaCppCompletionRequest implements JsonSerializable +{ + public function __construct( + public string $prompt, + ) {} + + public function jsonSerialize(): array + { + return [ + 'prompt' => sprintf('[INST]%s[SYST]', $this->prompt), + 'stop' => [ + '[INST]', + '[SYST]', + ], + 'stream' => true, + ]; + } +} diff --git a/src/LlamaCppCompletionToken.php b/src/LlamaCppCompletionToken.php new file mode 100644 index 00000000..6b3e9fb9 --- /dev/null +++ b/src/LlamaCppCompletionToken.php @@ -0,0 +1,20 @@ +<?php + +declare(strict_types=1); + +namespace Distantmagic\Resonance; + +use Stringable; + +readonly class LlamaCppCompletionToken implements Stringable +{ + public function __construct( + public string $content, + public bool $isLast, + ) {} + + public function __toString(): string + { + return $this->content; + } +} diff --git a/src/OllamaConfiguration.php b/src/LlamaCppConfiguration.php similarity index 74% rename from src/OllamaConfiguration.php rename to src/LlamaCppConfiguration.php index 9796d667..1da4070f 100644 --- a/src/OllamaConfiguration.php +++ b/src/LlamaCppConfiguration.php @@ -4,9 +4,10 @@ declare(strict_types=1); namespace Distantmagic\Resonance; -readonly class OllamaConfiguration +readonly class LlamaCppConfiguration { public function __construct( + public ?string $apiKey, public string $host, public int $port, public string $scheme, diff --git a/src/LlamaCppEmbedding.php b/src/LlamaCppEmbedding.php new file mode 100644 index 00000000..a779d6f9 --- /dev/null +++ b/src/LlamaCppEmbedding.php @@ -0,0 +1,15 @@ +<?php + +declare(strict_types=1); + +namespace Distantmagic\Resonance; + +readonly class LlamaCppEmbedding +{ + /** + * @param array<float> $embedding + */ + public function __construct( + public array $embedding, + ) {} +} diff --git a/src/OllamaChatMessage.php b/src/LlamaCppEmbeddingRequest.php similarity index 53% rename from src/OllamaChatMessage.php rename to src/LlamaCppEmbeddingRequest.php index a50091df..39985104 100644 --- a/src/OllamaChatMessage.php +++ b/src/LlamaCppEmbeddingRequest.php @@ -5,25 +5,17 @@ declare(strict_types=1); namespace
Distantmagic\Resonance; use JsonSerializable; -use Stringable; -readonly class OllamaChatMessage implements JsonSerializable, Stringable +readonly class LlamaCppEmbeddingRequest implements JsonSerializable { public function __construct( public string $content, - public OllamaChatRole $role, ) {} - public function __toString(): string - { - return $this->content; - } - public function jsonSerialize(): array { return [ 'content' => $this->content, - 'role' => $this->role->value, ]; } } diff --git a/src/LlamaCppHealthStatus.php b/src/LlamaCppHealthStatus.php new file mode 100644 index 00000000..1dcff198 --- /dev/null +++ b/src/LlamaCppHealthStatus.php @@ -0,0 +1,12 @@ +<?php + +declare(strict_types=1); + +namespace Distantmagic\Resonance; + +enum LlamaCppHealthStatus: string +{ + case Error = 'error'; + case LoadingModel = 'loading model'; + case Ok = 'ok'; +} diff --git a/src/LlamaCppInfill.php b/src/LlamaCppInfill.php new file mode 100644 index 00000000..1284e1c2 --- /dev/null +++ b/src/LlamaCppInfill.php @@ -0,0 +1,21 @@ +<?php + +declare(strict_types=1); + +namespace Distantmagic\Resonance; + +use Stringable; + +readonly class LlamaCppInfill implements Stringable +{ + public function __construct( + public string $after, + public string $before, + public string $content, + ) {} + + public function __toString(): string + { + return $this->content; + } +} diff --git a/src/LlamaCppInfillRequest.php b/src/LlamaCppInfillRequest.php new file mode 100644 index 00000000..adfa49fc --- /dev/null +++ b/src/LlamaCppInfillRequest.php @@ -0,0 +1,27 @@ +<?php + +declare(strict_types=1); + +namespace Distantmagic\Resonance; + +use JsonSerializable; + +readonly class LlamaCppInfillRequest implements JsonSerializable +{ + public function __construct( + public string $after, + public string $before, + ) {} + + public function jsonSerialize(): array + { + return [ + 'infill_prefix' => $this->before, + 'infill_suffix' => $this->after, + + // the prompt field should not be mandatory; it's a bug: + // https://github.com/ggerganov/llama.cpp/issues/4027 + 'prompt' => 'prompt', + ]; + } +} diff --git a/src/OllamaLinkBuilder.php b/src/LlamaCppLinkBuilder.php similarity index 82% rename from src/OllamaLinkBuilder.php rename to src/LlamaCppLinkBuilder.php index 281a2de6..cea1533f 100644 --- a/src/OllamaLinkBuilder.php +++ b/src/LlamaCppLinkBuilder.php @@ -7,10 +7,10 @@ namespace Distantmagic\Resonance; use Distantmagic\Resonance\Attribute\Singleton; #[Singleton] -readonly class OllamaLinkBuilder +readonly class LlamaCppLinkBuilder { public function __construct( - private OllamaConfiguration $ollamaConfiguration, + private LlamaCppConfiguration $ollamaConfiguration, ) {} public function build(string $path): string diff --git a/src/OllamaChatRequest.php b/src/OllamaChatRequest.php deleted file mode 100644 index c87c64eb..00000000 --- a/src/OllamaChatRequest.php +++ /dev/null @@ -1,30 +0,0 @@ -<?php - -declare(strict_types=1); - -namespace Distantmagic\Resonance; - -use JsonSerializable; - -readonly class OllamaChatRequest implements JsonSerializable -{ - /** - * @param array<OllamaChatMessage> $messages - */ - public function __construct( - public string $model, - public array $messages, - public OllamaRequestOptions $options = new OllamaRequestOptions(), - ) {} - - public function jsonSerialize(): array - { - return [ - 'model' => $this->model, - 'messages' => $this->messages, - 'options' => $this->options, - 'raw' => true, - 'stream' => true, - ]; - } -} diff --git a/src/OllamaChatRole.php b/src/OllamaChatRole.php deleted
file mode 100644 index 0a66628a..00000000 --- a/src/OllamaChatRole.php +++ /dev/null @@ -1,12 +0,0 @@ -<?php - -declare(strict_types=1); - -namespace Distantmagic\Resonance; - -enum OllamaChatRole: string -{ - case Assistant = 'assistant'; - case System = 'system'; - case User = 'user'; -} diff --git a/src/OllamaChatSession.php b/src/OllamaChatSession.php deleted file mode 100644 index 2ff877d7..00000000 --- a/src/OllamaChatSession.php +++ /dev/null @@ -1,41 +0,0 @@ -<?php - -declare(strict_types=1); - -namespace Distantmagic\Resonance; - -use Ds\Set; -use Generator; - -readonly class OllamaChatSession -{ - /** - * @var Set<OllamaChatMessage> - */ - private Set $messages; - - public function __construct( - public string $model, - public OllamaClient $ollamaClient, - ) { - $this->messages = new Set(); - } - - /** - * @return Generator<OllamaChatToken> - */ - public function respond(string $userMessageContent): Generator - { - $this - ->messages - ->add(new OllamaChatMessage($userMessageContent, OllamaChatRole::User)) - ; - - $chatRequest = new OllamaChatRequest( - model: $this->model, - messages: $this->messages->toArray(), - ); - - yield from $this->ollamaClient->generateChatCompletion($chatRequest); - } -} diff --git a/src/OllamaChatToken.php b/src/OllamaChatToken.php deleted file mode 100644 index 9e77141a..00000000 --- a/src/OllamaChatToken.php +++ /dev/null @@ -1,21 +0,0 @@ -<?php - -declare(strict_types=1); - -namespace Distantmagic\Resonance; - -use DateTimeImmutable; -use Stringable; - -readonly class OllamaChatToken implements Stringable -{ - public function __construct( - public DateTimeImmutable $createdAt, - public OllamaChatMessage $message, - ) {} - - public function __toString(): string - { - return (string) $this->message; - } -} diff --git a/src/OllamaClient.php b/src/OllamaClient.php deleted file mode 100644 index 2d351aa0..00000000 --- a/src/OllamaClient.php +++ /dev/null @@ -1,184 +0,0 @@ -<?php - -declare(strict_types=1); - -namespace Distantmagic\Resonance; - -use CurlHandle; -use DateTimeImmutable; -use Distantmagic\Resonance\Attribute\Singleton; -use Generator; -use JsonSerializable; -use Psr\Log\LoggerInterface; -use RuntimeException; -use Swoole\Coroutine\Channel; - -#[Singleton] -readonly class OllamaClient -{ - private CurlHandle $ch; - - public function __construct( - private JsonSerializer $jsonSerializer, - private LoggerInterface $logger, - private OllamaLinkBuilder $ollamaLinkBuilder, - ) { - $this->ch = curl_init(); - - curl_setopt($this->ch, CURLOPT_POST, true); - curl_setopt($this->ch, CURLOPT_HTTPHEADER, [ - 'Content-Type: application/json', - ]); - } - - public function __destruct() - { - curl_close($this->ch); - } - - /** - * @return Generator<OllamaChatToken> - */ - public function generateChatCompletion(OllamaChatRequest $request): Generator - { - $channel = $this->streamJson($request, '/api/chat'); - - /** - * @var SwooleChannelIterator<object{ error: string }|object{ - * created_at: string, - * message: object{ - * content: string, - * role: string, - * }, - * response: string, - * }> - */ - $swooleChannelIterator = new SwooleChannelIterator($channel); - - foreach ($swooleChannelIterator as $data) { - if (isset($data->error)) { - $this->logger->error($data->error); - } else { - yield new OllamaChatToken( - createdAt: new DateTimeImmutable($data->created_at), - message: new OllamaChatMessage( - content: $data->message->content, - role: OllamaChatRole::from($data->message->role), - ) - ); - } - } - } - - /** - * @return 
Generator<OllamaCompletionToken> - */ - public function generateCompletion(OllamaCompletionRequest $request): Generator - { - $channel = $this->streamJson($request, '/api/generate'); - - /** - * @var SwooleChannelIterator<object{ created_at: string, response: string }> - */ - $swooleChannelIterator = new SwooleChannelIterator($channel); - - foreach ($swooleChannelIterator as $token) { - yield new OllamaCompletionToken( - createdAt: new DateTimeImmutable($token->created_at), - response: $token->response, - ); - } - } - - public function generateEmbedding(OllamaEmbeddingRequest $request): OllamaEmbeddingResponse - { - $requestData = json_encode($request); - - curl_setopt($this->ch, CURLOPT_URL, $this->ollamaLinkBuilder->build('/api/embeddings')); - curl_setopt($this->ch, CURLOPT_POSTFIELDS, $requestData); - curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, true); - - /** - * @var false|string $responseContent - */ - $responseContent = curl_exec($this->ch); - - if (false === $responseContent) { - throw new CurlException($this->ch); - } - - $this->assertStatusCode(200); - - /** - * @var object{ embedding: array<float> } $responseData - */ - $responseData = $this - ->jsonSerializer - ->unserialize($responseContent) - ; - - return new OllamaEmbeddingResponse($responseData->embedding); - } - - private function assertStatusCode(int $expectedStatusCode): void - { - /** - * @var int $statusCode - */ - $statusCode = curl_getinfo($this->ch, CURLINFO_RESPONSE_CODE); - - if ($expectedStatusCode === $statusCode) { - return; - } - - throw new RuntimeException(sprintf( - 'curl request finished with unexpected status code: "%s"', - $statusCode, - )); - } - - private function streamJson(JsonSerializable $request, string $path): Channel - { - $channel = new Channel(1); - $requestData = json_encode($request); - - $cid = go(function () use ($channel, $path, $requestData) { - try { - curl_setopt($this->ch, CURLOPT_URL, $this->ollamaLinkBuilder->build($path)); - curl_setopt($this->ch, CURLOPT_POSTFIELDS, $requestData); - curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, false); - curl_setopt($this->ch, CURLOPT_WRITEFUNCTION, function (CurlHandle $ch, string $data) use ($channel) { - $dataChunks = explode("\n", $data); - - foreach ($dataChunks as $dataChunk) { - if (!empty($dataChunk)) { - $channel->push( - $this - ->jsonSerializer - ->unserialize($dataChunk) - ); - } - } - - return strlen($data); - }); - - if (!curl_exec($this->ch)) { - throw new CurlException($this->ch); - } - - $this->assertStatusCode(200); - } finally { - curl_setopt($this->ch, CURLOPT_WRITEFUNCTION, null); - - $channel->close(); - } - }); - - if (!is_int($cid)) { - throw new RuntimeException('Unable to start a coroutine'); - } - - return $channel; - } -} diff --git a/src/OllamaCompletionRequest.php b/src/OllamaCompletionRequest.php deleted file mode 100644 index 1215d4fe..00000000 --- a/src/OllamaCompletionRequest.php +++ /dev/null @@ -1,32 +0,0 @@ -<?php - -declare(strict_types=1); - -namespace Distantmagic\Resonance; - -use JsonSerializable; - -readonly class OllamaCompletionRequest implements JsonSerializable -{ - public function __construct( - public string $model, - public string $prompt, - public OllamaRequestOptions $options = new OllamaRequestOptions(), - ) {} - - public function jsonSerialize(): array - { - return [ - 'model' => $this->model, - 'options' => $this->options, - 'prompt' => sprintf( - '%s%s%s', - $this->options->stopDelimiter->instructions, - $this->prompt, - $this->options->stopDelimiter->system, - ), - 'raw' => true, - 
'stream' => true, - ]; - } -} diff --git a/src/OllamaCompletionToken.php b/src/OllamaCompletionToken.php deleted file mode 100644 index bb4947d2..00000000 --- a/src/OllamaCompletionToken.php +++ /dev/null @@ -1,21 +0,0 @@ -<?php - -declare(strict_types=1); - -namespace Distantmagic\Resonance; - -use DateTimeImmutable; -use Stringable; - -readonly class OllamaCompletionToken implements Stringable -{ - public function __construct( - public DateTimeImmutable $createdAt, - public string $response, - ) {} - - public function __toString(): string - { - return $this->response; - } -} diff --git a/src/OllamaEmbeddingRequest.php b/src/OllamaEmbeddingRequest.php deleted file mode 100644 index cdfa7b47..00000000 --- a/src/OllamaEmbeddingRequest.php +++ /dev/null @@ -1,27 +0,0 @@ -<?php - -declare(strict_types=1); - -namespace Distantmagic\Resonance; - -use JsonSerializable; - -readonly class OllamaEmbeddingRequest implements JsonSerializable -{ - public function __construct( - public string $model, - public string $prompt, - public OllamaRequestOptions $options = new OllamaRequestOptions(), - ) {} - - public function jsonSerialize(): array - { - return [ - 'model' => $this->model, - 'options' => $this->options, - 'prompt' => $this->prompt, - 'raw' => true, - 'stream' => true, - ]; - } -} diff --git a/src/OllamaEmbeddingResponse.php b/src/OllamaEmbeddingResponse.php deleted file mode 100644 index 955bc499..00000000 --- a/src/OllamaEmbeddingResponse.php +++ /dev/null @@ -1,20 +0,0 @@ -<?php - -declare(strict_types=1); - -namespace Distantmagic\Resonance; - -use JsonSerializable; - -readonly class OllamaEmbeddingResponse implements JsonSerializable -{ - /** - * @param array<float> $embedding - */ - public function __construct(public array $embedding) {} - - public function jsonSerialize(): array - { - return $this->embedding; - } -} diff --git a/src/OllamaRequestOptions.php b/src/OllamaRequestOptions.php deleted file mode 100644 index 24a5a62c..00000000 --- a/src/OllamaRequestOptions.php +++ /dev/null @@ -1,27 +0,0 @@ -<?php - -declare(strict_types=1); - -namespace Distantmagic\Resonance; - -use JsonSerializable; - -readonly class OllamaRequestOptions implements JsonSerializable -{ - public function __construct( - public float $numPredict = -1, - public float $temperature = 0.5, - public OllamaRequestStopDelimiter $stopDelimiter = new OllamaRequestStopDelimiter(), - ) {} - - public function jsonSerialize(): array - { - $ret = []; - - $ret['num_predict'] = $this->numPredict; - $ret['stop'] = $this->stopDelimiter; - $ret['temperature'] = $this->temperature; - - return $ret; - } -} diff --git a/src/OllamaRequestStopDelimiter.php b/src/OllamaRequestStopDelimiter.php deleted file mode 100644 index 750b3f0b..00000000 --- a/src/OllamaRequestStopDelimiter.php +++ /dev/null @@ -1,23 +0,0 @@ -<?php - -declare(strict_types=1); - -namespace Distantmagic\Resonance; - -use JsonSerializable; - -readonly class OllamaRequestStopDelimiter implements JsonSerializable -{ - public function __construct( - public string $instructions = '[INST]', - public string $system = '[SYS]', - ) {} - - public function jsonSerialize(): array - { - return [ - $this->instructions, - $this->system, - ]; - } -} diff --git a/src/SingletonProvider/ConfigurationProvider/OllamaConfigurationProvider.php b/src/SingletonProvider/ConfigurationProvider/LlamaCppConfigurationProvider.php similarity index 64% rename from src/SingletonProvider/ConfigurationProvider/OllamaConfigurationProvider.php rename to 
src/SingletonProvider/ConfigurationProvider/LlamaCppConfigurationProvider.php index 086d16d6..7aebce60 100644 --- a/src/SingletonProvider/ConfigurationProvider/OllamaConfigurationProvider.php +++ b/src/SingletonProvider/ConfigurationProvider/LlamaCppConfigurationProvider.php @@ -6,22 +6,23 @@ namespace Distantmagic\Resonance\SingletonProvider\ConfigurationProvider; use Distantmagic\Resonance\Attribute\Singleton; use Distantmagic\Resonance\JsonSchema; -use Distantmagic\Resonance\OllamaConfiguration; +use Distantmagic\Resonance\LlamaCppConfiguration; use Distantmagic\Resonance\SingletonProvider\ConfigurationProvider; /** - * @template-extends ConfigurationProvider<OllamaConfiguration, object{ + * @template-extends ConfigurationProvider<LlamaCppConfiguration, object{ + * apiKey: null|string, * host: string, * port: int, * scheme: string, * }> */ -#[Singleton(provides: OllamaConfiguration::class)] -final readonly class OllamaConfigurationProvider extends ConfigurationProvider +#[Singleton(provides: LlamaCppConfiguration::class)] +final readonly class LlamaCppConfigurationProvider extends ConfigurationProvider { protected function getConfigurationKey(): string { - return 'ollama'; + return 'llamacpp'; } protected function makeSchema(): JsonSchema @@ -29,6 +30,12 @@ final readonly class OllamaConfigurationProvider extends ConfigurationProvider return new JsonSchema([ 'type' => 'object', 'properties' => [ + 'apiKey' => [ + 'type' => 'string', + 'minLength' => 1, + 'nullable' => true, + 'default' => null, + ], 'host' => [ 'type' => 'string', 'minLength' => 1, @@ -48,9 +55,10 @@ final readonly class OllamaConfigurationProvider extends ConfigurationProvider ]); } - protected function provideConfiguration($validatedData): OllamaConfiguration + protected function provideConfiguration($validatedData): LlamaCppConfiguration { - return new OllamaConfiguration( + return new LlamaCppConfiguration( + apiKey: $validatedData->apiKey, host: $validatedData->host, port: $validatedData->port, scheme: $validatedData->scheme, -- GitLab