Skip to content

Commit e78f0ab

Browse files
committed
feat(platform): add Speech
1 parent ff8fb28 commit e78f0ab

35 files changed

+1395
-3
lines changed

.phpactor.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"$schema": "/phpactor.schema.json",
3+
"language_server_phpstan.enabled": true
4+
}

demo/tests/Blog/Command/StreamCommandTest.php

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,13 @@
1616
use Symfony\AI\Agent\AgentInterface;
1717
use Symfony\AI\Platform\Message\MessageBag;
1818
use Symfony\AI\Platform\Metadata\Metadata;
19+
use Symfony\AI\Platform\Result\DeferredResult;
20+
use Symfony\AI\Platform\Result\InMemoryRawResult;
1921
use Symfony\AI\Platform\Result\RawResultInterface;
2022
use Symfony\AI\Platform\Result\ResultInterface;
23+
use Symfony\AI\Platform\Result\TextResult;
24+
use Symfony\AI\Platform\Speech\Speech;
25+
use Symfony\AI\Platform\Test\PlainConverter;
2126
use Symfony\Component\Console\Input\ArrayInput;
2227
use Symfony\Component\Console\Output\BufferedOutput;
2328
use Symfony\Component\Console\Style\SymfonyStyle;
@@ -52,6 +57,15 @@ public function getRawResult(): ?RawResultInterface
5257
public function setRawResult(RawResultInterface $rawResult): void
5358
{
5459
}
60+
61+
public function addSpeech(Speech $speech): void
62+
{
63+
}
64+
65+
public function getSpeech(string $identifier): Speech
66+
{
67+
return new Speech([], new DeferredResult(new PlainConverter(new TextResult('foo')), new InMemoryRawResult()), 'bar');
68+
}
5569
});
5670

5771
$input = new ArrayInput([]);

docs/components/platform.rst

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,63 @@ This allows fast and isolated testing of AI-powered features without relying on
527527

528528
This requires `cURL` and the `ext-curl` extension to be installed.
529529

530+
Speech support
531+
~~~~~~~~~~~~~~
532+
533+
Using speech to send messages / receive answers as audio is a common use case when integrating agents and/or chats.
534+
535+
Speech support can be enabled using ``Symfony\AI\Platform\Speech\SpeechProviderListener``::
536+
537+
use Symfony\AI\Agent\Agent;
538+
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechProvider;
539+
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
540+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
541+
use Symfony\AI\Platform\Message\Message;
542+
use Symfony\AI\Platform\Message\MessageBag;
543+
use Symfony\AI\Platform\Speech\SpeechConfiguration;
544+
use Symfony\AI\Platform\Speech\SpeechProviderListener;
545+
use Symfony\Component\EventDispatcher\EventDispatcher;
546+
547+
$eventDispatcher = new EventDispatcher();
548+
$eventDispatcher->addSubscriber(new SpeechProviderListener([
549+
new ElevenLabsSpeechProvider(PlatformFactory::create(
550+
apiKey: $elevenLabsApiKey,
551+
httpClient: http_client(),
552+
speechConfiguration: new SpeechConfiguration(
553+
ttsModel: 'eleven_multilingual_v2',
554+
ttsVoice: 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
555+
sttModel: 'eleven_multilingual_v2'
556+
)),
557+
),
558+
], []));
559+
560+
$platform = OpenAiPlatformFactory::create($openAiApiKey, httpClient: HttpClient::create(), eventDispatcher: $eventDispatcher);
561+
562+
$agent = new Agent($platform, 'gpt-4o');
563+
$answer = $agent->call(new MessageBag(
564+
Message::ofUser('Tina has one brother and one sister. How many sisters do Tina\'s siblings have?'),
565+
));
566+
567+
echo $answer->getSpeech('eleven_labs')->asBinary();
568+
569+
When using the bundle, models and voices can be configured as follows::
570+
571+
ai:
572+
platform:
573+
eleven_labs:
574+
api_key: '%env(ELEVEN_LABS_API_KEY)%'
575+
576+
speech:
577+
eleven_labs:
578+
tts_model: 'eleven_multilingual_v2'
579+
tts_voice: '%env(ELEVEN_LABS_VOICE_IDENTIFIER)%'
580+
tts_extra_options:
581+
foo: bar
582+
583+
.. note::
584+
585+
Please be aware that enabling speech support requires the corresponding platforms to be defined.
586+
530587
Code Examples
531588
~~~~~~~~~~~~~
532589

examples/speech/README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Speech Examples
2+
3+
Speech is mainly used to transform text to audio and vice versa; it can also be used to create an audio-to-audio pipeline.
4+
5+
To run the examples, you can use additional tools like [mpg123](https://www.mpg123.de/):
6+
7+
```bash
8+
php speech/agent-eleven-labs-speech-tts.php | mpg123 -
9+
php speech/agent-eleven-labs-speech-sts.php | mpg123 -
10+
```
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechListener;
14+
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechProvider;
15+
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
16+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
17+
use Symfony\AI\Platform\Message\Content\Audio;
18+
use Symfony\AI\Platform\Message\Message;
19+
use Symfony\AI\Platform\Message\MessageBag;
20+
use Symfony\AI\Platform\Speech\SpeechAwarePlatform;
21+
use Symfony\AI\Platform\Speech\SpeechConfiguration;
22+
use Symfony\AI\Platform\Speech\SpeechProviderListener;
23+
use Symfony\Component\EventDispatcher\EventDispatcher;
24+
25+
require_once dirname(__DIR__).'/bootstrap.php';
26+
27+
$eventDispatcher = new EventDispatcher();
28+
$eventDispatcher->addSubscriber(new SpeechProviderListener([
29+
new ElevenLabsSpeechProvider(new SpeechAwarePlatform(
30+
PlatformFactory::create(
31+
apiKey: env('ELEVEN_LABS_API_KEY'),
32+
httpClient: http_client(),
33+
),
34+
speechConfiguration: new SpeechConfiguration(
35+
ttsModel: 'eleven_multilingual_v2',
36+
ttsVoice: 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
37+
sttModel: 'eleven_multilingual_v2'
38+
)),
39+
),
40+
], [
41+
new ElevenLabsSpeechListener(PlatformFactory::create(
42+
apiKey: env('ELEVEN_LABS_API_KEY'),
43+
httpClient: http_client(),
44+
speechConfiguration: new SpeechConfiguration(
45+
sttModel: 'scribe_v1'
46+
)),
47+
),
48+
]));
49+
50+
$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client(), eventDispatcher: $eventDispatcher);
51+
52+
$agent = new Agent($platform, 'gpt-4o');
53+
$answer = $agent->call(new MessageBag(
54+
Message::ofUser(Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3'))
55+
));
56+
57+
echo $answer->getSpeech('eleven_labs')->asBinary();
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechListener;
14+
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
15+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
16+
use Symfony\AI\Platform\Message\Content\Audio;
17+
use Symfony\AI\Platform\Message\Message;
18+
use Symfony\AI\Platform\Message\MessageBag;
19+
use Symfony\AI\Platform\Speech\SpeechAwarePlatform;
20+
use Symfony\AI\Platform\Speech\SpeechConfiguration;
21+
use Symfony\AI\Platform\Speech\SpeechProviderListener;
22+
use Symfony\Component\EventDispatcher\EventDispatcher;
23+
24+
require_once dirname(__DIR__).'/bootstrap.php';
25+
26+
$eventDispatcher = new EventDispatcher();
27+
$eventDispatcher->addSubscriber(new SpeechProviderListener([], [
28+
new ElevenLabsSpeechListener(new SpeechAwarePlatform(
29+
PlatformFactory::create(
30+
apiKey: env('ELEVEN_LABS_API_KEY'),
31+
httpClient: http_client(),
32+
),
33+
speechConfiguration: new SpeechConfiguration(
34+
ttsModel: 'eleven_multilingual_v2',
35+
ttsVoice: 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
36+
sttModel: 'eleven_multilingual_v2'
37+
)),
38+
),
39+
]));
40+
41+
$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client(), eventDispatcher: $eventDispatcher);
42+
43+
$agent = new Agent($platform, 'gpt-4o');
44+
$answer = $agent->call(new MessageBag(
45+
Message::ofUser(Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3'))
46+
));
47+
48+
echo $answer->getContent();
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechProvider;
14+
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
15+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
16+
use Symfony\AI\Platform\Message\Message;
17+
use Symfony\AI\Platform\Message\MessageBag;
18+
use Symfony\AI\Platform\Speech\SpeechAwarePlatform;
19+
use Symfony\AI\Platform\Speech\SpeechConfiguration;
20+
use Symfony\AI\Platform\Speech\SpeechProviderListener;
21+
use Symfony\Component\EventDispatcher\EventDispatcher;
22+
23+
require_once dirname(__DIR__).'/bootstrap.php';
24+
25+
$eventDispatcher = new EventDispatcher();
26+
$eventDispatcher->addSubscriber(new SpeechProviderListener([
27+
new ElevenLabsSpeechProvider(new SpeechAwarePlatform(
28+
PlatformFactory::create(
29+
apiKey: env('ELEVEN_LABS_API_KEY'),
30+
httpClient: http_client(),
31+
),
32+
speechConfiguration: new SpeechConfiguration(
33+
ttsModel: 'eleven_multilingual_v2',
34+
ttsVoice: 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
35+
sttModel: 'eleven_multilingual_v2'
36+
)),
37+
),
38+
], []));
39+
40+
$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client(), eventDispatcher: $eventDispatcher);
41+
42+
$agent = new Agent($platform, 'gpt-4o');
43+
$answer = $agent->call(new MessageBag(
44+
Message::ofUser('Tina has one brother and one sister. How many sisters do Tina\'s siblings have?'),
45+
));
46+
47+
echo $answer->getSpeech('eleven_labs')->asBinary();

src/agent/src/Output.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
use Symfony\AI\Platform\Message\MessageBag;
1515
use Symfony\AI\Platform\Result\ResultInterface;
16+
use Symfony\AI\Platform\Speech\Speech;
1617

1718
/**
1819
* @author Christopher Hertel <mail@christopher-hertel.de>
@@ -27,6 +28,7 @@ public function __construct(
2728
private ResultInterface $result,
2829
private readonly MessageBag $messageBag,
2930
private readonly array $options = [],
31+
private ?Speech $speech = null,
3032
) {
3133
}
3234

@@ -57,4 +59,14 @@ public function getOptions(): array
5759
{
5860
return $this->options;
5961
}
62+
63+
public function setSpeech(?Speech $speech): void
64+
{
65+
$this->speech = $speech;
66+
}
67+
68+
public function getSpeech(): ?Speech
69+
{
70+
return $this->speech;
71+
}
6072
}

src/ai-bundle/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,4 @@ CHANGELOG
3434
- Token usage metadata in agent results including prompt, completion, total, cached, and thinking tokens
3535
- Rate limit information tracking for supported platforms
3636
* Add support for configuring chats and message stores
37+
* Add support for configuring speeches

src/ai-bundle/config/options.php

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,6 +1076,23 @@
10761076
->end()
10771077
->end()
10781078
->end()
1079+
->arrayNode('speech')
1080+
->useAttributeAsKey('name')
1081+
->arrayPrototype()
1082+
->children()
1083+
->stringNode('platform')->isRequired()->end()
1084+
->stringNode('tts_model')->end()
1085+
->stringNode('tts_voice')->end()
1086+
->arrayNode('tts_extra_options')
1087+
->scalarPrototype()->end()
1088+
->end()
1089+
->stringNode('stt_model')->end()
1090+
->arrayNode('stt_extra_options')
1091+
->scalarPrototype()->end()
1092+
->end()
1093+
->end()
1094+
->end()
1095+
->end()
10791096
->arrayNode('vectorizer')
10801097
->info('Vectorizers for converting strings to Vector objects and transforming TextDocument arrays to VectorDocument arrays')
10811098
->useAttributeAsKey('name')

0 commit comments

Comments
 (0)