
    iH                       U d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlZddlZddlZddlZddlmZ ddlmZmZmZmZ ddlmZ ddlmZ  ej        e          ZddZddlm Z  dd	l!m"Z"m#Z#m$Z$ dd
l%m&Z& d Z'd Z(d Z)d Z*d Z+d Z,d Z-dZ.dZ/dZ0dZ1dZ2dZ3dZ4dZ5dZ6dZ7dZ8dZ9dZ:dZ;d Z<d!Z=d"Z>d#Z?d$Z@d%ZAd&ZBd'ZCd(ZDd)ZEd$ZFd*ZGd+ZHd,eIfd-ZJ eJ            ZKd.d/d0d1d2d.d1d3d3d.d4
ZLeeIeMf         eNd5<   d.d.d1d1d1d1d6d7d8ZOeeIeMf         eNd9<   d2ZPePZQ	 dd:eeI         d;eeeIef                  d,eMfd<ZRd,eeIef         fd=ZSd;eeIef         d,eIfd>ZT eUh d?          ZVd@ZWdAZX eUh dB          ZYd.ZZd;eeIef         dCeId,eeIef         fdDZ[d;eeIef         dCeId,eeIef         fdEZ\dFeeIef         d,e]fdGZ^d:eId;eeIef         d,eeeIef                  fdHZ_d;eeIef         fdIZ`dFeeIef         d,eafdJZb	 ddFeeIef         dKeeI         d,eIfdLZcdFeeIef         d,e]fdMZddNeIdOeMd,eeI         fdPZedQeIdReeI         d,eIfdSZfdNeIdTeeIeIf         d,eIfdUZgdVejh        d,dfdWZidXeIdYead,ejj        fdZZkd[edFeeIef         d,efd\Zld]eIdKeId^eIdFeeIef         d;eeIef         d,eIfd_Zmdd;eeeIef                  d,e]fd`Znd,e]fdaZodbeId,eeI         fdcZpd]eIdKeId;eeIef         d,eIfddZqd]eIdKeId;eeIef         d,eIfdeZrd]eIdKeId;eeIef         d,eIfdfZsd]eIdKeId;eeIef         d,eIfdgZtd]eIdKeId;eeIef         d,eIfdhZud]eIdKeId;eeIef         d,eIfdiZveFeGeHfdjewdkeMdleMdmeMd,ewf
dnZxd]eIdKeId;eeIef         d,eIfdoZyd,e]fdpZzd,e]fdqZ{d,eIfdrZ|d,eIfdsZ}d]eIdKeId;eeIef         d,eIfdtZ~i aeeIef         eNdu<   d,e]fdvZd,efdwZdxeIdyed,eIfdzZd]eIdKeId;eeIef         d,eIfd{Zi aeeIef         eNd|<   d]eIdKeId;eeIef         d,eIfd}Z	 dd]eIdKeeI         d,eIfd~Zd,e]fdZd,eeIeIf         fdZd,e]fdZ ej        d          Z ej        d          Z ej        d          Z ej        d          Z ej        d          Z ej        d          Z ej        d          Z ej        dej                  Z ej        dej                  Z ej        d          Z ej        d          Zd]eId,eIfdZ	 ddej        dej        dej        deeeIgdf                  fdZedk    r- ed            ed           d Z ed            ed ee'd          rdnd             ed ee(d          rdnd             ed ed          rdnd             ed ee)d          rdnd             ed e$            rdnd             ed ed          rdnd             ed e            rdnd             ed eo            rdnd             edeK             eS            Z eTe          Z ede            ddlmZmZ dddddddd e             dddd]gddZ ej        dded ed           dS )a  
Text-to-Speech Tool Module

Built-in TTS providers:
- Edge TTS (default, free, no API key): Microsoft Edge neural voices
- ElevenLabs (premium): High-quality voices, needs ELEVENLABS_API_KEY
- OpenAI TTS: Good quality, needs OPENAI_API_KEY
- MiniMax TTS: High-quality with voice cloning, needs MINIMAX_API_KEY
- Mistral (Voxtral TTS): Multilingual, native Opus, needs MISTRAL_API_KEY
- Google Gemini TTS: Controllable, 30 prebuilt voices, needs GEMINI_API_KEY
- xAI TTS: Grok voices, needs XAI_API_KEY
- NeuTTS (local, free, no API key): On-device TTS via neutts
- KittenTTS (local, free, no API key): On-device 25MB model
- Piper (local, free, no API key): OHF-Voice/piper1-gpl neural VITS, 44 languages

Custom command providers:
- Users can declare any number of named providers with ``type: command``
  under ``tts.providers.<name>`` in ``~/.hermes/config.yaml``. Hermes
  writes the input text to a temp file and runs the configured shell
  command, which must produce the audio file at the expected path.
  See the Local Command section of ``website/docs/user-guide/features/tts.md``.

Output formats:
- Opus (.ogg) for Telegram voice bubbles (requires ffmpeg for Edge TTS)
- MP3 (.mp3) for everything else (CLI, Discord, WhatsApp)

Configuration is loaded from ~/.hermes/config.yaml under the 'tts:' key.
The user chooses the provider and voice; the model just sends text.

Usage:
    from tools.tts_tool import text_to_speech_tool, check_tts_requirements

    result = text_to_speech_tool(text="Hello world")
    N)Path)CallableDictAnyOptional)urljoin)display_hermes_homec                 ~    	 ddl m} n%# t          $ r t          j        | |          cY S w xY w ||           }||n|S )a  Read env values through the live config module.

    Tests may monkeypatch and later restore ``hermes_cli.config.get_env_value``
    before this module is imported. Resolve the helper at call time so TTS does
    not keep a stale imported function for the rest of the test process.
    r   )get_env_value)hermes_cli.configr   ImportErrorosgetenv)namedefault_get_env_valuevalues       3/home/ubuntu/.hermes/hermes-agent/tools/tts_tool.pyr   r   ;   sl    (EEEEEEE ( ( (yw'''''(N4  Em77.s   	 ++)resolve_managed_tool_gateway)managed_nous_tools_enabledprefers_gatewayresolve_openai_audio_api_key)hermes_xai_user_agentc                      ddl } | S )z?Lazy import edge_tts. Returns the module or raises ImportError.r   Nedge_ttsr   s    r   _import_edge_ttsr   Q   s    OOOO    c                      ddl m}  | S )zGLazy import ElevenLabs client. Returns the class or raises ImportError.r   
ElevenLabs)elevenlabs.clientr!   r    s    r   _import_elevenlabsr#   V   s    ,,,,,,r   c                      ddl m}  | S )zCLazy import OpenAI client. Returns the class or raises ImportError.r   )OpenAI)openair%   )OpenAIClients    r   _import_openai_clientr(   [   s    ------r   c                      ddl m}  | S )zDLazy import Mistral client. Returns the class or raises ImportError.r   Mistral)mistralai.clientr+   r*   s    r   _import_mistral_clientr-   `   s    ((((((Nr   c                      ddl } | S )zJLazy import sounddevice. Returns the module or raises ImportError/OSError.r   N)sounddevice)sds    r   _import_sounddevicer1   e   s    Ir   c                      ddl m}  | S )z?Lazy import KittenTTS. Returns the class or raises ImportError.r   	KittenTTS)	kittenttsr4   r3   s    r   _import_kittenttsr6   k   s    ######r   c                      ddl m}  | S )at  Lazy import Piper. Returns the PiperVoice class or raises ImportError.

    Piper is an optional, fully-local neural TTS engine (Home Assistant /
    Open Home Foundation). ``pip install piper-tts`` provides cross-platform
    wheels (Linux / macOS / Windows, x86_64 + ARM64) with embedded espeak-ng.
    Voice models (.onnx + .onnx.json) are downloaded on first use.
    r   
PiperVoice)piperr9   r8   s    r   _import_piperr;   q   s     !     r   edgezen-US-AriaNeuralpNInz6obpgDQGcFmaJgBeleven_multilingual_v2eleven_flash_v2_5zgpt-4o-mini-ttsz!KittenML/kitten-tts-nano-0.8-int8Jasperzen_US-lessac-mediumalloyzhttps://api.openai.com/v1zspeech-2.8-hdEnglish_Graceful_Ladyz https://api.minimax.io/v1/t2a_v2zvoxtral-mini-tts-2603z$c69964a6-ab8b-4f8a-9465-ec0925096ec8eveen]    zhttps://api.x.ai/v1zgemini-2.5-flash-preview-ttsKorez0https://generativelanguage.googleapis.com/v1beta      returnc                  @    ddl m}  t           | dd                    S )Nr   get_hermes_dirzcache/audioaudio_cache)hermes_constantsrM   strrL   s    r   _get_default_output_dirrQ      s.    //////~~m];;<<<r   i  i   i:  i'  i  i  )
r<   r&   xaiminimaxmistralgemini
elevenlabsneuttsr5   r:   PROVIDER_MAX_TEXT_LENGTHi0u  i@  )	eleven_v3eleven_ttv_v3r>   eleven_multilingual_v1eleven_english_sts_v2eleven_english_sts_v1eleven_flash_v2r?    ELEVENLABS_MODEL_MAX_TEXT_LENGTHprovider
tts_configc                 l   | st           S |                                                                 }|pi }t          |                    |          t
                    r|                    |          ni }|r|                    d          nd}t          |t                    rd}t          |t                    r|dk    r|S |dk    r[|pi                     d          pt          }t                              t          |                                                    }|r|S |t          v rt          |         S |t          vrot          ||          }t          |          rP|                    d          }	t          |	t                    rd}	t          |	t                    r|	dk    r|	S t          S t           S )a  Return the input-character cap for *provider*.

    Resolution order:
      1. ``tts.<provider>.max_text_length`` (user override in config.yaml)
      2. ``tts.providers.<provider>.max_text_length`` for user-declared
         command providers
      3. ElevenLabs model-aware table (keyed on configured ``model_id``)
      4. ``PROVIDER_MAX_TEXT_LENGTH`` default
      5. ``DEFAULT_COMMAND_TTS_MAX_TEXT_LENGTH`` when the provider is a
         command-type user provider without an explicit cap
      6. ``FALLBACK_MAX_TEXT_LENGTH`` (4000)

    Non-positive or non-integer overrides fall through to the default so a
    broken config can't accidentally disable truncation entirely.
    max_text_lengthNr   rV   model_id)FALLBACK_MAX_TEXT_LENGTHlowerstrip
isinstancegetdictboolintDEFAULT_ELEVENLABS_MODEL_IDr_   rP   rX   BUILTIN_TTS_PROVIDERS_get_named_provider_config_is_command_provider_config#DEFAULT_COMMAND_TTS_MAX_TEXT_LENGTH)
r`   ra   keycfgprov_cfgoverriderd   mappednamednamed_overrides
             r   _resolve_max_text_lengthry      s   &  (''
..


 
 
"
"C

C  *#''#,,==Eswws|||2H2:Dx||-...H(D!! (C   X\\
lN''
33R7R155c(mm6I6I6K6KLL 	M
&&&',, '''*344&u-- 	7"YY'899N.$// &!%.#.. &>A3E3E%%66##r   c                  
   	 ddl m}   |             }|                    di           S # t          $ r t                              d           i cY S t          $ r)}t                              d|d           i cY d}~S d}~ww xY w)	z
    Load TTS configuration from ~/.hermes/config.yaml.

    Returns a dict with provider settings. Falls back to defaults
    for any missing fields.
    r   )load_configttsz9hermes_cli.config not available, using default TTS configzFailed to load TTS config: %sTexc_infoN)r   r{   ri   r   loggerdebug	Exceptionwarning)r{   configes      r   _load_tts_configr     s    	111111zz%$$$   PQQQ			   6DIII						s!   %( &B	BA=7B=Bc                     |                      d          pt                                                                          S )z%Get the configured TTS provider name.r`   )ri   DEFAULT_PROVIDERrf   rg   )ra   s    r   _get_providerr     s2    NN:&&:*:AACCIIKKKr   >
   rR   r<   r:   rU   rW   r&   rS   rT   r5   rV   x   mp3>   r   oggwavflacr   c                     t          | t                    si S |                     |          }t          |t                    r|ni S )zBReturn a provider config block if it's a dict, else an empty dict.)rh   rj   ri   )ra   r   sections      r   _get_provider_sectionr   O  sC    j$'' 	nnT""G $//777R7r   c                 
   t          | d          }t          |t                    r|                    |          nd}t          |t                    r|S |                                t
          vrt          | |          }|r|S i S )a  Return the config dict for a user-declared provider.

    Looks up ``tts.providers.<name>`` first (the canonical location), and
    falls back to ``tts.<name>`` so users who followed the built-in layout
    still work. Returns an empty dict when the provider is not declared.
    	providersN)r   rh   rj   ri   rf   rn   )ra   r   r   r   legacys        r   ro   ro   W  s     &j+>>I%/	4%@%@JimmD!!!dG'4    zz||000&z488 	MIr   r   c                 j   t          | t                    sdS t          |                     d          pd                                                                          }|r|dk    rdS |                     d          }t          |t                    o t          |                                          S )z;Return True when *config* declares a command-type provider.Ftype command)rh   rj   rP   ri   rg   rf   rk   )r   ptyper   s      r   rp   rp   o  s    fd## u

6""(b))//117799E )##ujj##Ggs##=W]]__(=(==r   c                     | sdS |                                                                  }|t          v rdS t          ||          }t	          |          r|S dS )zReturn the provider config if *provider* resolves to a command type.

    Built-in provider names are rejected (they have native handlers).
    Returns None when the name is a built-in, unknown, or not a command
    type.
    N)rf   rg   rn   ro   rp   )r`   ra   rr   r   s       r    _resolve_command_provider_configr   z  sf      t
..


 
 
"
"C
###t'
C88F"6** 4r   c              #     K   t          | t                    sdS t          | d          }|pi                                 D ]J\  }}t          |t                    r0|                                t          vrt          |          r||fV  KdS )zDYield (name, config) pairs for every declared command-type provider.Nr   )rh   rj   r   itemsrP   rf   rn   rp   )ra   r   r   rs   s       r   _iter_command_providersr     s      j$'' %j+>>Io2,,..    	cdC   	 TZZ\\9N%N%N*3//  Ci   r   c                    |                      d|                      dt                              }	 t          |          }n+# t          t          f$ r t          t                    cY S w xY w|dk    rt          t                    S |S )z5Return timeout in seconds, falling back when invalid.timeouttimeout_secondsr   )ri   #DEFAULT_COMMAND_TTS_TIMEOUT_SECONDSfloat	TypeError
ValueError)r   rawr   s      r   _get_command_tts_timeoutr     s    
**Y

+<>a b b
c
cC:c

z" : : :899999:zz8999Ls   A %A)(A)output_pathc                    |rVt          |          j                                                                                            d          }|t
          v r|S |                     d          p|                     d          pt          }t          |                                                                                              d          }|t
          v r|nt          S )z6Return the validated output format (mp3/wav/ogg/flac)..formatoutput_format)	r   suffixrf   rg   lstripCOMMAND_TTS_OUTPUT_FORMATSri   !DEFAULT_COMMAND_TTS_OUTPUT_FORMATrP   )r   r   r   r   fmts        r   _get_command_tts_output_formatr     s    
  k"")//117799@@EE///M

8 	-::o&&	-, 
 c((..


 
 
"
"
)
)#
.
.C333339ZZr   c                     |                      dd          }t          |t                    r(|                                                                dv S t          |          S )zEReturn True only when the user explicitly opted in to voice delivery.voice_compatibleF>   1onyestrue)ri   rh   rP   rg   rf   rk   )r   r   s     r    _is_command_tts_voice_compatibler     sU    JJ)511E% C{{}}""$$(BBB;;r   command_templatepositionc                     d}d}d}||k     r\| |         }|dk    r	|dk    rd}n:|dk    r|rd}n/|dk    rd}n&|dk    rd}n|dk    rd}n|dk    rd}n|dk    r|dz  }|dz  }||k     \|S )	zReturn the shell quote character active right before *position*.

    Returns ``"'"`` / ``'"'`` when inside a single- / double-quoted region
    of the template, ``None`` for bare context.
    NFr   '"\TrH    )r   r   quoteescapedichars         r   _shell_quote_contextr     s      EG	A
h,,"C<<s{{c\\ s{{Q	Q' h,,( Lr   r   quote_contextc                 \   |dk    r|                      dd          S |dk    rR|                      dd                               dd                               dd                               d	d
          S t          j        dk    rt          j        | g          S t          j        |           S )zGQuote a placeholder value for its position in a shell command template.r   z'\''r   r   z\\z\"$z\$`z\`nt)replacer   r   
subprocesslist2cmdlineshlexr   )r   r   s     r   _quote_command_tts_placeholderr     s    }}S'***WT6""WS%  WS%  WS%  	
 
w$&w///;ur   placeholdersc                     d                     d D                       }t          j        d| d| d          }g dt          j        t                   dt          f fd}|                    |           }|                    d	d
                              dd          }D ]\  }}|                    ||          }|S )z@Replace supported placeholders while preserving ``{{`` / ``}}``.|c              3   >   K   | ]}t          j        |          V  d S N)reescape).0r   s     r   	<genexpr>z/_render_command_tts_template.<locals>.<genexpr>  s*      >>RYt__>>>>>>r   z(?<!\$)(?:\{\{(?P<double>z)\}\}|\{(?P<single>z)\})matchrJ   c                    |                      d          p|                      d          }dt                     d}                    |t          |         t	          |                                                     f           |S )Ndoublesingle__HERMES_TTS_PLACEHOLDER___)grouplenappendr   r   start)r   r   tokenr   r   replacementss      r   replace_matchz3_render_command_tts_template.<locals>.replace_match  s    {{8$$=H(=(=AC,=,=AAA*T"$%5u{{}}EE 
 	 	 	 r   z{{{z}}})joinr   compileMatchrP   subr   )	r   r   namespatternr   renderedr   r   r   s	   ``      @r   _render_command_tts_templater     s    
 HH>>>>>>>EjPuPPEPPP G +-L
RXc] 
s 
 
 
 
 
 
 
 
 {{=*:;;Hc**224==H$ 2 2u##E511Or   procc           	         |                                  dS t          j        dk    rk	 t          j        ddddt          | j                  gt          j        t          j        d           n$# t          $ r | 	                                 Y nw xY wdS 	 t          j
        | j        t          j                   n0# t          $ r Y dS t          $ r |                                  Y nw xY w	 |                     d	
           dS # t          j        $ r Y nw xY w	 t          j
        | j        t          j                   dS # t          $ r Y dS t          $ r | 	                                 Y dS w xY w)zCBest-effort termination of a shell process and all of its children.Nr   taskkillz/Fz/Tz/PID   )stdoutstderrr   rI   r   )pollr   r   r   runrP   pidDEVNULLr   killkillpgsignalSIGTERMProcessLookupError	terminatewaitTimeoutExpiredSIGKILL)r   s    r   #_terminate_command_tts_process_treer    s   yy{{	w$	NT4TX?!)!)	      	 	 	IIKKKKK	
	$(FN++++      		!	$   
	$(FN+++++      		sT   AA, ,BB$B8 8
C%C%$C%)D DD$D= =
E+
E+*E+r   r   c                    dt           j        t           j        dd}t          j        dk    rt	          t           dd          |d<   nd|d<   t          j        | fi |}	 |                    |          \  }}n# t           j        $ rz}t          |           	 |                    d	          \  }}n2# t          $ r% t	          |d
d          }t	          |dd          }Y nw xY wt          j        | |||          |d}~ww xY w|j
        rt          j        |j
        | ||          t          j        | |j
        ||          S )zGRun a command-provider shell command with process-tree timeout cleanup.T)shellr   r   textr   CREATE_NEW_PROCESS_GROUPr   creationflagsstart_new_sessionr   rH   outputNr   )r
  r   )r   PIPEr   r   getattrPopencommunicater  r  r   
returncodeCalledProcessErrorCompletedProcess)r   r   popen_kwargsr   r   r   excs          r   _run_command_ttsr  5  s    //	$ $L 
w$(/
<VXY(Z(Z_%%,0()G44|44D))')::$   +D111	2!--a-88NFFF 	2 	2 	2S(D11FS(D11FFF	2 '	
 
 

 	  
+O	
 
 
 	
 &wPPPs<   A8 8DC<B10C<1,C C<C  C<<Dpathc                 P    t          |          }|                     d|           S )zKReturn an output path whose extension matches the provider's output_format.r   )r   with_suffix)r  r   r   s      r   #_configured_command_tts_output_pathr  ]  s)    
(
0
0CIII&&&r   r  provider_namec                 n   t          |                    d          pd                                          }|st          d| d          t	          |                                          }|j                            dd           |                                r|	                                 t          |          }t          |t          |                    }|                    d|                    dd                    }	t          j                    5 }
t	          |
          dz  }|                    | d	
           t          |          t          |          t          |          |t          |                    dd                    t          |                    dd                    t          |	          d}t          ||          }	 t!          ||           n# t"          j        $ r}t'          d| d|dd          |d}~wt"          j        $ r}g }|j        r/|                    d|j                                                    |j        r/|                    d|j                                                    d                    |          pd}t'          d| d|j         d|           |d}~ww xY w	 ddd           n# 1 swxY w Y   |                                r|                                j        dk    rt'          d| d|           t          |          S )a
  Generate speech by running a user-configured shell command.

    Returns the absolute path of the audio file the command wrote.
    Raises ``ValueError`` when the provider config is invalid, and
    ``RuntimeError`` for timeouts / non-zero exits / empty output.
    r   r   ztts.providers.z.command is not configuredTparentsexist_okspeedz	input.txtutf-8)encodingvoicemodel)
input_path	text_pathr   r   r!  r"  r  zTTS provider 'z' timed out after gsNzstderr: zstdout: z; zno command outputz' exited with code z: r   z' produced no output at )rP   ri   rg   r   r   
expanduserparentmkdirexistsunlinkr   r   tempfileTemporaryDirectory
write_textr   r  r   r  RuntimeErrorr  r   r   r   r   r  statst_size)r  r   r  r   ra   r   r
  r   r   r  tmpdirr$  r   r   r  detail_partsdetails                    r   _generate_command_ttsr5  c  s    6::i006B77==?? 
F]FFF
 
 	
 +))++F
Mt444}} &v..G263v;;GGMJJw
w ; ;<<E		$	&	& &LL;.	TG444 i..Yv;;#GR0011GR0011ZZ
 
 //?NN	Wg....( 	 	 	NNN'NNNN  , 
	 
	 
	Lz E##$Csz/?/?/A/A$C$CDDDz E##$Csz/?/?/A/A$C$CDDDYY|,,C0CF. . .>. .%+. .  
	 /!              B ==?? 
fkkmm3q88L]LLFLL
 
 	
 v;;sD   B:KG&%K&K5HKB#KKKKKc                 T    | t                      } t          |           D ]\  }} dS dS )z=Return True when any command-type TTS provider is configured.NTF)r   r   )ra   _name_cfgs      r   _has_any_command_tts_providerr9    s:    %''
.z::  ttt5r   c                  .    t          j        d          duS )z+Check if ffmpeg is available on the system.ffmpegN)shutilwhichr   r   r   _has_ffmpegr>    s    <!!--r   mp3_pathc                    t                      sdS |                     dd          d         dz   }	 t          j        dd| dd	d
ddddd|dgdd          }|j        dk    rEt
                              d|j        |j                            dd          dd                    dS t          j
                            |          r%t          j
                            |          dk    r|S n# t          j        $ r t
                              d           Y nXt          $ r t
                              d           Y n3t          $ r'}t
                              d|d           Y d}~nd}~ww xY wdS )z
    Convert an MP3 file to OGG Opus format for Telegram voice bubbles.

    Args:
        mp3_path: Path to the input MP3 file.

    Returns:
        Path to the .ogg file, or None if conversion fails.
    Nr   rH   r   .oggr;  -i-acodeclibopus-acr   -b:a64k-vbroff-yT   capture_outputr   z0ffmpeg conversion failed with return code %d: %sr  ignoreerrors   z)ffmpeg OGG conversion timed out after 30szffmpeg not found in PATHz ffmpeg OGG conversion failed: %sr}   )r>  rsplitr   r   r  r   r   r   decoder   r  r*  getsizer  FileNotFoundErrorr   )r?  ogg_pathresultr   s       r   _convert_to_opusrX    s    == tsA&&q)F2HMtXy)CxG
 
 

 !!NNM +V]-A-A'RZ-A-[-[\`]`\`-ac c c47>>(## 	(A(AA(E(EO$ D D DBCCCCC 3 3 3122222 M M M91tLLLLLLLLM4s+   A2C* %AC* *)E*#E*:	E*E%%E*c           	        K   t                      }|                    di           }|                    dt                    }t          |                    d|                    dd                              }d|i}|dk    rt	          |dz
  dz            }|dd|d<    |j        | fi |}	|	                    |           d	{V  |S )
z
    Generate audio using Edge TTS.

    Args:
        text: Text to convert.
        output_path: Where to save the MP3 file.
        tts_config: TTS config dict.

    Returns:
        Path to the saved audio file.
    r<   r!  r        ?d   z+d%rateN)r   ri   DEFAULT_EDGE_VOICEr   roundCommunicatesave)
r  r   ra   	_edge_ttsedge_configr!  r  kwargspctr  s
             r   _generate_edge_ttsrf    s       !""I..,,KOOG%788E+//':>>'3+G+GHHIIEuF||US[C'((v')'7777K


;
'
''''''''r   c                    t          d          pd}|st          d          |                    di           }|                    dt                    }|                    dt                    }|                    d          rd}nd	}t                      } ||
          }	|	j                            | |||          }
t          |d          5 }|
D ]}|
                    |           	 ddd           n# 1 swxY w Y   |S )z
    Generate audio using ElevenLabs.

    Args:
        text: Text to convert.
        output_path: Where to save the audio file.
        tts_config: TTS config dict.

    Returns:
        Path to the saved audio file.
    ELEVENLABS_API_KEYr   z=ELEVENLABS_API_KEY not set. Get one at https://elevenlabs.io/rV   voice_idrd   rA  opus_48000_64mp3_44100_128api_keyr  ri  rd   r   wbN)r   r   ri   DEFAULT_ELEVENLABS_VOICE_IDrm   endswithr#   text_to_speechconvertopenwrite)r  r   ra   rm  	el_configri  rd   r   r!   clientaudio_generatorfchunks                r   _generate_elevenlabsr{    se    1228bG ZXYYY|R00I}}Z)DEEH}}Z)DEEH F## (''#%%JZ(((F+33#	 4  O 
k4	 	  A$ 	 	EGGENNNN	               s   C99C= C=c                    t                      \  }}|                    di           }|                    dt                    }|                    dt                    }|                    d|          }t	          |                    d|                    dd                              }|                    d          rd}	nd	}	t                      }
 |
||
          }	 t          ||| |	dt          t          j
                              i          }|dk    r!t          dt          d|                    |d<    |j        j        j        di |}|                    |           |t#          |dd          }t%          |          r |             S S # t#          |dd          }t%          |          r |             w w xY w)z
    Generate audio using OpenAI TTS.

    Args:
        text: Text to convert.
        output_path: Where to save the audio file.
        tts_config: TTS config dict.

    Returns:
        Path to the saved audio file.
    r&   r"  r!  base_urlr  rZ  rA  opusr   )rm  r}  zx-idempotency-key)r"  r!  inputresponse_formatextra_headersg      ?g      @closeNr   )#_resolve_openai_audio_client_configri   DEFAULT_OPENAI_MODELDEFAULT_OPENAI_VOICEr   rq  r(   rj   rP   uuiduuid4maxminaudiospeechcreatestream_to_filer  callable)r  r   ra   rm  r}  
oai_configr"  r!  r  r  r'   rw  create_kwargsresponser  s                  r   _generate_openai_ttsr  *  s    <==GX"--JNN7$899ENN7$899E~~j(33H*..*..#*F*FGGHHE F##   (**L\'H===F+.DJLL0A0AB
 
 
 C<<%(s3%?%?M'"-6<&->>>>,,,..E?? 	EGGGG	 ..E?? 	EGGGG	s   !BF -Gc                    ddl }t          d          pd                                }|st          d          |                    di           }t          |                    dt                                                              pt          }t          |                    dt                                                              pt          }t          |                    d	t                              }t          |                    d
t                              }	t          |                    d          pt          d          pt                                                                        d          }
|                    d          rdnd}| ||d}|dk    s|t          k    s|dk    r(|	t          k    rd|i}|r||d	<   |dk    r|	r|	|d
<   ||d<   |                    |
 dd| dt                      d|d          }|                                 t#          |d          5 }|                    |j                   ddd           n# 1 swxY w Y   |S )z
    Generate audio using xAI TTS.

    xAI exposes a dedicated /v1/tts endpoint instead of the OpenAI audio.speech
    API shape, so this is implemented as a separate backend.
    r   NXAI_API_KEYr   z5XAI_API_KEY not set. Get one at https://console.x.ai/rR   ri  languagesample_ratebit_rater}  XAI_BASE_URL/.wavr   r   )r  ri  r  codecr   z/ttsBearer application/json)AuthorizationContent-Typez
User-Agent<   )headersjsonr   ro  )requestsr   rg   r   ri   rP   DEFAULT_XAI_VOICE_IDDEFAULT_XAI_LANGUAGErl   DEFAULT_XAI_SAMPLE_RATEDEFAULT_XAI_BIT_RATEDEFAULT_XAI_BASE_URLrstriprq  postr   raise_for_statusrt  ru  content)r  r   ra   r  rm  
xai_configri  r  r  r  r}  r  payloadr   r  ry  s                   r   _generate_xai_ttsr  ]  s    OOO]++1r88::G RPQQQr**J:>>*.BCCDDJJLLdPdH:>>*.BCCDDJJLLdPdHjnn]4KLLMMK:>>*.BCCDDHz"" 	 ((	   eggffSkk	  !))&11<EEuE G 	111UNNx+???)0%(8 	7+6M-(E>>h>(0M*%#0 }}0w00./11
 

   	 	H 	k4	 	  "A	 !!!" " " " " " " " " " " " " " " s   /IIIc           	         ddl }t          d          pd}|st          d          |                    di           }|                    dt                    }|                    dt
                    }|                    d	|                    d	d
                    }|                    dd
          }	|                    dd          }
|                    dt                    }|                    d          rd}n|                    d          rd}nd}|| d|||	|
ddd|d
dd}dd| d}|                    |||d          }|	                                 |
                                }|                    di           }|                    dd           }|dk    r+|                    d!d"          }t          d#| d$|           |                    d%i                               d&d          }|st          d'          t                              |          }t          |d(          5 }|                    |           ddd           n# 1 swxY w Y   |S ))a  
    Generate audio using MiniMax TTS API.

    MiniMax returns hex-encoded audio data. Supports streaming (SSE) and
    non-streaming modes. This implementation uses non-streaming for simplicity.

    Args:
        text: Text to convert (max 10,000 characters).
        output_path: Where to save the audio file.
        tts_config: TTS config dict.

    Returns:
        Path to the saved audio file.
    r   NMINIMAX_API_KEYr   z@MINIMAX_API_KEY not set. Get one at https://platform.minimax.io/rS   r"  ri  r  rH   volpitchr}  r  r   .flacr   r   F)ri  r  r  r  i }  rF   )r  bitrater   channel)r"  r  streamvoice_settingaudio_settingr  r  )r  r  r  )r  r  r   	base_respstatus_code
status_msgunknown errorzMiniMax TTS API error (code ): datar  z%MiniMax TTS returned empty audio dataro  )r  r   r   ri   DEFAULT_MINIMAX_MODELDEFAULT_MINIMAX_VOICE_IDDEFAULT_MINIMAX_BASE_URLrq  r  r  r  r/  bytesfromhexrt  ru  )r  r   ra   r  rm  	mm_configr"  ri  r  r  r  r}  audio_formatr  r  r  rW  r  r  r  	hex_audioaudio_bytesry  s                          r   _generate_minimax_ttsr    s    OOO.//52G ][\\\y"--IMM'#899E}}Z)ABBHMM':>>'1#=#=>>E
--q
!
!CMM'1%%E}}Z)ABBH F## 			g	&	&   	
 
 !"	
 
 G& +,7,, G
 }}XGWb}QQH]]__F

;++I--r22Ka]]<AA
V+VV*VVWWW

62&&**7B77I DBCCC --	**K	k4	 	  A	               s   3IIIc                    t          d          pd}|st          d          |                    di           }|                    dt                    }|                    d          pt          }|                    d          rd}n2|                    d	          rd
}n|                    d          rd}nd}t                      }	  ||          5 }	|	j        j        	                    || ||          }
t          j        |
j                  }ddd           n# 1 swxY w Y   n^# t          $ r  t          $ rG}t                              d|d           t!          dt#          |          j                   |d}~ww xY wt'          |d          5 }|                    |           ddd           n# 1 swxY w Y   |S )zGenerate audio using Mistral Voxtral TTS API.

    The API returns base64-encoded audio; this function decodes it
    and writes the raw bytes to *output_path*.
    Supports native Opus output for Telegram voice bubbles.
    MISTRAL_API_KEYr   z?MISTRAL_API_KEY not set. Get one at https://console.mistral.ai/rT   r"  ri  rA  r~  r  r   r  r   r   rl  )r"  r  ri  r  NzMistral TTS failed: %sTr}   zMistral TTS failed: ro  )r   r   ri   DEFAULT_MISTRAL_TTS_MODELDEFAULT_MISTRAL_TTS_VOICE_IDrq  r-   r  r  completebase64	b64decode
audio_datar   r   errorr/  r   __name__rt  ru  )r  r   ra   rm  	mi_configr"  ri  r  r+   rw  r  r  r   ry  s                 r   _generate_mistral_ttsr    s    .//52G \Z[[[y"--IMM'#<==E}}Z((H,HHF##   			f	%	%  			g	&	&   $&&GMWW%%% 	@|*33! /	 4  H !*8+>??K	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@     M M M-q4@@@D$q''2BDDEE1LM 
k4	 	  A	               sO   	D* =DD* D""D* %D"&D* *F>AF  FF::F>F>	pcm_bytesr  channelssample_widthc                 @   ddl }||z  |z  }||z  }t          |           }|                    dddd|||||dz  	  	        }|                    dd	|          }	d
t          |          z   t          |	          z   |z   }
|                    dd|
d          }||z   |	z   | z   S )a  Wrap raw signed-little-endian PCM with a standard WAV RIFF header.

    Gemini TTS returns audio/L16;codec=pcm;rate=24000 -- raw PCM samples with
    no container. We add a minimal WAV header so the file is playable and
    ffmpeg can re-encode it to MP3/Opus downstream.
    r   Nz
<4sIHHIIHHs   fmt    rH      z<4sIs   data   z<4sI4ss   RIFFs   WAVE)structr   pack)r  r  r  r  r  	byte_rateblock_align	data_size	fmt_chunkdata_chunk_header	riff_sizeriff_headers               r   _wrap_pcm_as_wavr  &  s     MMMh&5I\)KII
	q
 
I FGY??C	NN"S):%;%;;iGI++hGDDK"%66BBr   c                 	   ddl }t          d          pt          d          pd                                }|st          d          |                    di           }t          |                    dt                                                              pt          }t          |                    d	t                                                              pt          }t          |                    d
          pt          d          pt                                                    	                    d          }dd| igigdgddd|iiidd}	| d| d}
|
                    |
d|iddi|	d          }|j        dk    r	 |                                                    di           }|                    d          p|j        dd         }n# t          $ r |j        dd         }Y nw xY wt          d |j         d!|           	 |                                }|d"         d         d#         d         }t!          d$ |D             d          }|t          d%          |                    d&          p|                    d'          pi }|                    d(d          }n2# t"          t$          t&          f$ r}t          d)|           |d}~ww xY w|st          d*          t)          j        |          }t-          |          }|                                                    d+          r?t3          |d,          5 }|                    |           ddd           n# 1 swxY w Y   |S t7          j        d+d-.          5 }|                    |           |j        }ddd           n# 1 swxY w Y   	 t=          j        d/          }|r|                                                    d0          r|d1|d2d3d4d5d6d7d8d9d:d;d|g}n	|d1|d:d;d|g}tA          j!        |d<d=>          }|j"        dk    r6|j#        $                    d?d@A          dd         }t          dB|           n0tJ          &                    dC|           t=          j'        ||           	 tQ          j)        |           n:# tT          $ r Y n.w xY w# 	 tQ          j)        |           w # tT          $ r Y w w xY wxY w|S )Da  Generate audio using Google Gemini TTS.

    Gemini's generateContent endpoint with responseModalities=["AUDIO"] returns
    raw 24kHz mono 16-bit PCM (L16) as base64. We wrap it with a WAV RIFF
    header to produce a playable file, then ffmpeg-convert to MP3 / Opus if
    the caller requested those formats (same pattern as NeuTTS).

    Args:
        text: Text to convert (prompt-style; supports inline direction like
              "Say cheerfully:" and audio tags like [whispers]).
        output_path: Where to save the audio file (.wav, .mp3, or .ogg).
        tts_config: TTS config dict.

    Returns:
        Path to the saved audio file.
    r   NGEMINI_API_KEYGOOGLE_API_KEYr   zIGEMINI_API_KEY not set. Get one at https://aistudio.google.com/app/apikeyrU   r"  r!  r}  GEMINI_BASE_URLr  partsr  AUDIOvoiceConfigprebuiltVoiceConfig	voiceName)responseModalitiesspeechConfig)contentsgenerationConfigz/models/z:generateContentrr   r  r  r  )paramsr  r  r   rQ  r  message,  zGemini TTS API error (HTTP r  
candidatesr  c              3   *   K   | ]}d |v sd|v 
|V  dS )
inlineDatainline_dataNr   )r   ps     r   r   z'_generate_gemini_tts.<locals>.<genexpr>  s7      WW|q/@/@MUVDVDV1DVDVDVDVWWr   z+Gemini TTS response contained no audio datar  r  r  z#Gemini TTS response was malformed: z$Gemini TTS returned empty audio datar  ro  Fr   deleter;  rA  rB  rC  rD  rE  r   rF  rG  rH  rI  rJ  	-loglevelTrK  rL  r  rN  rO  zffmpeg conversion failed: zEffmpeg not found; writing raw WAV to %s (extension may be misleading))+r  r   rg   r   ri   rP   DEFAULT_GEMINI_TTS_MODELDEFAULT_GEMINI_TTS_VOICEDEFAULT_GEMINI_TTS_BASE_URLr  r  r  r  r  r   r/  nextKeyError
IndexErrorr   r  r  r  rf   rq  rt  ru  r,  NamedTemporaryFiler   r<  r=  r   r   r  r   rS  r   r   copyfiler   removeOSError)r  r   ra   r  rm  gemini_configr"  r!  r}  r  endpointr  errr4  r  r  
audio_partinline	audio_b64r   r  	wav_bytesry  tmpwav_pathr;  cmdrW  r   s                                r   _generate_gemini_ttsr  H  s&   " OOO-..W-@P2Q2QWUW^^``G 
W
 
 	
 NN8R00M!!'+CDDEEKKMMiQiE!!'+CDDEEKKMMiQiE*%% 	'*++	'&  eggffSkk	  /01#*))K+? 
 

 
G ;;E;;;H}}w!34   H s""	)--//%%gr22CWWY''>8=#+>FF 	) 	) 	)]4C4(FFF	)K(*>KK6KK
 
 	
	M}}\"1%i09WWeWWWY]^^
LMMM--T1N1NTRTJJvr**		j), M M MDDDEE1LM  CABBB ++I ++I ##F++ +t$$ 	GGI	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 
	$F5	A	A	A S		)8              h'' 	3   ""++F33 	XD(y%E65+w tXt[';W^CbIIIF A%%--gh-GGM"#H#H#HIII & NNW   OHk222	Ih 	 	 	D		Ih 	 	 	D	 s   AG G-,G-BJ' 'K>KKM&&M*-M*	N22N69N6>C!S  R5 5
SSS,SS,
S)&S,(S))S,c                  f    	 ddl } | j                            d          duS # t          $ r Y dS w xY w)z=Check if the neutts engine is importable (installed locally).r   NrW   Fimportlib.utilutil	find_specr   	importlibs    r   _check_neutts_availabler    sP    ~''11==   uu   " 
00c                  f    	 ddl } | j                            d          duS # t          $ r Y dS w xY w)z@Check if the kittentts engine is importable (installed locally).r   Nr5   Fr  r  s    r   _check_kittentts_availabler    sP    ~''44D@@   uur  c                  Z    t          t          t                    j        dz  dz            S )z9Return path to the bundled default voice reference audio.neutts_sampleszjo.wavrP   r   __file__r(  r   r   r   _default_neutts_ref_audior$    $    tH~~$'77(BCCCr   c                  Z    t          t          t                    j        dz  dz            S )z>Return path to the bundled default voice reference transcript.r!  zjo.txtr"  r   r   r   _default_neutts_ref_textr'    r%  r   c                    ddl }|                    di           }|                    dd          pt                      }|                    dd          pt                      }|                    dd          }|                    d	d
          }|}	|                    d          s|                    dd          d         dz   }	t          t          t                    j	        dz            }
|j
        |
d| d|	d|d|d|d|g}t          j        |ddd          }|j        dk    rk|j                                        }d |                                D             }t#          dt%          d                              |          pd           |	|k    r`t)          j        d          }|r5|d|	ddd |g}t          j        |dd!"           t-          j        |	           nt-          j        |	|           |S )#a  Generate speech using the local NeuTTS engine.

    Runs synthesis in a subprocess via tools/neutts_synth.py to keep the
    ~500MB model in a separate process that exits after synthesis.
    Outputs WAV; the caller handles conversion for Telegram if needed.
    r   NrW   	ref_audior   ref_textr"  zneuphonic/neutts-air-q4-ggufdevicecpur  r   rH   zneutts_synth.pyz--textz--outz--ref-audioz
--ref-textz--modelz--deviceTr   rM  r  r   c                 <    g | ]}|                     d           |S )zOK:)
startswith)r   ls     r   
<listcomp>z$_generate_neutts.<locals>.<listcomp>  s)    QQQQQ\\%=P=PQqQQQr   zNeuTTS synthesis failed: 
   r  r;  rB  rJ  r  r  rK  checkr   )sysri   r$  r'  rq  rR  rP   r   r#  r(  
executabler   r   r  r   rg   
splitlinesr/  chrr   r<  r=  r   r  rename)r  r   ra   r5  neutts_configr)  r*  r"  r+  r  synth_scriptr  rW  r   error_linesr;  conv_cmds                    r   _generate_neuttsr>    s    JJJNN8R00M!!+r22Q6O6Q6QI  R00N4L4N4NHg'EFFEx//F H'' :%%c1--a069tH~~,/@@AAL$yh5FC ^C4MMMFA$$&&QQ&"3"3"5"5QQQes2ww||K7P7P7cTceefff ;h'' 	-hk7KXHN84<<<<Ih Ih,,,r   _piper_voice_cachec                  f    	 ddl } | j                            d          duS # t          $ r Y dS w xY w)z2Check whether the piper-tts package is importable.r   Nr:   Fr  r  s    r   _check_piper_availablerA  *  sP    ~''00<<   uur  c                  r    ddl m}  t           | dd                    }|                    dd           |S )zReturn the directory where Hermes caches Piper voice models.

    Resolves to ``~/.hermes/cache/piper-voices/`` under the active
    HERMES_HOME so voice downloads follow profile boundaries.
    r   rL   zcache/piper-voicespiper_voices_cacheTr  )rO   rM   r   r)  )rM   roots     r   _get_piper_voices_dirrE  3  sL     0/////35IJJKKDJJtdJ+++Kr   r!  download_dirc           
      P   | st           } t          |                                           }|j                                        dk    r#|                                rt          |          S ||  dz  }|                                r)||  dz                                  rt          |          S ddl}t          	                    d| |           	 t          j        |j        dd| dt          |          gd	d	d
          }n+# t          j        $ r}t          d|  d          |d}~ww xY w|j        dk    r:|j        pd                                pd}t          d|  d|dd                    |                                st          d| d          t          |          S )a}  Resolve *voice* (a model name or path) to a concrete .onnx file path.

    Accepts any of:
      - Absolute / expanded path to an .onnx file the user already has
      - A voice *name* like ``en_US-lessac-medium`` (downloads to
        ``download_dir`` on first use via ``python -m piper.download_voices``)

    Raises RuntimeError if the model can't be located or downloaded.
    z.onnxz
.onnx.jsonr   Nz0[Piper] Downloading voice '%s' to %s (first use)z-mzpiper.download_voicesz--download-dirTr  r-  z/Piper voice download timed out after 300s for 'r   r   zno stderr outputz!Piper voice download failed for 'z': i  z#Piper voice download completed but uh    is missing — check voice name (see: https://github.com/OHF-Voice/piper1-gpl/blob/main/docs/VOICES.md))DEFAULT_PIPER_VOICEr   r'  r   rf   r*  rP   r5  r   infor   r   r6  r  r/  r  r   rg   )r!  rF  	candidatecached_sysrW  r  r   s           r   _resolve_piper_voice_pathrM  ?  s     $# U&&((I7**y/?/?/A/A*9~~ uOOO+F}} Le+?+?+??GGII 6{{ 
KKBE<XXX	_d$;Us<002dC
 
 

 $   FeFFF
 
	
 A-%2,,..D2DHHH&#,HH
 
 	
 ==?? 
)& ) ) )
 
 	

 v;;s   0D D*D%%D*c                    t                      }ddl}t          |t                    r|                    di           ni                     d          pt
          }t                              d          pt                                                                }|	                    dd           t                              dd	                    }t          ||          }| d
| }	|	t          vrTt                              d|           |                    ||          t          |	<   t                              d           t          |	         }
d}t!          fddD                       }|r	 ddlm}  |t'                              dd                    t'                              dd                    t'                              dd                    t'                              dd                    t                              dd                              }n*# t(          $ r t                              d           Y nw xY w|}|                    d          s|                    dd          d         dz   }|                    |d          5 }||
                    | ||           n|
                    | |           ddd           n# 1 swxY w Y   ||k    rqt5          j        d          }|rF|d |d!d"d#|g}t9          j        |dd$%           	 t=          j        |           n%# t@          $ r Y nw xY wt=          j!        ||           |S )&zGenerate speech using the local Piper engine.

    Loads the voice model once per process (cached by absolute path) and
    writes a WAV file. Caller is responsible for converting to MP3/Opus
    via ffmpeg when a different output format is required.
    r   Nr:   r!  
voices_dirTr  use_cudaFz::cuda=z[Piper] Loading voice: %s)rP  z[Piper] Voice loadedc              3       K   | ]}|v V  	d S r   r   )r   kpiper_configs     r   r   z&_generate_piper_tts.<locals>.<genexpr>  s<         	
\     r   )length_scalenoise_scalenoise_w_scalevolumenormalize_audio)SynthesisConfigrT  rZ  rU  gMbX?rV  g?rW  rX  uZ   [Piper] SynthesisConfig not available in this piper-tts version — advanced knobs ignoredr  r   rH   ro  )
syn_configr;  rB  rJ  r  r  rK  r3  )"r;   waverh   rj   ri   rH  r   rE  r'  r)  rk   rM  r?  r   rI  loadanyr:   rY  r   r   r   rq  rR  rt  synthesize_wavr<  r=  r   r   r   r  r	  r9  )r  r   ra   r9   r[  
voice_namerF  rP  
model_path	cache_keyr!  rZ  has_advancedrY  r  wav_filer;  r=  rS  s                     @r   _generate_piper_ttsrd  s  s    JKKK2<Z2N2NV:>>'2...TVL!!'**A.AJ((66Q:O:Q:QRR]]__Ltd333L$$Z7788H*:|DDJ00h00I***/<<<(2
X(V(V9%*+++y)E
 J    ^    L  	------("<#3#3NC#H#HII!,"2"2=%"H"HII#L$4$4_c$J$JKK\--h<<== $\%5%56G%N%N O O  JJ  	 	 	NN5    	 H'' :%%c1--a069	8T	"	" 1h!  xJ GGGG  x000	1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ;h'' 		-hk7KXHN84<<<<	(####    Ih,,,s7   :B;H6 6$II,2K**K.1K.2M 
MM_kittentts_model_cachec                 n   t                      }|                    di           }|                    dt                    }|                    dt                    }|                    dd          }|                    dd          }|t          vrHt
                              d|            ||          t          |<   t
                              d	           t          |         }	|	                    | |||
          }
ddl}|}|	                    d          s|
                    dd          d         dz   }|                    ||
d           ||k    r`t          j        d          }|r5|d|ddd|g}t          j        |dd           t!          j        |           nt!          j        ||           |S )at  Generate speech using KittenTTS local ONNX model.

    KittenTTS is a lightweight TTS engine (25-80MB models) that runs
    entirely on CPU without requiring a GPU or API key.

    Args:
        text: Text to convert to speech.
        output_path: Where to save the audio file.
        tts_config: TTS config dict.

    Returns:
        Path to the saved audio file.
    r5   r"  r!  r  rZ  
clean_textTz[KittenTTS] Loading model: %sz%[KittenTTS] Model loaded successfully)r!  r  rg  r   Nr  r   rH   rE   r;  rB  rJ  r  r  rK  r3  )r6   ri   DEFAULT_KITTENTTS_MODELDEFAULT_KITTENTTS_VOICEre  r   rI  generate	soundfilerq  rR  ru  r<  r=  r   r   r   r  r9  )r  r   ra   r4   	kt_config
model_namer!  r  rg  r"  r  sfr  r;  r=  s                  r   _generate_kittenttsro    s    "##I{B//Iw(?@@JMM'#:;;EMM'3''E|T22J ///3Z@@@-6Yz-B-Bz*;<<<":.E NN4uEjNQQE H'' :%%c1--a069HHXue$$$ ;h'' 	-hk7KXHN84<<<<Ih Ih,,,r   c                      r                                  st          dd          S t                      t                    }t	          |          }t          |          }t                     |k    r4t                              d|t                     |            d|          ddl	m
}  |dd	                                          }|d
k    }|r4t          |                                          }|t          ||          }nt          j                                                            d          }	t          t$                    }
|
                    dd           |t)          |          }|
d|	 d| z  }n|r|dv r
|
d|	 dz  }n	|
d|	 dz  }|j                            dd           t-          |          	 |0t                              d|           t1           ||          n>|dk    rf	 t3                       n)# t4          $ r t7          j        dddd          cY S w xY wt                              d           t;                      n|dk    rf	 t=                       n)# t4          $ r t7          j        dddd          cY S w xY wt                              d           t?                      nf|dk    r-t                              d           tA                      n3|dk    r-t                              d           tC                      n |d k    rf	 tE                       n)# t4          $ r t7          j        dd!dd          cY S w xY wt                              d"           tG                      n|d#k    r-t                              d$           tI                      na|d%k    rTtK                      st7          j        dd&dd          S t                              d'           tM                      n|d(k    rf	 tO                       n)# t4          $ r t7          j        dd)dd          cY S w xY wt                              d*           tQ                      n|d+k    rf	 tS                       n)# t4          $ r t7          j        dd,dd          cY S w xY wt                              d-           tU                      n/d}	 tW                       n# t4          $ r d}Y nw xY w|rt                              d.           	 ddl,}|j-        .                    d/0          5 }|/                     fd1          0                    d23           ddd           n# 1 swxY w Y   n# tb          $ r& te          j3        ti                                Y nYw xY wtK                      r.t                              d4           d%}tM                      nt7          j        dd5dd          S tj          j6        7                              r#tj          j6        8                              dk    rt7          j        dd6| d7dd          S d}|Mts          |          r=:                    d          stw                    }|r|:                    d          }nH|d8v r+:                    d          stw                    }|r|d}n|d9v r:                    d          }tj          j6        8                              }t                              d:|d;|           d< }|rd=| }t7          j        d|||d>d          S # tx          $ r>}d?| d@| }t          =                    dA|           t          |d          cY d}~S d}~wt|          $ r@}dB| d@| }t          =                    dA|dC           t          |d          cY d}~S d}~wt~          $ r@}dD| d@| }t          =                    dA|dC           t          |d          cY d}~S d}~ww xY w)Eac  
    Convert text to speech audio.

    Reads provider/voice config from ~/.hermes/config.yaml (tts: section).
    The model sends text; the user configures voice and provider.

    On messaging platforms, the returned MEDIA:<path> tag is intercepted
    by the send pipeline and delivered as a native voice message.
    In CLI mode, the file is saved to ~/voice-memos/.

    Args:
        text: The text to convert to speech.
        output_path: Optional custom save path. Defaults to ~/voice-memos/<timestamp>.mp3

    Returns:
        str: JSON result with success, file_path, and optionally MEDIA tag.
    zText is requiredF)successz>TTS text too long for provider %s (%d chars), truncating to %dNr   )get_session_envHERMES_SESSION_PLATFORMr   telegramz%Y%m%d_%H%M%STr  tts_r   )r&   rV   rT   rU   rA  z.mp3z3Generating speech with command TTS provider '%s'...rV   z`ElevenLabs provider selected but 'elevenlabs' package not installed. Run: pip install elevenlabs)rq  r  )ensure_asciiz$Generating speech with ElevenLabs...r&   z<OpenAI provider selected but 'openai' package not installed.z$Generating speech with OpenAI TTS...rS   z%Generating speech with MiniMax TTS...rR   z!Generating speech with xAI TTS...rT   ziMistral provider selected but 'mistralai' package not installed. Run: pip install 'hermes-agent[mistral]'z-Generating speech with Mistral Voxtral TTS...rU   z+Generating speech with Google Gemini TTS...rW   zNeuTTS provider selected but neutts is not installed. Run hermes setup and choose NeuTTS, or install espeak-ng and run python -m pip install -U neutts[all].z(Generating speech with NeuTTS (local)...r5   zKittenTTS provider selected but 'kittentts' package not installed. Run 'hermes setup tts' and choose KittenTTS, or install manually: pip install https://github.com/KittenML/KittenTTS/releases/download/0.8.1/kittentts-0.8.1-py3-none-any.whlz2Generating speech with KittenTTS (local, ~25MB)...r:   zPiper provider selected but 'piper-tts' package not installed. Run 'hermes tools' and select Piper under TTS, or install manually: pip install piper-ttsz'Generating speech with Piper (local)...z"Generating speech with Edge TTS...rH   )max_workersc                  J    t          j        t                               S r   )asyncior   rf  )file_strr  ra   s   r   <lambda>z%text_to_speech_tool.<locals>.<lambda>  s    GK0B4S]0^0^$_$_ r   r  r   z9Edge TTS not available, falling back to NeuTTS (local)...zhNo TTS provider available. Install edge-tts (pip install edge-tts) or set up NeuTTS for local synthesis.z-TTS generation produced no output (provider: ))r<   rW   rS   rR   r5   r:   )rV   r&   rT   rU   z,TTS audio saved: %s (%s bytes, provider: %s),zMEDIA:z[[audio_as_voice]]
)rq  	file_path	media_tagr`   r   zTTS configuration error (r  z%szTTS dependency missing (r}   zTTS generation failed ()@rg   
tool_errorr   r   r   ry   r   r   r   gateway.session_contextrr  rf   r   r'  r  datetimenowstrftimeDEFAULT_OUTPUT_DIRr)  r   r(  rP   rI  r5  r#   r   r  dumpsr{  r(   r  r  r  r-   r  r  r  r>  r6   ro  r;   rd  r   concurrent.futuresfuturesThreadPoolExecutorsubmitrW  r/  ry  r   rf  r   r  r*  rT  r   rq  rX  r   r  rU  r   )r  r   r`   command_provider_configmax_lenrr  platform	want_opusr~  	timestampout_dirr   edge_available
concurrentpoolr   	opus_path	file_sizer  r   	error_msgrz  ra   s   `                    @@r   text_to_speech_toolr    s1   *  =tzz|| =,e<<<<!##JZ((H ?xTT 'x<<G
4yy7Lc$ii	
 	
 	
 HWH~ 8777778"==CCEEHZ'I  9%%0022	". <2 I %))++44_EE	)**dT222".01HIIC":":":S":"::II  	98'TTT"8"8"8"88II"8"8"8"88I 4$7779~~Hs4".KKEx   -h*A: HH %%'"$$$$ ' ' 'z$# # !&' ' ' ' ' ''
 KK>??? x<<<<!!'%'''' ' ' 'z$[# # !&' ' ' ' ' ''
 KK>??? x<<<<""KK?@@@!$*====KK;<<<dHj9999""'&(((( ' ' 'z$H# # !&	' ' ' ' ' '' KKGHHH!$*====!!KKEFFF x<<<<!!*,, 'z$F# # !&	' ' ' '
 KKBCCCT8Z8888$$'!#### ' ' 'z$J# #
 !&' ' ' ' ' '' KKLMMMh
;;;;  ' ' ' 'z$5# #
 !&' ' ' ' ' '' KKABBBh
;;;; "N' """" ' ' '!&'  '@AAAP----#+>>1>MM -QU______  &&,,,- - - - - - - - - - - - - - - $ P P PK 24: N NOOOOOP(** 	'WXXX# x<<<<z$E# # !&	' ' ' ' w~~h'' 	#27??8+D+D+I+I: TTTT  "# # # # !". 00GHH =((00 - 0 : :I  -#,#+#4#4V#<#< SSS\d\m\mnt\u\uS(22I ($#' FFF'0088GOOH--	BHQZN^N^`hiii (X''	 	;:y::Iz!"  0
 
    	  4 4 4@@@Q@@	T9%%%)U333333333 4 4 4?x??A??	T9t444)U333333333 4 4 4>h>>1>>	T9t444)U333333333	4sy  8^ H ^ #H;8^ :H;;6^ 2J  ^ #J'$^ &J''B^ M ^ #M96^ 8M99B^ 	3^ =Q ^ #Q2/^ 1Q226^ )R8 7^ 8#S^ S2^ T  ^  T/,^ .T//^ V5 ./V)V5 )V--V5 0V-1V5 4^ 5-W%"^ $W%%A^ =A^ C>^ 
a3%3_a3a3+5`& a3&a335a.(a3.a3c                  p   t                      rdS 	 t                       dS # t          $ r Y nw xY w	 t                       t	          d          rdS n# t          $ r Y nw xY w	 t                       t                      rdS n# t          $ r Y nw xY wt	          d          rdS t	          d          rdS t	          d          st	          d          rdS 	 t                       t	          d          rdS n# t          $ r Y nw xY wt                      rdS t                      rdS t                      rdS dS )	a,  
    Check if at least one TTS provider is available.

    Edge TTS needs no API key and is the default, so if the package
    is installed, TTS is available. A user-declared command provider
    also satisfies the requirement.

    Returns:
        bool: True if at least one provider can work.
    Trh  r  r  r  r  r  F)r9  r   r   r#   r   r(   _has_openai_audio_backendr-   r  r  rA  r   r   r   check_tts_requirementsr    s    %&& tt   -.. 	4	   $&& 	4	   &'' t]## t%&& -8H*I*I t   *++ 	4	      t!## t t5sA   " 
//A 
A A $B 
BBC6 6
DDc                      t                      } | rt          d          s	| t          fS t          d          }|$d}t	                      r|dz  }t          |          |j        t          |j        	                    d           dd          fS )zReturn direct OpenAI audio config or a managed gateway fallback.

    When ``tts.use_gateway`` is set in config, the Tool Gateway is preferred
    even if direct OpenAI credentials are present.
    r|   openai-audioNz8Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is setz5, and the managed OpenAI audio gateway is unavailabler  v1)
r   r   DEFAULT_OPENAI_BASE_URLr   r   r   nous_user_tokenr   gateway_originr  )direct_api_keymanaged_gatewayr  s      r   r  r  ?  s     233N 7oe44 76662>BBOL%'' 	ONNG!!!*G)0055888$- -  r   c                  V    t          t                      pt          d                    S )zPReturn True when OpenAI audio can use direct credentials or the managed gateway.r  )rk   r   r   r   r   r   r  r  U  s%    ,..^2N~2^2^___r   z(?<=[.!?])(?:\s|\n)|(?:\n\n)z```[\s\S]*?```z\[([^\]]+)\]\([^)]+\)zhttps?://\S+z\*\*(.+?)\*\*z	\*(.+?)\*z`(.+?)`z^#+\s*flagsz^\s*[-*]\s+z---+z\n{3,}c                 F   t                               d|           } t                              d|           } t                              d|           } t                              d|           } t
                              d|           } t                              d|           } t                              d|           } t                              d|           } t                              d|           } t                              d|           } |                                 S )z:Remove markdown formatting that shouldn't be spoken aloud. z\1r   z

)_MD_CODE_BLOCKr   _MD_LINK_MD_URL_MD_BOLD
_MD_ITALIC_MD_INLINE_CODE
_MD_HEADER_MD_LIST_ITEM_MD_HR_MD_EXCESS_NLrg   )r  s    r   _strip_markdown_for_ttsr  m  s    c4((D<<t$$D;;r4  D<<t$$D>>%&&Dud++D>>"d##DR&&D::b$DVT**D::<<r   
text_queue
stop_eventtts_done_eventdisplay_callbackc           
         |                                  	 ddt          t          t                      }|                    di           }|                    d          |                    d|                    d                    t          di |di |dii          t          d          pd}|st                              d           n	 t                      } ||	          n*# t          $ r t                              d
           Y nw xY w	 t                      }|                    ddd                                           nj# t          t          f$ r'}	t                              d|	           dY d}	~	n7d}	~	wt           $ r'}	t                              d|	           dY d}	~	nd}	~	ww xY wd}
d}d}d}g t#          j        dt"          j                  }dt(          ff	d}d                                 s;	 |                     |          }n5# t,          j        $ r# t1          |
          |k    r ||
           d}
Y ^w xY w|6|                    d|
          }
|
                                r ||
           n|
|z  }
|                    d|
          }
d|
v rd|
vr	 t6                              |
          }|n_|                                }|
d|         }|
|d         }
t1          |                                          |k     r||
z   }
n ||           |                                ;	 	 |                                  n# t,          j        $ r Y nw xY w,n2# t           $ r%}	t                              d|	           Y d}	~	nd}	~	ww xY w:	                                                                    n# t           $ r Y nw xY w|!                                 dS # :	                                                                    n# t           $ r Y nw xY w|!                                 w xY w)a  Consume text deltas from *text_queue*, buffer them into sentences,
    and stream each sentence through ElevenLabs TTS to the speaker in
    real-time.

    Protocol:
        * The producer puts ``str`` deltas onto *text_queue*.
        * A ``None`` sentinel signals end-of-text (flush remaining buffer).
        * *stop_event* can be set to abort early (e.g. user interrupt).
        * *tts_done_event* is **set** in the ``finally`` block so callers
          waiting on it (continuous voice mode) know playback is finished.
    NrV   ri  streaming_model_idrd   rh  r   z8ELEVENLABS_API_KEY not set; streaming TTS audio disabledrl  z8elevenlabs package not installed; streaming TTS disabledrE   rH   int16)
samplerater  dtypezsounddevice not available: %sz#sounddevice OutputStream failed: %s   r[  g      ?z<think[\s>].*?</think>r  sentencec                 H  	                                  rdS t          |                                           }|sdS |                                                    d          }
D ]0}|                                                    d          |k    r dS 1
                    |            |            dS t          |          k    r
|d         }	 j                            |d          }h|D ]a}                                 r nLddl	}|
                    ||j                  }                    |                    dd                     bdS dS  	|           dS # t          $ r&}t                              d	|           Y d}~dS d}~ww xY w)
z6Display sentence and optionally generate + play audio.Nz.!,	pcm_24000rn  r   )r  r  rH   z!Streaming TTS sentence failed: %s)is_setr  rg   rf   r  r   r   rr  rs  numpy
frombufferr  ru  reshaper   r   r   )r  cleanedcleaned_lowerprev
audio_iterrz  _npaudio_arrayr  _play_via_tempfile_spoken_sentencesrw  r  rd   output_streamr  stream_max_lenri  s            r   _speak_sentencez.stream_tts_to_speaker.<locals>._speak_sentence  s     "" -h77==??G #MMOO22599M)  ::<<&&u-->>FF ?$$W---+  ***~7||n,,!/>/2I#2:: %%"-	 ;  
 !,!+ H H%,,.. "!E++++&)nnU#)n&L&L%++K,?,?A,F,FGGGGH H!E '&z:>>>>> I I IBCHHHHHHHHHIs   BE1 #E1 1
F!;FF!c                    d}	 ddl }t          j        dd          }|j        }|                    |d          5 }|                    d           |                    d           |                    d	           | D ]-}|                                r n|	                    |           .ddd           n# 1 swxY w Y   dd
l
m}  ||           n2# t          $ r%}t                              d|           Y d}~nd}~ww xY w|r(	 t          j        |           dS # t"          $ r Y dS w xY wdS # |r&	 t          j        |           w # t"          $ r Y w w xY ww xY w)z0Write PCM chunks to a temp WAV file and play it.Nr   r  Fr  ro  rH   rI   rE   )play_audio_filez!Temp-file TTS fallback failed: %s)r[  r,  r  r   rt  setnchannelssetsampwidthsetframerater  writeframestools.voice_moder  r   r   r   r   r+  r	  )	r  stop_evttmp_pathr[  r  wfrz  r  r  s	            r   r  z1stream_tts_to_speaker.<locals>._play_via_tempfile  s   H1NNN8YYsD)) .ROOA&&&OOA&&&OOE***!+ . .#??,, "!Eu----. . . . . . . . . . . . . . . =<<<<<)))) I I IBCHHHHHHHHI  	(+++++"    8 	(++++"   s   7C A0B7+C 7B;;C >B;?C D2 
DC>9D2 >DD2 	D 
D-,D-2E6E
E
EEEEr   z<thinkz</think>Tz Streaming TTS pipeline error: %s)"clearrp  %DEFAULT_ELEVENLABS_STREAMING_MODEL_IDr   ri   ry   r   r   r   r#   r   r1   OutputStreamr   r	  r   r   r   r   DOTALLrP   r  queueEmptyr   r   rg   _SENTENCE_BOUNDARY_REsearchend
get_nowaitstopr  set)r  r  r  r  ra   rv  rm  r!   r0   r  sentence_bufmin_sentence_lenlong_flush_lenqueue_timeout_think_block_rer  deltamend_posr  r  r  rw  rd   r  r  ri  s    ` `                @@@@@@@r   stream_tts_to_speakerr  |  s   " {.8%''
NN<44	==X66==!5!*z8!D!DF F 2MzM<)LI)Lz8)L)LMM
 

 !!566<" 	)NNUVVVV[/11
#G444 [ [ [YZZZZZ[ !),..B$&OO#(1G %4 % %M "''))))#W- ) ) )LL!@#FFF$(MMMMMM  ) ) )NN#H#NNN$(MMMMMM) ')*%>biPPP(	Ic (	I (	I (	I (	I (	I (	I (	I (	I (	I (	I (	I (	I (	I (	IT	 	 	4 ##%% *	*"}==;   |$$~55#OL111#%L }.222|DD%%'' 2#OL111E!L
 +..r<@@L <''Jl,J,J*)00>>9%%'''1+GHH5x~~''((+;;;#+l#:L)))*? ##%% *	*Z	%%'''';   	   @ @ @93????????@ $""$$$##%%%%    $""$$$##%%%%   s  C N  C; :N ;$D"N !D""N (:E# "N #G
4FN G
#G N G

AN %H< ;N </I.+N -I..C2N !M6 5N 6NN NN P 
N<N72P 7N<<P (O+ +
O87O8Q#(P>=Q#>
QQ#
QQ#__main__u   🔊 Text-to-Speech Tool Modulez2==================================================c                 >    	  |              dS # t           $ r Y dS w xY w)NTF)r   )importerlabels     r   _checkr  T  s9    	HJJJ4 	 	 	55	s   
 
z
Provider availability:z  Edge TTS:   	installedz$not installed (pip install edge-tts)z  ElevenLabs: elz&not installed (pip install elevenlabs)z    API Key:  rh  r  znot setz  OpenAI:     oaiznot installedz2not set (VOICE_TOOLS_OPENAI_KEY or OPENAI_API_KEY)z  MiniMax:    r  zAPI key setznot set (MINIMAX_API_KEY)z  Piper:      z%not installed (pip install piper-tts)z  ffmpeg:     u	   ✅ foundu(   ❌ not found (needed for Telegram Opus)z
  Output dir: z  Configured provider: )registryr  rr  a  Convert text to speech audio. Returns a MEDIA: path that the platform delivers as native audio. Compatible providers render as a voice bubble on Telegram; otherwise audio is sent as a regular attachment. In CLI mode, saves to ~/voice-memos/. Voice and provider are user-configured (built-in providers like edge/openai or custom command providers under tts.providers.<name>), not model-selected.objectstringzThe text to convert to speech. Provider-specific character caps apply and are enforced automatically (OpenAI 4096, xAI 15000, MiniMax 10000, ElevenLabs 5k-40k depending on model); over-long input is truncated.)r   descriptionz9Optional custom file path to save the audio. Defaults to z/audio_cache/<timestamp>.mp3r  r   )r   
propertiesrequired)r   r  
parametersr|   c                 r    t          |                     dd          |                     d                    S )Nr  r   r   r  )r  ri   )argskws     r   r{  r{    s6    2XXfb!!HH]++ -  -  - r   u   🔊)r   toolsetschemahandlercheck_fnemojir   )__doc__ry  r  r  r  loggingr   r  r   r   r<  r   r   r,  	threadingr  pathlibr   typingr   r   r   r   urllib.parser   rO   r	   	getLoggerr  r   r   tools.managed_tool_gatewayr   tools.tool_backend_helpersr   r   r   tools.xai_httpr   r   r#   r(   r-   r1   r6   r;   r   r^  rp  rm   r  r  rh  ri  rH  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  GEMINI_TTS_SAMPLE_RATEGEMINI_TTS_CHANNELSGEMINI_TTS_SAMPLE_WIDTHrP   rQ   r  rX   rl   __annotations__r_   re   MAX_TEXT_LENGTHry   r   r   	frozensetrn   r   r   r   rq   r   ro   rk   rp   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r5  r9  r>  rX  rf  r{  r  r  r  r  r  r  r  r  r  r$  r'  r>  r?  rA  rE  rM  rd  re  ro  r  r  tupler  r  r   r  r  r  r  r  r  r  	MULTILINEr  r  r  r  r  QueueEventr  printr  r   r`   tools.registryr  r  
TTS_SCHEMAregisterr   r   r   <module>r     sz  ! ! !F      				  				                    0 0 0 0 0 0 0 0 0 0 0 0             0 0 0 0 0 0		8	$	$/ / / / D C C C C C p p p p p p p p p p 0 0 0 0 0 0  
  
  
  
    	 	 	  ' 4 6 (; %( = " +  5 ' 2 = 3 E     , 9 ! P    = = = = = -,..  , , $sCx.    ##""	4 	4  $sCx. 	 	 	    +
 ,05$ 5$sm5$c3h(5$ 	5$ 5$ 5$ 5$v$sCx.    &Ld38n L L L L LH "	 # # #    '* #$) !&Y'D'D'DEE &* #8d38n 8C 8DcN 8 8 8 8S#X
 
#s(^   0>S#X >4 > > > >S#X d38n   * S#X        	T#s(^ 	 	 	 	 	 "&[ [cN[#[ 	[ [ [ [$T#s(^     3 # (3-    @# hsm PS    "sCx. 	   <#j.> #4 # # # #L%Qc %QE %Qj6Q %Q %Q %Q %QP'd 'DcN 't ' ' ' 'A
AA A cN	A
 S#XA 	A A A AH htCH~.F RV    .T . . . .
 s  x}        L3 S d3PS8n Y\    <(s ( ($sCx. (UX ( ( ( (\-s - -$sCx. -UX - - - -f;C ;c ;tCH~ ;RU ; ; ; ;BQ Q# Q4S> QVY Q Q Q Qn+ +# +4S> +VY + + + +f .'/	C CCC C 	C
 C C C CD@s @ @$sCx. @UX @ @ @ @N    D    D3 D D D D
D# D D D D
23 2S 2d38n 2QT 2 2 2 2x &( DcN ' ' '    	t 	 	 	 	1S 1 1 1 1 1 1hKc K Kc3h KTW K K K Kf *, S#X + + +4c 4 4c3h 4TW 4 4 4 4x "&B4 B4
B4#B4 	B4 B4 B4 B4P1 1 1 1 1hU38_    ,`4 ` ` ` ` #
#BCC  -..2:.//
"*_
%
%2:&''RZ%%
"*Z((RZ	666

>>>>	G		
9%%# #    & 9=	N NNN ON xt45	N N N Nh z	E
+,,,	E(OOO   
E
$%%%	E
x&&1A6*J*Jv;;Pv
x
xyyy	E
z&&1CT*J*Jx;;Px
z
z{{{	E
XMM2F$G$GV55Y
X
XYYY	E
e&&1F*N*Nc;;Tc
e
efff	E	o0022l558l	o 	o   
E
oMM:K,L,Lm==Rm
o
oppp	E
q*@*@*B*Bo;;Ho
q
qrrr	E
i++--g;;=g
i
ijjj	E
1/
1
1222F}V$$H	E
.H
.
./// 0 / / / / / / /  ` !  s 
 !  O[n[n[p[p   O   O   O 	
 	
 H  
&  	- - $
	 	 	 	 	 	r   