
    i]              	       ~   d Z ddlZddlZddlZddlZddlZddlZddlm	Z	 ddl
mZ ddlmZ ddlmZmZmZmZmZmZmZmZ ddlmZmZ ddlZddlZddlmZ d	d
lmZm Z m!Z!m"Z"m#Z#m$Z$ d	dl%m&Z& d	dl'm(Z( d	dl)m*Z*m+Z+m,Z,m-Z- d	dlm.Z.m/Z/  ej0         ej1                              Z2de3de4deee3e3f         e3f         fdZ5dee6e3f         de6fdZ7de6fdZ8de3de4de4fdZ9de3de4fdZ:de3de4de4fdZ;dee6e3f         de4dee3ddf         fdZ<d e&d!ee6e3f         de6fd"Z=de6fd#Z>d$e6d%e6d&e6de6fd'Z? G d( d)          Z@dS )*zCommunicate with the service. Only the Communicate class should be used by
end-users. The other classes and functions are for internal use only.    N)nullcontext)TextIOWrapper)Queue)AsyncGeneratorContextManagerDict	GeneratorListOptionalTupleUnion)escapeunescape)Literal   )DEFAULT_VOICEMP3_BITRATE_BPSSEC_MS_GEC_VERSIONTICKS_PER_SECONDWSS_HEADERSWSS_URL)	TTSConfig)DRM)NoAudioReceivedUnexpectedResponseUnknownResponseWebSocketError)CommunicateStateTTSChunk)cafiledataheader_lengthreturnc                     t          | t                    st          d          i }| d|                             d          D ] }|                    dd          \  }}|||<   !|| |dz   d         fS )z
    Returns the headers and data from the given data.

    Args:
        data (bytes): The data to be parsed.
        header_length (int): The length of the header.

    Returns:
        tuple: The headers and data to be used in the request.
    zdata must be bytesNs   
   :r      )
isinstancebytes	TypeErrorsplit)r!   r"   headerslinekeyvalues         [/home/ubuntu/.hermes/hermes-agent/venv/lib/python3.11/site-packages/edge_tts/communicate.pyget_headers_and_datar0   2   s     dE"" .,---G^m^$**733  ZZa((
UD*,,---    stringc                    t          | t                    r|                     d          } t          | t                    st	          d          t          |           }t          |          D ]G\  }}t          |          }d|cxk    rdk    s!n d|cxk    rdk    sn d|cxk    rdk    rn Bd	||<   Hd
                    |          S )aS  
    The service does not support a couple character ranges.
    Most important being the vertical tab character which is
    commonly present in OCR-ed PDFs. Not doing this will
    result in an error from the service.

    Args:
        string (str or bytes): The string to be cleaned.

    Returns:
        str: The cleaned string.
    utf-8zstring must be str or bytesr                    )	r'   r(   decodestrr)   list	enumerateordjoin)r2   charsidxcharcodes        r/   remove_incompatible_charactersrF   J   s     &%   (w''fc"" 75666F||Eu%%  	TIINNNNNNNNd 0 0 0 0b 0 0 0 0bD6F6F6F6FB6F6F6F6F6FE#J775>>r1   c                  2    t          j                    j        S )zZ
    Returns a UUID without dashes.

    Returns:
        str: A UUID without dashes.
    )uuiduuid4hex r1   r/   
connect_idrL   f   s     :<<r1   textlimitc                 n    |                      dd|          }|dk     r|                      dd|          }|S )a  
    Finds the index of the rightmost preferred split character (newline or space)
    within the initial `limit` bytes of the text.

    This helps find a natural word or sentence boundary for splitting, prioritizing
    newlines over spaces.

    Args:
        text (bytes): The byte string to search within.
        limit (int): The maximum index (exclusive) to search up to.

    Returns:
        int: The index of the last found newline or space within the limit,
             or -1 if neither is found in that range.
       
r       )rfind)rM   rN   split_ats      r/   (_find_last_newline_or_space_within_limitrT   p   s<    " zz%E**H!||::dAu--Or1   text_segmentc                     t          |           }|dk    r;	 | d|                             d           |S # t          $ r |dz  }Y nw xY w|dk    ;|S )a  
    Finds the rightmost possible byte index such that the
    segment `text_segment[:index]` is a valid UTF-8 sequence.

    This prevents splitting in the middle of a multi-byte UTF-8 character.

    Args:
        text_segment (bytes): The byte segment being considered for splitting.

    Returns:
        int: The index of the safe split point. Returns 0 if no valid split
             point is found (e.g., if the first byte is part of a multi-byte
             sequence longer than the limit allows).
    r   Nr4   r   )lenr<   UnicodeDecodeError)rU   rS   s     r/   _find_safe_utf8_split_pointrY      s{     <  H
Q,,	(#**7333O! 	 	 	MHHH	 Q,, Os   6 AArS   c                     |dk    rSd| d|         v rG|                      dd|          }|                     d||          dk    rn|}|dk    rd| d|         v G|S )a  
    Adjusts a proposed split point backward to prevent splitting inside an XML entity.

    For example, if `text` is `b"this &amp; that"` and `split_at` falls between
    `&` and `;`, this function moves `split_at` to the index before `&`.

    Args:
        text (bytes): The text segment being considered.
        split_at (int): The proposed split point index, determined by whitespace
                        or UTF-8 safety.

    Returns:
        int: The adjusted split point index. It will be moved to the '&'
             if an unterminated entity is detected right before the original `split_at`.
             Otherwise, the original `split_at` is returned.
    r      &N   ;)rindexfind)rM   rS   ampersand_indexs      r/   "_adjust_split_point_for_xml_entityra      s}    " Q,,44		?22++dAx8899T?H55;; # Q,,44		?22 Or1   byte_lengthc              #   P  K   t          | t                    r|                     d          } t          | t                    st	          d          |dk    rt          d          t          |           |k    rt          | |          }|dk     rt          |           }t          | |          }|dk     rt          d          | d|         
                                }|r|V  | |dk    r|ndd         } t          |           |k    | 
                                }|r|V  dS dS )a  
    Splits text into chunks, each not exceeding a maximum byte length.

    This function prioritizes splitting at natural boundaries (newlines, spaces)
    while ensuring that:
    1. No chunk exceeds `byte_length` bytes.
    2. Chunks do not end with an incomplete UTF-8 multi-byte character.
    3. Chunks do not split XML entities (like `&amp;`) in the middle.

    Args:
        text (str or bytes): The input text. If str, it's encoded to UTF-8.
        byte_length (int): The maximum allowed byte length for any yielded chunk.
                           Must be positive.

    Yields:
        bytes: Text chunks (UTF-8 encoded, stripped of leading/trailing whitespace)
               that conform to the byte length and integrity constraints.

    Raises:
        TypeError: If `text` is not str or bytes.
        ValueError: If `byte_length` is not positive, or if a split point
                    cannot be determined (e.g., due to extremely small byte_length
                    relative to character/entity sizes).
    r4   ztext must be str or bytesr   z"byte_length must be greater than 0zTMaximum byte length is too small or invalid text structure near '&' or invalid UTF-8Nr   )r'   r=   encoder(   r)   
ValueErrorrW   rT   rY   ra   strip)rM   rb   rS   chunkremaining_chunks        r/   split_text_by_byte_lengthri      s`     6 $ ${{7##dE"" 53444a=>>>
d))k
!
!;D+NNa<<2488H 6dHEEa<< C   YhY%%'' 	KKK 1HH!5565 d))k
!
!: jjllO  r1   tcescaped_textc                     t          |t                    r|                    d          }d| j         d| j         d| j         d| j         d| dS )z
    Creates a SSML string from the given parameters.

    Args:
        tc (TTSConfig): The TTS configuration.
        escaped_text (str or bytes): The escaped text. If bytes, it must be UTF-8 encoded.

    Returns:
        str: The SSML string.
    r4   z_<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'><voice name='z'><prosody pitch='z' rate='z
' volume='z'>z</prosody></voice></speak>)r'   r(   r<   voicepitchratevolume)rj   rk   s     r/   mkssmlrq     s     ,&& 4#**733		 	8	 	-/W	 	@B		 	 	 	 	r1   c                  N    t          j        dt          j                              S )zg
    Return Javascript-style date string.

    Returns:
        str: Javascript-style date string.
    z:%a %b %d %Y %H:%M:%S GMT+0000 (Coordinated Universal Time))timestrftimegmtimerK   r1   r/   date_to_stringrv      s#     =Ddkmm  r1   
request_id	timestampssmlc                     d|  d| d| S )z
    Returns the headers and data to be used in the request.

    Returns:
        str: The headers and data to be used in the request.
    zX-RequestId:z1
Content-Type:application/ssml+xml
X-Timestamp:zZ
Path:ssml

rK   )rw   rx   ry   s      r/   ssml_headers_plus_datar{   0  s4    	z 	 	 	 	 		 	r1   c                      e Zd ZdZefdddddddddd	ed
ededededed         deej	                 dee         dee
         dee
         fdZdedefdZd dZdeedf         fdZdeedf         fdZ	 d!deeef         deeeef                  ddfdZdeeddf         fdZ	 d!deeef         deeeef                  ddfdZdS )"Communicatez'
    Communicate with the service.
    z+0%z+0HzSentenceBoundaryN
   <   )ro   rp   rn   boundary	connectorproxyconnect_timeoutreceive_timeoutrM   rm   ro   rp   rn   r   WordBoundaryr~   r   r   r   r   c                   t          |||||          | _        t          |t                    st	          d          t          t          t          |                    d          | _        |$t          |t                    st	          d          || _	        t          |	t                    st	          d          t          |
t                    st	          d          t          j        d d |	|
          | _        |)t          |t          j                  st	          d          || _        dd	d	d
d	d	d| _        d S )Nztext must be stri   zproxy must be strzconnect_timeout must be intzreceive_timeout must be int)totalconnectsock_connect	sock_readz'connector must be aiohttp.BaseConnectorr1   r   F)partial_textoffset_compensationlast_duration_offsetstream_was_calledchunk_audio_bytescumulative_audio_bytes)r   
tts_configr'   r=   r)   ri   r   rF   textsr   intaiohttpClientTimeoutsession_timeoutBaseConnectorr   state)selfrM   rm   ro   rp   rn   r   r   r   r   r   s              r/   __init__zCommunicate.__init__G  sU    $E4II $$$ 	0./// /1$7788
 

 Zs%;%;/000$)
 /3// 	;9:::/3// 	;9:::&4(%	 
  
  
  Iw?T)U)U EFFF:C  #$$%!&!"&'(
 (



r1   r!   r#   c                 B   t          j        |          d         D ]v}|d         }|dv rR|d         d         | j        d         z   }|d         d         }|||t          |d         d         d	                   d
c S |dv ret	          d|           t          d          )NMetadataTyper   DataOffsetr   DurationrM   Text)typeoffsetdurationrM   )
SessionEndzUnknown metadata type: zNo WordBoundary metadata found)jsonloadsr   r   r   r   )r   r!   meta_obj	meta_typecurrent_offsetcurrent_durations         r/   __parse_metadatazCommunicate.__parse_metadata  s    
4((4 	I 	IH (I@@@V$X.<Q1RR  $,F#3J#? %, 0$Xf%5f%=f%EFF	     O++!"GI"G"GHHH !ABBBr1   c                     | j         dxx         | j         d         z  cc<   | j         d         dz  t          z  t          z  | j         d<   d| j         d<   dS )a  Update inter-chunk offset_compensation from cumulative CBR audio bytes.

        The output format is audio-24khz-48kbitrate-mono-mp3 (48 kbps CBR).
        For any CBR stream the byte-to-tick conversion is exact integer
        arithmetic:  ticks = total_bytes * 8 * 10_000_000 // 48_000.

        This replaces the previous metadata-based accumulation which drifted
        on long texts due to variable AI silence and Microsoft's integer
        overflow in reported offsets.
        r   r   r5   r   r   N)r   r   r   )r   s    r/   __compensate_offsetzCommunicate.__compensate_offset  sr     	
+,,,
;N0OO,,,J/0  	
() +,
&'''r1   c           
     x   K   d' fd}d' fd}d}t          j         j        d j                  4 d {V 	 }|                    t
           dt                       dt          j                     d	t           d
 j
        t          j        t                    t                    4 d {V 	  |             d {V   |             d {V  2 3 d {V }|j        t           j        j        k    r|j                            d          }t'          ||                    d                    \  }}|                    dd           }	|	dk    r4                     |          }
|
W V  |
d         |
d         z    j        d<   |	dk    r                                   n|	dvrt3          d          |j        t           j        j        k    rUt7          |j                  dk     rt9          d          t:                              |j        d d         d          }|t7          |j                  k    rt9          d          t'          |j        |          \  }}|                    d          dk    rt9          d          |                    dd           }|dvrt9          d          |$t7          |          dk    rt9          d           t7          |          dk    rt9          d!          d} j        d"xx         t7          |          z  cc<   d#|d$W V  R|j        t           j        j        k    rtA          |j        r|j        nd%          6 |stC          d&          	 d d d           d {V  n# 1 d {V swxY w Y   d d d           d {V  d S # 1 d {V swxY w Y   d S )(Nr#   c            	         K   j         j        dk    } | rdnd}| sdnd}                    dt                       d| d| d           d{V  dS )	z)Sends the command request to the service.r   truefalsezX-Timestamp:z
Content-Type:application/json; charset=utf-8
Path:speech.config

{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"z","wordBoundaryEnabled":"z9"},"outputFormat":"audio-24khz-48kbitrate-mono-mp3"}}}}
N)r   r   send_strrv   )word_boundarywdsqr   	websockets      r/   send_command_requestz2Communicate.__stream.<locals>.send_command_request  s       O4FM(5gB,9'B$$~//   /1	  LN	  	 	 	 	 	 	 	 	 	 	 	r1   c                     K                        t          t                      t                      t	           j         j        d                                        d{V  dS )z&Sends the SSML request to the service.r   N)r   r{   rL   rv   rq   r   r   )r   r   s   r/   send_ssml_requestz/Communicate.__stream.<locals>.send_ssml_request  s      $$&LL"$$
>2  	 	 	 	 	 	 	 	 	 	 	r1   FT)r   	trust_envtimeoutz&ConnectionId=z&Sec-MS-GEC=z&Sec-MS-GEC-Version=   )compressr   r+   sslr4   s   

s   Paths   audio.metadatar   r   r   s   turn.end)s   responses
   turn.startzUnknown path receivedr&   zBWe received a binary message, but it is missing the header length.bigz9The header length is greater than the length of the data.s   audioz3Received binary message, but the path is not audio.s   Content-Type)s
   audio/mpegNz=Received binary message, but with an unexpected Content-Type.r   z<Received binary message with no Content-Type, but with data.z:Received binary message, but it is missing the audio data.r   audio)r   r!   zUnknown errorzFNo audio was received. Please verify that your parameters are correct.r#   N)"r   ClientSessionr   r   
ws_connectr   rL   r   generate_sec_ms_gecr   r   headers_with_muidr   _SSL_CTXr   	WSMsgTypeTEXTr!   rd   r0   r_   get_Communicate__parse_metadatar   _Communicate__compensate_offsetr   BINARYrW   r   r   
from_bytesERRORr   r   )r   r   r   audio_was_receivedsessionreceivedencoded_data
parametersr!   pathparsed_metadatar"   content_typer   s   `            @r/   __streamzCommunicate.__stream  s-     	 	 	 	 	 	 	 	 	 	 	 	 	 	  # (n(
 
 
 g	 g	 g	 g	 g	 g	 g	 g	 g(( 8 8jll 8 82448 8#58 8 *)+66 ) 
 
	g	 g	 g	 g	 g	 g	 g	 g	 &&(((((((((##%%%%%%%%%"+ Q Q Q Q Q Q Qh=G$5$:::*2-*>*>w*G*GL';$l&7&7&D&D( ($J &>>'488D000*.*?*?*E*E----- ,H5
8SS 
#9:: ,, 00222%AAA-.EFFF B]g&7&>>>8=))A--0`  
 %(NN8=!3De$L$LM$s8='9'9990W  
 (< }( ($J
 "~~g..(::0Q   $.>>/4#H#HL#+@@@0[  
 $+t99>>$ 1Z  
 4yyA~~0X  
 *.&J2333s4yy@333#*D9999999]g&7&===()1KO   >] #,f & %\   Kg	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	 g	sC   A4N)2#NM JN3N)
N	N)N	N))
N36N3c                  K   | j         d         rt          d          d| j         d<   | j        D ]| j         d<   d| j         d<   	 |                                 2 3 d{V }|W V  6 8# t          j        $ rV}|j        dk    r t          j        |           d| j         d<   |                                 2 3 d{V }|W V  6 Y d}~d}~ww xY wdS )	au  
        Streams audio and metadata from the service.

        Raises:
            NoAudioReceived: If no audio is received from the service.
            UnexpectedResponse: If the response from the service is unexpected.
            UnknownResponse: If the response from the service is unknown.
            WebSocketError: If there is an error with the websocket.
        r   zstream can only be called once.Tr   r   r   Ni  )	r   RuntimeErrorr   _Communicate__streamr   ClientResponseErrorstatusr   handle_client_response_error)r   messagees      r/   streamzCommunicate.stream6  sI      :)* 	B@AAA*.
&' +/* 	" 	"DJ~&./DJ*+
"%)]]__ " " " " " " "'!MMMMM &5_. " " "8s??033323
./%)]]__ " " " " " " "'!MMMMM &5_____"	" 	"s0   A(A&A((C7>C5C;CCaudio_fnamemetadata_fnamec                   K   |t          |dd          nt                      }|5  t          |d          5 }|                                 2 3 d{V }|d         dk    r|                    |d                    /t	          |t
                    r4|d         d	v r*t          j        ||           |                    d
           y6 	 ddd           n# 1 swxY w Y   ddd           dS # 1 swxY w Y   dS )zE
        Save the audio and metadata to the specified files.
        Nwr4   )encodingwbr   r   r!   r   
)openr   r   writer'   r   r   dump)r   r   r   metadatar   r   s         r/   savezCommunicate.saveW  s      ) w7777 	
  		) 		)tK.. 		)%!% ) ) ) ) ) ) )g6?g--KK0000-88 )WV_ Q > > Igx000NN4((( "/		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		) 		)sA   C,CCA4CC,C	C,C	C,,C03C0c              #      K   dt           ddf fd}t                      }t          j                                        5 }|                    ||           	 |                                }|n|V  	 ddd           dS # 1 swxY w Y   dS )z-Synchronous interface for async stream methodqueuer#   Nc                      d fd}t          j                    }t          j        |           |                     |                       |                                 d S )Nr#   c                     K                                    2 3 d {V }                     |            6                     d            d S N)r   put)itemr   r   s    r/   	get_itemszECommunicate.stream_sync.<locals>.fetch_async_items.<locals>.get_itemss  s`      "&++-- $ $ $ $ $ $ $$IIdOOOO #0		$s   5r   )asyncionew_event_loopset_event_looprun_until_completeclose)r   r   loopr   s   `  r/   fetch_async_itemsz2Communicate.stream_sync.<locals>.fetch_async_itemsr  ss                 
 )++D"4(((##IIKK000JJLLLLLr1   )r   
concurrentfuturesThreadPoolExecutorsubmitr   )r   r   r   executorr   s   `    r/   stream_synczCommunicate.stream_synco  s      		U 		t 		 		 		 		 		 		 ww2244 	OO-u555yy{{<


	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   4BBBc                    t           j                                        5 }|                    t          j        |                     ||                    }|                                 ddd           dS # 1 swxY w Y   dS )z,Synchronous interface for async save method.N)r   r   r  r  r   runr   result)r   r   r   r  futures        r/   	save_synczCommunicate.save_sync  s     2244 	__TYY{NCC F MMOOO		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   A	A55A9<A9r   r   )__name__
__module____qualname____doc__r   r=   r   r   r   r   r   r   r(   r   r   r   r   r   r   r   r   r	   r  r	  rK   r1   r/   r}   r}   A  s2         #9

 @R59#)+)+9
 9
 9
9
 9

 9
 9
 9
 <=9
 G129
 }9
 "#9
 "#9
 9
 9
 9
vCU Cx C C C C&, , , ,(Kx~ > K K K KZ"	$	'" " " "H 7;) )3:&) !sEz!23) 
	) ) ) )0Yxt';<    8 7;
 
3:&
 !sEz!23
 
	
 
 
 
 
 
r1   r}   )Ar  r   concurrent.futuresr   r   r   rs   rH   
contextlibr   ior   r   r   typingr   r   r   r	   r
   r   r   r   xml.sax.saxutilsr   r   r   certifityping_extensionsr   	constantsr   r   r   r   r   r   data_classesr   drmr   
exceptionsr   r   r   r   r   r   create_default_contextwherer   r(   r   r0   r=   rF   rL   rT   rY   ra   ri   rq   rv   r{   r}   rK   r1   r/   <module>r     s  I I       



   " " " " " "            	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 . - - - - - - -   % % % % % %                $ # # # # #                  / . . . . . . .%3%]W]__===.
. #.
4uu$%. . . .05e+<     8C    5      0e     8U c c    <B
U

B*-BudD !B B B BJy c5j(9 c    2     s s # #    "Q Q Q Q Q Q Q Q Q Qr1   