
"""Routing helpers for inbound user-attached images.

Two modes:

  native  — attach images as OpenAI-style ``image_url`` content parts on the
            user turn. Provider adapters (Anthropic, Gemini, Bedrock, Codex,
            OpenAI chat.completions) already translate these into their
            vendor-specific multimodal formats.

  text    — run ``vision_analyze`` on each image up-front and prepend the
            description to the user's text. The model never sees the pixels;
            it only sees a lossy text summary. This is the pre-existing
            behaviour and still the right choice for non-vision models.

The decision is made once per message turn by :func:`decide_image_input_mode`.
It reads ``agent.image_input_mode`` from config.yaml (``auto`` | ``native``
| ``text``, default ``auto``) and the active model's capability metadata.
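
For example, in config.yaml::

    agent:
      image_input_mode: auto   # "auto" (default) | "native" | "text"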

In ``auto`` mode:
  - If the user has explicitly configured ``auxiliary.vision.provider``
    (i.e. not ``auto`` and not empty), we assume they want the text pipeline
    regardless of the main model — they've opted in to a specific vision
    backend for a reason (cost, quality, local-only, etc.).
  - Otherwise, if the active model reports ``supports_vision=True`` in its
    models.dev metadata, we attach natively.
  - Otherwise (non-vision model, no explicit override), we fall back to text.
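
  For example, with ``image_input_mode: auto`` and no auxiliary override
  (illustrative; the real answer depends on the models.dev entry)::

    decide_image_input_mode("anthropic", "claude-3-5-sonnet", cfg)  # vision-capable -> "native"
    decide_image_input_mode("groq", "llama-3.1-8b-instant", cfg)    # text-only      -> "text"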

This keeps ``vision_analyze`` surfaced as a tool in every session — skills
and agent flows that chain it (browser screenshots, deeper inspection of
URL-referenced images, style-gating loops) keep working. The routing only
affects *how user-attached images on the current turn* are presented to the
main model.
"""

from __future__ import annotations

import base64
import logging
import mimetypes
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

logger = logging.getLogger(__name__)

_VALID_MODES = frozenset({"auto", "native", "text"})


def _coerce_mode(raw: Any) -> str:
    """Normalize a config value into one of the valid modes."""
    if not isinstance(raw, str):
        return "auto"
    val = raw.strip().lower()
    if val in _VALID_MODES:
        return val
    return "auto"


def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
    """True when the user configured a specific auxiliary vision backend.

    An explicit override means the user *wants* the text pipeline (they're
    paying for a dedicated vision model), so we don't silently bypass it.
    """
    if not isinstance(cfg, dict):
        return False
    aux = cfg.get("auxiliary") or {}
    if not isinstance(aux, dict):
        return False
    vision = aux.get("vision") or {}
    if not isinstance(vision, dict):
        return False
    provider = str(vision.get("provider") or "").strip().lower()
    model = str(vision.get("model") or "").strip()
    base_url = str(vision.get("base_url") or "").strip()
    # An empty or "auto" provider only counts as an override when a model
    # or base_url pins a specific backend.
    if provider in ("", "auto") and not model and not base_url:
        return False
    return True


def _lookup_supports_vision(provider: str, model: str) -> Optional[bool]:
    """Return True/False if we can resolve caps, None if unknown."""
    if not provider or not model:
        return None
    try:
        from agent.models_dev import get_model_capabilities

        caps = get_model_capabilities(provider, model)
    except Exception as exc:
        logger.debug("image_routing: caps lookup failed for %s:%s — %s", provider, model, exc)
        return None
    if caps is None:
        return None
    return bool(caps.supports_vision)
AAAc                >   d}t          |t                    rN|                    d          pi }t          |t                    r"t          |                    d                    }|dk    rdS |dk    rdS t	          |          rdS t          | |          }|du rdS dS )a1  Return ``"native"`` or ``"text"`` for the given turn.

    Args:
      provider: active inference provider ID (e.g. ``"anthropic"``, ``"openrouter"``).
      model:    active model slug as it would be sent to the provider.
      cfg:      loaded config.yaml dict, or None. When None, behaves as auto.
    r
   agentimage_input_moder   r   T)r   r#   r$   r   r&   r1   )r   r!   r   mode_cfg	agent_cfgsupportss         r   decide_image_input_moder8   c   s     H#t GGGG$$*	i&& 	G#IMM2D$E$EFFH8x6v %S)) v&x77H4x6r   pathr   c                    t          j        t          |                     \  }}|r|                    d          r|S | j                                        }ddddddd                    |d          S )Nzimage/z


def _guess_mime(path: Path) -> str:
    mime, _ = mimetypes.guess_type(str(path))
    if mime and mime.startswith("image/"):
        return mime
    suffix = path.suffix.lower()
    return {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".gif": "image/gif",
        ".webp": "image/webp",
        ".bmp": "image/bmp",
    }.get(suffix, "image/png")  # fallback for unrecognized extensions


def _file_to_data_url(path: Path) -> Optional[str]:
    """Encode a local image as a base64 data URL at its native size.

    Size limits are NOT enforced here — the agent retry loop
    (``run_agent._try_shrink_image_parts_in_messages``) shrinks on the
    provider's first rejection. Keeping this simple means providers that
    accept large images (OpenAI 49 MB+, Gemini 100 MB) don't pay a silent
    quality tax just because one other provider is stricter.

    Returns None only if the file can't be read (missing, permission
    denied, etc.); the caller reports those paths in ``skipped``.
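
    Example output, truncated (``iVBORw0KGgo`` is how a PNG's magic bytes
    always open in base64)::

        data:image/png;base64,iVBORw0KGgo...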
    """
    try:
        data = path.read_bytes()
    except Exception as exc:
        logger.warning("image_routing: failed to read %s — %s", path, exc)
        return None
    mime = _guess_mime(path)
    b64 = base64.b64encode(data).decode("ascii")
    return f"data:{mime};base64,{b64}"


def build_native_content_parts(
    user_text: str,
    image_paths: List[str],
) -> Tuple[List[Dict[str, Any]], List[str]]:
    """Build an OpenAI-style ``content`` list for a user turn.

    Shape:
      [{"type": "text", "text": "..."},
       {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
       ...]

    Images are attached at their native size. If a provider rejects the
    request because an image is too large (e.g. Anthropic's 5 MB per-image
    ceiling), the agent's retry loop transparently shrinks and retries
    once — see ``run_agent._try_shrink_image_parts_in_messages``.

    Returns (content_parts, skipped_paths). Skipped paths are files that
    couldn't be read from disk.
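
    Example (sketch; paths are illustrative)::

        parts, skipped = build_native_content_parts(
            "What changed between these two?",
            ["/tmp/before.png", "/tmp/after.png"],
        )
        # parts[0] is the text part; parts[1:] are image_url parts for the
        # files that could be read, and `skipped` lists the ones that couldn't.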
    """
    parts: List[Dict[str, Any]] = []
    skipped: List[str] = []
    text = (user_text or "").strip()
    if text:
        parts.append({"type": "text", "text": text})
    for raw_path in image_paths:
        p = Path(raw_path)
        if not p.exists() or not p.is_file():
            skipped.append(str(p))
            continue
        data_url = _file_to_data_url(p)
        if not data_url:
            skipped.append(str(p))
            continue
        parts.append({"type": "image_url", "image_url": {"url": data_url}})
    # Image-only turns still need a text part so every provider gets a prompt.
    if not text and any(p.get("type") == "image_url" for p in parts):
        parts.insert(0, {"type": "text", "text": "What do you see in this image?"})
    return parts, skipped


__all__ = ["decide_image_input_mode", "build_native_content_parts"]