
    i$              	          U d Z ddlZddlZddlZddlZddlmZ ddlmZ  ej	        e
          Z eddh          Z e ej        d           ej        d           ej        d	           ej        d
           ej        d          h          Z ej        d          fZ edh          Z ej        d          Zdadaeed<   defdZddZdej        ej        z  defdZdededefdZdedefdZdS )uL  URL safety checks — blocks requests to private/internal network addresses.

Prevents SSRF (Server-Side Request Forgery) where a malicious prompt or
skill could trick the agent into fetching internal resources like cloud
metadata endpoints (169.254.169.254), localhost services, or private
network hosts.

The check can be globally disabled via ``security.allow_private_urls: true``
in config.yaml for environments where DNS resolves external domains to
private/benchmark-range IPs (OpenWrt routers, corporate proxies, VPNs
that use 198.18.0.0/15 or 100.64.0.0/10).  Even when disabled, cloud
metadata hostnames (metadata.google.internal, 169.254.169.254) are
**always** blocked — those are never legitimate agent targets.

Limitations (documented, not fixable at pre-flight level):
  - DNS rebinding (TOCTOU): an attacker-controlled DNS server with TTL=0
    can return a public IP for the check, then a private IP for the actual
    connection. Fixing this requires connection-level validation (e.g.
    Python's Champion library or an egress proxy like Stripe's Smokescreen).
  - Redirect-based bypass is mitigated by httpx event hooks that re-validate
    each redirect target in vision_tools, gateway platform adapters, and
    media cache helpers. Web tools use third-party SDKs (Firecrawl/Tavily)
    where redirect handling is on their servers.
    N)urlparse)is_truthy_valuezmetadata.google.internalzmetadata.googz169.254.169.254z169.254.170.2z169.254.169.253zfd00:ec2::254z100.100.100.200z169.254.0.0/16zmultimedia.nt.qq.com.cnz100.64.0.0/10F_cached_allow_privatereturnc                  z   t           rt          S da dat          j        dd                                                                          } | dv r	dat          S | dv rt          S 	 ddlm}  |            }|                    d	i           }t          |t                    r-t          |                    d
          d          r	dat          S |                    di           }t          |t                    r-t          |                    d
          d          r	dat          S n# t          $ r Y nw xY wt          S )ac  Return True when the user has opted out of private-IP blocking.

    Checks (in priority order):
    1. ``HERMES_ALLOW_PRIVATE_URLS`` env var  (``true``/``1``/``yes``)
    2. ``security.allow_private_urls`` in config.yaml
    3. ``browser.allow_private_urls`` in config.yaml  (legacy / backward compat)

    Result is cached for the process lifetime.
    TFHERMES_ALLOW_PRIVATE_URLS )true1yes)false0nor   )read_raw_configsecurityallow_private_urls)defaultbrowser)_allow_private_resolvedr   osgetenvstriplowerhermes_cli.configr   get
isinstancedictr   	Exception)env_valr   cfgsecr   s        5/home/ubuntu/.hermes/hermes-agent/tools/url_safety.py_global_allow_private_urlsr#   P   sw     %$$"! i3R88>>@@FFHHG&&& $$$&&&$$555555oggj"%%c4   	)_GG())5&
 &
 &
 	) %)!(('')R((gt$$ 	)KK,--u*
 *
 *
 	) %)!((    ! s   %A'D& AD& &
D32D3c                      da dadS )u+   Reset the cached toggle — only for tests.FN)r   r        r"   _reset_allow_private_cacher'      s     $!r&   ipc                 x    | j         s| j        s| j        s| j        rdS | j        s| j        rdS | t          v rdS dS )z<Return True if the IP should be blocked for SSRF protection.TF)
is_privateis_loopbackis_link_localis_reservedis_multicastis_unspecified_CGNAT_NETWORK)r(   s    r"   _is_blocked_ipr1      s[    	}  "*: bn t	 "+ t	^t5r&   hostnameschemec                      |dk    o| t           v S )zGReturn True when a trusted HTTPS hostname may bypass IP-class blocking.https)_TRUSTED_PRIVATE_IP_HOSTS)r2   r3   s     r"   _allows_private_ip_resolutionr7      s    WF-F!FFr&   urlc                    	 t          |           }|j        pd                                                                                    d          }|j        pd                                                                }|sdS |t          v rt                              d|           dS t                      }t          ||          }	 t          j        |dt          j        t          j                  }n1# t          j        $ r t                              d|           Y dS w xY w|D ]\  }}}}}	|	d         }
	 t!          j        |
          n# t$          $ r Y 2w xY wt&          v s t)          fdt*          D                       rt                              d	||
            dS |s0|s.t-                    rt                              d
||
            dS |rt                              d|           n|rt                              d|           dS # t0          $ r'}t                              d| |           Y d}~dS d}~ww xY w)u  Return True if the URL target is not a private/internal address.

    Resolves the hostname to an IP and checks against private ranges.
    Fails closed: DNS errors and unexpected exceptions block the request.

    When ``security.allow_private_urls`` is enabled (or the env var
    ``HERMES_ALLOW_PRIVATE_URLS=true``), private-IP blocking is skipped.
    Cloud metadata endpoints (169.254.169.254, metadata.google.internal)
    remain blocked regardless — they are never legitimate agent targets.
    r	   .Fz(Blocked request to internal hostname: %sNu1   Blocked request — DNS resolution failed for: %sr   c              3       K   | ]}|v V  	d S )Nr%   ).0netr(   s     r"   	<genexpr>zis_safe_url.<locals>.<genexpr>   s'      /^/^cc	/^/^/^/^/^/^r&   z3Blocked request to cloud metadata address: %s -> %sz5Blocked request to private/internal address: %s -> %szKAllowing private/internal resolution (security.allow_private_urls=true): %szAAllowing trusted hostname despite private/internal resolution: %sTu5   Blocked request — URL safety check error for %s: %s)r   r2   r   r   rstripr3   _BLOCKED_HOSTNAMESloggerwarningr#   r7   socketgetaddrinfo	AF_UNSPECSOCK_STREAMgaierror	ipaddress
ip_address
ValueError_ALWAYS_BLOCKED_IPSany_ALWAYS_BLOCKED_NETWORKSr1   debugr   )r8   parsedr2   r3   allow_all_privateallow_private_ip	addr_infofamily_sockaddrip_strexcr(   s               @r"   is_safe_urlrX      s   A#O)r002288::AA#FF-%2,,..4466 	5 )))NNExPPP5 78886JJ	*8T6;KVM_``II 	 	 	 NNNPXYYY55		 *3 	 	%FAq!Xa[F)&11    (((C/^/^/^/^E]/^/^/^,^,^(If   uu$ -= .QSBTBT Kf   uu 		LL]     	LLS  
 t    	NPSUXYYYuuuuu	sy   A>H $H )H +C4 3H 4*D"H !D""H 8EH 
EH EA	H %0H <H 
III)r   N) __doc__rH   loggingr   rC   urllib.parser   utilsr   	getLogger__name__rA   	frozensetr@   rI   rK   
ip_networkrM   r6   r0   r   r   bool__annotations__r#   r'   IPv4AddressIPv6Addressr1   strr7   rX   r%   r&   r"   <module>rf      s#    2      				  ! ! ! ! ! ! ! ! ! ! ! !		8	$	$
 Y      iI*++I))I*++I))I*++!    I)**  &I'    &%o66   # t # # #0!D 0! 0! 0! 0!f" " " "	y,y/DD 	 	 	 	 	GC G G G G G G
LS LT L L L L L Lr&   