
    i:&                    b   U d Z ddlmZ ddlZddlZddlZddlZddlmZ ddl	m
Z
mZmZmZmZ ddlmZ ddlmZ  ej        e          Zdg g d	Zd
Z ej                    Zdaded<   daded<   daded<   d.dZ G d de          Z d/dZ!d0dZ"d1dZ#d2d3d#Z$d2d3d$Z%d4d&Z&d5d)Z'd6d+Z(d2d7d-Z)dS )8a  Website access policy helpers for URL-capable tools.

This module loads a user-managed website blocklist from ~/.hermes/config.yaml
and optional shared list files. It is intentionally lightweight so web/browser
tools can enforce URL policy without pulling in the heavier CLI config stack.

Policy is cached in memory with a short TTL so config changes take effect
quickly without re-reading the file on every URL check.
    )annotationsN)Path)AnyDictListOptionalTuple)urlparseget_hermes_homeF)enableddomainsshared_filesg      >@zOptional[Dict[str, Any]]_cached_policyOptional[str]_cached_policy_pathg        float_cached_policy_timereturnr   c                 $    t                      dz  S )Nzconfig.yamlr        9/home/ubuntu/.hermes/hermes-agent/tools/website_policy.py_get_default_config_pathr   (   s    },,r   c                      e Zd ZdZdS )WebsitePolicyErrorz/Raised when a website policy file is malformed.N)__name__
__module____qualname____doc__r   r   r   r   r   ,   s        9999r   r   hoststrc                x    | pd                                                                                     d          S )N .)striplowerrstrip)r!   s    r   _normalize_hostr)   0   s2    JB%%''..s333r   ruler   c                   t          | t                    sd S |                                                                 }|r|                    d          rd S d|v rt          |          }|j        p|j        }|                    dd          d                                         	                    d          }|                    d          r
|dd          }|pd S )	N#:///   r   r%   zwww.   )

isinstancer"   r&   r'   
startswithr
   netlocpathsplitr(   )r*   valueparseds      r   _normalize_ruler8   4   s    dC   tJJLL  E E$$S)) t~~%,KKQ"((**11#66E abb	=Dr   r4   	List[str]c                   	 |                      d          }nd# t          $ r  t                              d|            g cY S t          t
          f$ r(}t                              d| |           g cY d}~S d}~ww xY wg }|                                D ]T}|                                }|r|                    d          r.t          |          }|r|
                    |           U|S )u   Load rules from a shared blocklist file.

    Missing or unreadable files log a warning and return an empty list
    rather than raising — a bad file path should not disable all web tools.
    utf-8encodingz.Shared blocklist file not found (skipping): %sz6Failed to read shared blocklist file %s (skipping): %sNr,   )	read_textFileNotFoundErrorloggerwarningOSErrorUnicodeDecodeError
splitlinesr&   r2   r8   append)r4   rawexcruleslinestripped
normalizeds          r   _iter_blocklist_file_rulesrL   C   s   nngn..   GNNN			'(   OQUWZ[[[						 E   % %::<< 	8..s33 	$X..
 	%LL$$$Ls!    'A:A:A5/A:5A:config_pathOptional[Path]Dict[str, Any]c                   | pt                      } |                                 st          t                    S 	 dd l}n># t
          $ r1 t                              d           t          t                    cY S w xY w	 t          | d          5 }|	                    |          pi }d d d            n# 1 swxY w Y   nK# |j
        $ r}t          d|  d|           |d }~wt          $ r}t          d|  d|           |d }~ww xY wt          |t                    st          d          |                    d	i           }|i }t          |t                    st          d
          |                    di           }|i }t          |t                    st          d          t          t                    }|                    |           |S )Nr   u3   PyYAML not installed — website blocklist disabledr;   r<   zInvalid config YAML at z: zFailed to read config file zconfig root must be a mappingsecurityzsecurity must be a mappingwebsite_blocklistz,security.website_blocklist must be a mapping)r   existsdict_DEFAULT_WEBSITE_BLOCKLISTyamlImportErrorr@   debugopen	safe_load	YAMLErrorr   rB   r1   getupdate)rM   rV   fconfigrG   rQ   rR   policys           r   _load_policy_configra   ]   sX   ;!9!;!;K 0.///0 0 0 0JKKK./////0^+000 	-A^^A&&,"F	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	-> Z Z Z !O;!O!O#!O!OPPVYY ^ ^ ^ !S{!S!Sc!S!STTZ]]^fd## B !@AAAzz*b))Hh%% ? !=>>> %8"== '.. Q !OPPP,--F
MM#$$$MsW   ? 8A:9A:>B? B3'B? 3B77B? :B7;B? ?
D	CD,DDc                   | rt          |           nd}t          j                    }| Wt          5  t          1t
          |k    r&|t          z
  t          k     rt          cddd           S ddd           n# 1 swxY w Y   | pt                      } t          |           }|
                    dg           pg }t          |t                    st          d          |
                    dg           pg }t          |t                    st          d          |
                    dd          }t          |t                    st          d	          g }t                      }|D ]H}	t!          |	          }
|
r5d
|
f|vr/|                    |
d
d           |                    d
|
f           I|D ]}t          |t                     r|                                s,t)          |                                          }|                                s#t/                      |z                                  }t3          |          D ]R}
t          |          |
f}||v r|                    |
t          |          d           |                    |           S||d}| t                      k    r%t          5  |ada|addd           n# 1 swxY w Y   |S )zLoad and return the parsed website blocklist policy.

    Results are cached for ``_CACHE_TTL_SECONDS`` to avoid re-reading
    config.yaml on every URL check.  Pass an explicit ``config_path``
    to bypass the cache (used by tests).
    __default__Nr   z1security.website_blocklist.domains must be a listr   z6security.website_blocklist.shared_files must be a listr   Tz4security.website_blocklist.enabled must be a booleanr_   )patternsource)r   rH   )r"   time	monotonic_cache_lockr   r   r   _CACHE_TTL_SECONDSr   ra   r\   r1   listr   boolsetr8   rE   addr&   r   
expanduseris_absoluter   resolverL   )rM   resolved_pathnowr`   raw_domainsraw_shared_filesr   rH   seenraw_rulerK   shared_filer4   keyresults                  r   load_website_blocklistrz      s    )4FC$$$M
.

C  	& 	&*'=88..2DDD%	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& ;!9!;!;K --F**Y++1rKk4(( V !TUUUzz."55;&-- [ !YZZZjjD))Ggt$$ Y !WXXX"$E!$D - -$X..
 	-8Z0<<LLZ8DDEEEHHh
+,,,'  +s++ 	;3D3D3F3F 	K  ++--!! 	8#%%,5577D4T:: 	 	Jt99j)Cd{{LLZ3t99EEFFFHHSMMMM	 !511F .0000 	& 	&#N"/"%	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&
 Ms#   ,A55A9<A93KK
K
Nonec                 J    t           5  daddd           dS # 1 swxY w Y   dS )z?Force the next ``check_website_access`` call to re-read config.N)rh   r   r   r   r   invalidate_cacher}      ss     
                   s   rd   rk   c                    | r|sdS |                     d          rt          j        | |          S | |k    p|                     d|           S )NFz*.r%   )r2   fnmatchendswith)r!   rd   s     r   _match_host_against_ruler      s_     w u$ .tW---7?:dmmMMM:::r   urlc                    t          |           }t          |j        p|j                  }|r|S d| vr1t          d|            }t          |j        p|j                  }|r|S dS )Nr-   z//r$   )r
   r)   hostnamer3   )r   r7   r!   
schemelesss       r   _extract_host_from_urlishr      sx    c]]F6?;fm<<D Cj3jj))
z2Gj6GHH 	K2r   Optional[Dict[str, str]]c                :   |Nt           5  t          (t                              d          s	 ddd           dS ddd           n# 1 swxY w Y   t          |           }|sdS 	 t	          |          }nd# t
          $ r)}| t                              d|           Y d}~dS d}~wt          $ r&}t                              d|           Y d}~dS d}~ww xY w|                    d          sdS |                    dg           D ]}|                    dd          }t          ||          rlt          
                    d| ||                    d	d
                     | |||                    d	d
          d| d| d|                    d	d
           dc S dS )u  Check whether a URL is allowed by the website blocklist policy.

    Returns ``None`` if access is allowed, or a dict with block metadata
    (``host``, ``rule``, ``source``, ``message``) if blocked.

    Never raises on policy errors — logs a warning and returns ``None``
    (fail-open) so a config typo doesn't break all web tools.  Pass
    ``config_path`` explicitly (tests) to get strict error propagation.
    Nr   z.Website policy config error (failing open): %sz:Unexpected error loading website policy (failing open): %srH   rd   r$   u,   Blocked URL %s — matched rule '%s' from %sre   r_   zBlocked by website policy: 'z' matched rule 'z' from )r   r!   r*   re   message)rh   r   r\   r   rz   r   r@   rA   	Exceptionr   info)r   rM   r!   r`   rG   r*   rd   s          r   check_website_accessr      sc     	 	).2D2DY2O2O)	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 %S))D t	'44   "GMMMttttt   SUXYYYttttt ::i   t

7B''  ((9b))#D'22 	KKFWdhhx&B&BD D D ((8X66<4 < < < <!XXh99< <	 	 	 	 		 4s5   #AA
A
%A5 5
C?B##C0CC)r   r   )r!   r"   r   r"   )r*   r   r   r   )r4   r   r   r9   )N)rM   rN   r   rO   )r   r{   )r!   r"   rd   r"   r   rk   )r   r"   r   r"   )r   r"   rM   rN   r   r   )*r    
__future__r   r   logging	threadingrf   pathlibr   typingr   r   r   r   r	   urllib.parser
   hermes_constantsr   	getLoggerr   r@   rU   ri   Lockrh   r   __annotations__r   r   r   r   r   r)   r8   rL   ra   rz   r}   r   r   r   r   r   r   <module>r      s7     # " " " " "              3 3 3 3 3 3 3 3 3 3 3 3 3 3 ! ! ! ! ! ! , , , , , ,		8	$	$     in+/ / / / /%)  ) ) ) )          - - - -: : : : : : : :4 4 4 4      4# # # # #LD D D D DN   ; ; ; ;   2 2 2 2 2 2 2r   