U
    1jgzE                     @   sR   d dl mZ d dlZddlmZmZ dgZdddZdddZG dd deZ	dS )    )OptionalN   )	OptimizerParamsTLBFGSc                 C   s   |d k	r|\}}n| |kr"| |fn|| f\}}|| d||  | |   }	|	d ||  }
|
dkr|
  }| |kr|||  || |	 || d|     }n(| | | || |	 || d|     }tt|||S || d S d S )N      r   g       @)sqrtminmax)x1f1Zg1Zx2f2Zg2boundsZ
xmin_boundZ
xmax_boundd1Z	d2_squareZd2Zmin_pos r   5/tmp/pip-unpacked-wheel-ttp2cnii/torch/optim/lbfgs.py_cubic_interpolate
   s    
	*(r   -C6??&.>   c           !   	   C   s   |   }|jtjd}| |||\}}d}||}d|||f\}}}}d}d}||
k r|||| |  ks|dkr||kr||g}||g}||jtjdg}||g}qt || | kr|g}|g}|g}d}q|dkr||g}||g}||jtjdg}||g}q|d||   }|d }|}t||||||||fd}|}|}|jtjd}|}| |||\}}|d7 }||}|d7 }qT||
krd|g}||g}||g}d}|d |d	 krd
nd\}}|s||
k rt |d |d  | |	k rqt|d |d |d |d |d |d }dt|t|  } tt|| |t| | k r|s|t|ks|t|krt |t| t |t| k rt||  }nt||  }d}nd}nd}| |||\}}|d7 }||}|d7 }|||| |  ks ||| krj|||< |||< |jtjd||< |||< |d |d kr`d
nd\}}nt || | krd}nJ||| ||   dkr|| ||< || ||< || ||< || ||< |||< |||< |jtjd||< |||< q|| }|| }|| }||||fS )NZmemory_formatr   r   FTg{Gz?
   )r   )r   r   )r   r   g?)absr   clonetorchcontiguous_formatdotr   r
   )!obj_funcxtdfggtdZc1c2tolerance_changeZmax_lsZd_normZf_newZg_newls_func_evalsZgtd_newZt_prevZf_prevZg_prevZgtd_prevdoneZls_iterZbracketZ	bracket_fZ	bracket_gZbracket_gtdZmin_stepZmax_steptmpZinsuf_progressZlow_posZhigh_posZepsr   r   r   _strong_wolfe&   s    

$
      


 ""
$ r,   c                
       s~   e Zd ZdZdeeeee eeeee d fd	d
Z	dd Z
dd Zdd Zdd Zdd Zdd Ze dd Z  ZS )r   a\  Implements L-BFGS algorithm.

    Heavily inspired by `minFunc
    <https://www.cs.ubc.ca/~schmidtm/Software/minFunc.html>`_.

    .. warning::
        This optimizer doesn't support per-parameter options and parameter
        groups (there can be only one).

    .. warning::
        Right now all parameters have to be on a single device. This will be
        improved in the future.

    .. note::
        This is a very memory intensive optimizer (it requires additional
        ``param_bytes * (history_size + 1)`` bytes). If it doesn't fit in memory
        try reducing the history size, or use a different algorithm.

    Args:
        params (iterable): iterable of parameters to optimize. Parameters must be real.
        lr (float): learning rate (default: 1)
        max_iter (int): maximal number of iterations per optimization step
            (default: 20)
        max_eval (int): maximal number of function evaluations per optimization
            step (default: max_iter * 1.25).
        tolerance_grad (float): termination tolerance on first order optimality
            (default: 1e-7).
        tolerance_change (float): termination tolerance on function
            value/parameter changes (default: 1e-9).
        history_size (int): update history size (default: 100).
        line_search_fn (str): either 'strong_wolfe' or None (default: None).
    r      NHz>r   d   )paramslrmax_itermax_evaltolerance_gradr(   history_sizeline_search_fnc	           
   	      sh   |d kr|d d }t |||||||d}	t ||	 t| jdkrNtd| jd d | _d | _d S )N      )r1   r2   r3   r4   r(   r5   r6   r   z>LBFGS doesn't support per-parameter options (parameter groups)r   r0   )dictsuper__init__lenparam_groups
ValueError_params_numel_cache)
selfr0   r1   r2   r3   r4   r(   r5   r6   defaults	__class__r   r   r;      s$    	zLBFGS.__init__c                 C   s&   | j d kr tdd | jD | _ | j S )Nc                 s   s,   | ]$}t |rd |  n| V  qdS )r   N)r   
is_complexnumel.0pr   r   r   	<genexpr>   s   zLBFGS._numel.<locals>.<genexpr>)r@   sumr?   rA   r   r   r   _numel   s
    

zLBFGS._numelc                 C   s   g }| j D ]l}|jd kr,||  }n&|jjrF|j d}n|jd}t	|rlt
|d}|| q
t|dS )Nr   r   )r?   ZgradnewrF   Zzero_Z	is_sparseZto_denseviewr   rE   view_as_realappendcat)rA   ZviewsrI   rO   r   r   r   _gather_flat_grad   s    


zLBFGS._gather_flat_gradc                 C   sh   d}| j D ]H}t|r"t|}| }|j||||  ||d ||7 }q
||  ksdtd S )Nr   alpha)	r?   r   rE   rP   rF   add_Zview_asrM   AssertionError)rA   Z	step_sizeupdateoffsetrI   rF   r   r   r   	_add_grad  s    


 
zLBFGS._add_gradc                 C   s   dd | j D S )Nc                 S   s   g | ]}|j tjd qS )r   )r   r   r   rG   r   r   r   
<listcomp>  s     z&LBFGS._clone_param.<locals>.<listcomp>)r?   rL   r   r   r   _clone_param  s    zLBFGS._clone_paramc                 C   s$   t | j|D ]\}}|| qd S N)zipr?   copy_)rA   Zparams_datarI   Zpdatar   r   r   
_set_param  s    zLBFGS._set_paramc                 C   s0   |  || t| }|  }| | ||fS r]   )rZ   floatrS   r`   )rA   closurer!   r"   r#   loss	flat_gradr   r   r   _directional_evaluate  s
    

zLBFGS._directional_evaluatec           &   	      s  t jdkstt   jd }|d }|d }|d }|d }|d }|d }|d	 }	jjd  }
|
d
d |
dd   }t|}d}|
d
  d7  < 	 }|
  |k}|r|S |
d}|
d}|
d}|
d}|
d}|
d}|
d}|
d}d}||k r^|d7 }|
d  d7  < |
d dkrj| }g }g }g }d}nN||}||}||}|dkrt ||	kr|d |d |d || || |d|  ||| }t |}d|
krdg|	 |
d< |
d }| }t|d ddD ]8}|| |||  ||< |j|| ||  d q.t|| }} t|D ]6}|| | ||  }!| j|| || |! d q|dkr|jtjd}n
|| |}|
d dkr
tdd|
   | }n|}||}"|"| kr(q^d}#|dk	r|dkrJtdn2 }$ fdd}%t|%|$|||||"\}}}}#|| |
  |k}nP|| ||krt  t  }W 5 Q R X 	 }|
  |k}d}#||#7 }|
d
  |#7  < ||krq^||krq^|r(q^||
  |krDq^t
|| |k rq^q||
d< ||
d< ||
d< ||
d< ||
d< ||
d< ||
d< ||
d< |S )zPerform a single optimization step.

        Args:
            closure (Callable): A closure that reevaluates the model
                and returns the loss.
        r   r   r1   r2   r3   r4   r(   r6   r5   Z
func_evalsn_iterr#   r"   old_dirsold_stpsroH_diagprev_flat_grad	prev_lossg|=g      ?alNr   rT   r   Zstrong_wolfez only 'strong_wolfe' is supportedc                    s     | ||S r]   )re   )r!   r"   r#   rb   rA   r   r   r      s    zLBFGS.step.<locals>.obj_func)r<   r=   rW   r   Zenable_gradstater?   
setdefaultra   rS   r   r   getnegsubmulr   poprQ   rangerV   r   r   r_   r
   rK   RuntimeErrorr\   r,   rZ   )&rA   rb   groupr1   r2   r3   r4   r(   r6   r5   ro   Z	orig_lossrc   Zcurrent_evalsrd   Zopt_condr#   r"   rg   rh   ri   rj   rk   rl   rf   ysZysZnum_oldrm   qirZbe_ir&   r)   Zx_initr    r   rn   r   step&  s    

























      



z
LBFGS.step)r   r-   Nr.   r   r/   N)__name__
__module____qualname____doc__r   ra   intr   strr;   rM   rS   rZ   r\   r`   re   r   Zno_gradr~   __classcell__r   r   rC   r   r      s4   $        	)N)r   r   r   r   )
typingr   r   Z	optimizerr   r   __all__r   r,   r   r   r   r   r   <module>   s   
       
 