B
    fd!                 @   s   d dl Z d dlZd dlZd dlmZ d dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZ ddlmZ eeZG d	d
 d
eZG dd dZdS )    N)
HTMLParser)Optional)raw)MessageEntityType)PeerIdInvalid   )utilsc                   sL   e Zd ZedZdd fddZdd Zdd	 Zd
d Z	dd Z
  ZS )Parserztg://user\?id=(\d+)zpyrogram.Client)clientc                s&   t    || _d| _g | _i | _d S )N )super__init__r
   textentitiestag_entities)selfr
   )	__class__ 8/tmp/pip-unpacked-wheel-rcokkf2l/pyrogram/parser/html.pyr   %   s
    
zParser.__init__c             C   sp  t |}i }|dkr tjj}n|dkr2tjj}n|dkrDtjj}n|dkrVtjj}n|dkrhtjj}n|dkrztjj}n|dkrtjj	}|
dd	|d< n|d
krtjj}n|dkr|
dd	}tj|}|rtjj}t|d|d< ntjj}||d< n.|dkr,tjj}t|
d}||d< nd S || jkrFg | j|< | j| |f t| jdd| d S )N)bstrong)iemu)sdelstrike
blockquotecodeprelanguager   Zspoilerahrefr   user_idurlemojiidZdocument_idr   )offsetlength)dictr   typesZMessageEntityBoldZMessageEntityItalicZMessageEntityUnderlineZMessageEntityStrikeZMessageEntityBlockquoteZMessageEntityCodeZMessageEntityPregetZMessageEntitySpoilerr	   
MENTION_REmatchInputMessageEntityMentionNameintgroupZMessageEntityTextUrlZMessageEntityCustomEmojir   appendlenr   )r   tagattrsextraentityr$   Zmentioncustom_emoji_idr   r   r   handle_starttag.   sF    










zParser.handle_starttagc             C   sP   t |}x2| j D ]$}x|D ]}| jt|7  _q W qW |  j|7  _d S )N)htmlunescaper   valuesr(   r2   r   )r   datar   r6   r   r   r   handle_dataZ   s
    

zParser.handle_datac          	   C   sr   y| j | j|   W n< ttfk
rV   |  \}}|d7 }td||| Y nX | j| sn| j| d S )Nr   z)Unmatched closing tag </%s> at line %s:%s)	r   r1   r   popKeyError
IndexErrorgetposlogdebug)r   r3   liner'   r   r   r   handle_endtagc   s    
zParser.handle_endtagc             C   s   d S )Nr   )r   messager   r   r   erroro   s    zParser.error)__name__
__module____qualname__recompiler,   r   r8   r=   rE   rG   __classcell__r   r   )r   r   r	   "   s   
	,	r	   c               @   s@   e Zd Zed dddZedddZeeedd	d
Z	dS )HTMLzpyrogram.Client)r
   c             C   s
   || _ d S )N)r
   )r   r
   r   r   r   r   t   s    zHTML.__init__)r   c          	      s,  t dd|}t dd|}t| j}|t| |  |jrg }x2|j	 D ]$\}}|
d| dt| d qTW tdd| g }xb|jD ]X}t|tjjry$| jd k	r| j|jI d H |_W n tk
r   wY nX |
| qW ttd	d
 |}t|jt|dd
 dp&d dS )Nz^\s*(<[\w<>=\s\"]*>)\s*z\1z\s*(</[\w</>]*>)\s*$<z> (x)zUnclosed tags: %sz, c             S   s
   | j dkS )Nr   )r(   )xr   r   r   <lambda>       zHTML.parse.<locals>.<lambda>c             S   s   | j S )N)r'   )er   r   r   rR      rS   )key)rF   r   )rK   subr	   r
   feedr   add_surrogatescloser   itemsr1   r2   rB   infojoinr   
isinstancer   r*   r.   Zresolve_peerr#   r   listfilterremove_surrogatesr   sorted)r   r   parserZunclosed_tagsr3   r   r6   r   r   r   parsew   s,    
 

z
HTML.parse)r   r   c                s   dd t t d fddt| } g  jdd d d	}x|t k r`||7 }qFW rd
 d }xFtD ]:\}}| d | | t| ||  | |d   } |}q|W t| S )Nc             S   sd  | j }| j}|| j }|tjtjtjtjfkrV|jd 	 }d| d}d| d}n|tj
kr|j	 }t| ddpxd}|rd| d| dn
d| d}d| d}n|tjtjtjfkr|j	 }d| d}d| d}nr|tjkr| j}d	| d}d
}nN|tjkr,| j}	d|	j d}d
}n(|tjkrP| j}
d|
 d}d}ndS ||f||ffS )z_
            Parses a single entity and returns (start_tag, start), (end_tag, end)
            r   rO   >z</r    r   z language="z">z	<a href="z</a>z<a href="tg://user?id=z<emoji id="z</emoji>N)typer'   r(   r   ZBOLDZITALICZ	UNDERLINEZSTRIKETHROUGHnamelowerZPREgetattrZCODEZ
BLOCKQUOTEZSPOILERZ	TEXT_LINKr$   ZTEXT_MENTIONuserr&   ZCUSTOM_EMOJIr7   )r6   Zentity_typestartendrf   	start_tagend_tagr    r$   ri   r7   r   r   r   	parse_one   sJ    



"

zHTML.unparse.<locals>.parse_one)entity_ireturnc                s    |  }|dkrdS |\\}}\}} ||f | d }x*|t k rh | j|k rh||7 }q@W  ||f ||  S )aR  
            Takes the index of the entity to start parsing from, returns the number of parsed entities inside it.
            Uses entities_offsets as a stack, pushing (start_tag, start) first, then parsing nested entities,
            and finally pushing (end_tag, end) to the stack.
            No need to sort at the end.
            Nr   )r1   r2   r'   )ro   thisrl   rj   rm   rk   Z
internal_i)r   entities_offsetsrn   	recursiver   r   rs      s    zHTML.unparse.<locals>.recursivec             S   s   | j | j fS )N)r'   r(   )rT   r   r   r   rR      rS   zHTML.unparse.<locals>.<lambda>)rU   r   r   )	r/   r   rX   sortr2   reversedr9   escaper`   )r   r   r   Zlast_offsetr6   r'   r   )r   rr   rn   rs   r   unparse   s    /
.zHTML.unparseN)
rH   rI   rJ   r   r   strrc   staticmethodr^   rx   r   r   r   r   rN   s   s   %rN   )r9   loggingrK   html.parserr   typingr   Zpyrogramr   Zpyrogram.enumsr   Zpyrogram.errorsr   r   r   	getLoggerrH   rB   r	   rN   r   r   r   r   <module>   s   
Q