
    Kxh :                         d Z ddlZddlZddlZddlmZmZmZ ddlm	Z	 ddl
mZ ddlmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ  G d d          ZdS )a  
Module containing the UniversalDetector detector class, which is the primary
class a user of ``chardet`` should use.

:author: Mark Pilgrim (initial port to Python)
:author: Shy Shalom (original C code)
:author: Dan Blanchard (major refactoring for 3.0)
:author: Ian Cordasco
    N)ListOptionalUnion   )CharSetGroupProber)CharSetProber)
InputStateLanguageFilterProbingState)EscCharSetProber)Latin1Prober)MacRomanProber)MBCSGroupProber)
ResultDict)SBCSGroupProber)UTF1632Proberc            	       X   e Zd ZdZdZ ej        d          Z ej        d          Z ej        d          Z	dddd	d
ddddZ
ddddddddZej        dfdededdfdZedefd            Zedefd            Zedee         fd            Zd!dZdeeef         ddfdZdefd ZdS )"UniversalDetectoraq  
    The ``UniversalDetector`` class underlies the ``chardet.detect`` function
    and coordinates all of the different charset probers.

    To get a ``dict`` containing an encoding and its confidence, you can simply
    run:

    .. code::

            u = UniversalDetector()
            u.feed(some_bytes)
            u.close()
            detected = u.result

    g?s   [-]s   (|~{)s   [-]zWindows-1252zWindows-1250zWindows-1251zWindows-1256zWindows-1253zWindows-1255zWindows-1254zWindows-1257)
iso-8859-1z
iso-8859-2z
iso-8859-5z
iso-8859-6z
iso-8859-7z
iso-8859-8
iso-8859-9ziso-8859-13zISO-8859-11GB18030CP949UTF-16)asciir   ztis-620r   gb2312zeuc-krzutf-16leFlang_filtershould_rename_legacyreturnNc                     d | _         d | _        g | _        d dd d| _        d| _        d| _        t          j        | _        d| _	        || _
        t          j        t                    | _        d| _        || _        |                                  d S )N        encoding
confidencelanguageF    )_esc_charset_prober_utf1632_prober_charset_probersresultdone	_got_datar	   
PURE_ASCII_input_state
_last_charr   logging	getLogger__name__logger_has_win_bytesr   reset)selfr   r   s      X/var/www/html/what/lib/python3.11/site-packages/pip/_vendor/chardet/universaldetector.py__init__zUniversalDetector.__init__d   s    
 @D 8<57#
 #

 	&1&'11#$8!

r%   c                     | j         S N)r-   r5   s    r6   input_statezUniversalDetector.input_state{   s      r%   c                     | j         S r9   )r3   r:   s    r6   has_win_byteszUniversalDetector.has_win_bytes   s    ""r%   c                     | j         S r9   )r(   r:   s    r6   charset_probersz!UniversalDetector.charset_probers   s    $$r%   c                 2   dddd| _         d| _        d| _        d| _        t          j        | _        d| _        | j        r| j        	                                 | j
        r| j
        	                                 | j        D ]}|	                                 dS )z
        Reset the UniversalDetector and all of its probers back to their
        initial states.  This is called by ``__init__``, so you only need to
        call this directly in between analyses of different documents.
        Nr    r!   Fr%   )r)   r*   r+   r3   r	   r,   r-   r.   r&   r4   r'   r(   )r5   probers     r6   r4   zUniversalDetector.reset   s     $(sMM	#&1# 	-$**,,, 	) &&(((+ 	 	FLLNNNN	 	r%   byte_strc                 p   | j         rdS |sdS t          |t                    st          |          }| j        s|                    t
          j                  rdddd| _        n|                    t
          j        t
          j	        f          rdddd| _        nx|                    d          rdddd| _        nW|                    d	          rd
ddd| _        n6|                    t
          j
        t
          j        f          rdddd| _        d| _        | j        d         	d| _         dS | j        t          j        k    rt| j                            |          rt          j        | _        nH| j        t          j        k    r3| j                            | j        |z             rt          j        | _        |dd         | _        | j        st-                      | _        | j        j        t0          j        k    r]| j                            |          t0          j        k    r5| j        j        | j                                        dd| _        d| _         dS | j        t          j        k    r| j        st?          | j                   | _        | j                            |          t0          j        k    r?| j        j        | j                                        | j        j!        d| _        d| _         dS dS | j        t          j        k    r'| j"        stG          | j                   g| _"        | j         tH          j%        z  r&| j"        &                    tO                                 | j"        &                    tQ                                 | j"        &                    tS                                 | j"        D ]U}|                    |          t0          j        k    r0|j        |                                |j!        d| _        d| _          nV| j*                            |          rd| _+        dS dS dS )a  
        Takes a chunk of a document and feeds it through all of the relevant
        charset probers.

        After calling ``feed``, you can check the value of the ``done``
        attribute to see if you need to continue feeding the
        ``UniversalDetector`` more data, or if it has made a prediction
        (in the ``result`` attribute).

        .. note::
           You should always call ``close`` when you're done feeding in your
           document if ``done`` is not already ``True``.
        Nz	UTF-8-SIG      ? r!   zUTF-32s     zX-ISO-10646-UCS-4-3412s     zX-ISO-10646-UCS-4-2143r   Tr"   ),r*   
isinstance	bytearrayr+   
startswithcodecsBOM_UTF8r)   BOM_UTF32_LEBOM_UTF32_BEBOM_LEBOM_BEr-   r	   r,   HIGH_BYTE_DETECTORsearch	HIGH_BYTEESC_DETECTORr.   	ESC_ASCIIr'   r   stater   	DETECTINGfeedFOUND_ITcharset_nameget_confidencer&   r   r   r$   r(   r   r
   NON_CJKappendr   r   r   WIN_BYTE_DETECTORr3   )r5   rB   rA   s      r6   rW   zUniversalDetector.feed   s]    9 	F 	F(I.. 	+ **H ~ %	""6?33 X !,"% " 
 $$f&96;N%OPP X ,43TVWW$$%899 X !9"% "	  $$%899 X !9"% "	  $$fmV]%CDD X ,43TVWW!DN{:&2 	 
 555&--h77 9$.$8!!!Z%:::%,,T_x-GHH ; %/$8!"233- # 	3#0??D %)???#((22l6KKK $ 4 A"&"6"E"E"G"G " 
 !	 
 444+ N+;D<L+M+M(',,X66,:OOO $ 8 E"&":"I"I"K"K $ 8 A 
 !			 PO *"666( ?)89I)J)J(K%#n&<< D)001B1BCCC%,,\^^<<<%,,^-=-=>>>/  ;;x((L,AAA$*$7&,&;&;&=&=$*O# #DK
 !%DIE B %,,X66 +&*###% 76"+ +r%   c           	         | j         r| j        S d| _         | j        s| j                            d           n%| j        t          j        k    rdddd| _        n| j        t          j        k    rd}d}d}| j	        D ]#}|s|
                                }||k    r|}|}$|r|| j        k    r|j        }|J |                                }|
                                }|                    d	          r"| j        r| j                            ||          }| j        r/| j                            |pd                                |          }|||j        d| _        | j                                        t,          j        k    r| j        d
         | j                            d           | j	        D ]}|st1          |t2                    rD|j        D ];}| j                            d|j        |j        |
                                           <^| j                            d|j        |j        |
                                           | j        S )z
        Stop analyzing the current document and come up with a final
        prediction.

        :returns:  The ``result`` attribute, a ``dict`` with the keys
                   `encoding`, `confidence`, and `language`.
        Tzno data received!r   rD   rE   r!   Nr    ziso-8859r"   z no probers hit minimum thresholdz%s %s confidence = %s)r*   r)   r+   r2   debugr-   r	   r,   rR   r(   rZ   MINIMUM_THRESHOLDrY   lowerrI   r3   ISO_WIN_MAPgetr   
LEGACY_MAPr$   getEffectiveLevelr/   DEBUGrG   r   probers)	r5   prober_confidencemax_prober_confidence
max_proberrA   rY   lower_charset_namer#   group_probers	            r6   closezUniversalDetector.close  s    9 	;	~ (	K12222 *"777'.crRRDKK *"666 $$'!J/ ( ( $*$9$9$;$;!$'<<<,=)!'J 4t7MMM)6#///%1%7%7%9%9"'6688
 &00<< * '+'7';';.( ( , #'?#6#6%+2244l$ $L !-", * 3  ;((**gm;;{:&.!!"DEEE$($9  L' ! !,0BCC &2&:  F K-- 7 & 3 & & 5 5 7 7	    ))3(5(1(7799	    {r%   )r   N)r1   
__module____qualname____doc__r`   recompilerP   rS   r]   rb   rd   r
   ALLboolr7   propertyintr;   r=   r   r   r?   r4   r   bytesrH   rW   r   rm    r%   r6   r   r   8   s          #N332:l++L"
>22$$$$$$$%	 	K  $ $ J '5&8%* # # 
	   . !S ! ! ! X! #t # # # X# %m!4 % % % X%   &A+U5)#34 A+ A+ A+ A+ A+FMz M M M M M Mr%   r   )rp   rJ   r/   rq   typingr   r   r   charsetgroupproberr   charsetproberr   enumsr	   r
   r   	escproberr   latin1proberr   macromanproberr   mbcsgroupproberr   
resultdictr   sbcsgroupproberr   utf1632proberr   r   rx   r%   r6   <module>r      sF  8    				 ( ( ( ( ( ( ( ( ( ( 2 2 2 2 2 2 ( ( ( ( ( ( ; ; ; ; ; ; ; ; ; ; ' ' ' ' ' ' & & & & & & * * * * * * , , , , , , " " " " " " , , , , , , ( ( ( ( ( (r r r r r r r r r rr%   