G:YddlmZmZmZddlmZddlmZmZddl Z ddl Z ddl m Z m Z ddlmZddlmZmZmZmZdd lmZdd lmZed eDZed eDZed eDZeeddgzZdZejrOeddkreddksJe j edde!dzdzZ"ne j eZ"hdZ#e j dZ$iZ%Gdde&Z'dZ(Gdde&Z)Gdde)Z*Gdde+Z,Gdd e&Z-Gd!d"e&Z.d#Z/dS)$)absolute_importdivisionunicode_literals) text_type) http_clienturllibN)BytesIOStringIO) webencodings)EOFspaceCharacters asciiLettersasciiUppercase)_ReparseException)_utilsc8g|]}|dSasciiencode.0items /builddir/build/BUILDROOT/alt-python311-pip-21.3.1-4.el9.x86_64/opt/alt/python311/lib/python3.11/site-packages/pip/_vendor/html5lib/_inputstream.py rs$!S!S!S4$++g"6"6!S!S!Sc8g|]}|dSrrrs rrrs$MMMt{{733MMMrc8g|]}|dSrrrs rrrs$ Q Q Q$W!5!5 Q Q Qr>   z[ - -/:-@\[-`{-~]c<eZdZdZdZdZdZdZdZdZ dZ d S) BufferedStreamzBuffering for streams that do not have buffering of their own The buffer is implemented as a list of chunks on the assumption that joining many strings will be slow since it is O(n**2) c4||_g|_ddg|_dS)Nr"r)streambufferposition)selfrGs r__init__zBufferedStream.__init__:s  Q rcd}|jd|jdD]}|t|z }||jdz }|SNrr )rHrIlen)rJposchunks rtellzBufferedStream.tell?sR[!2$-"2!23  E 3u:: CC t}Q rc||ksJ|}d}t|j||kr@|t|j|z}|dz }t|j||k@||g|_dSrM)_bufferedBytesrNrHrI)rJrOoffsetis rseekzBufferedStream.seekFsd))++++++ $+a.!!F** c$+a.)) )F FA$+a.!!F**F  rc&|js||S|jdt|jkr>|jdt|jdkr||S||S)Nrr r")rH _readStreamrIrN_readFromBufferrJbytess rreadzBufferedStream.readOs{ /##E** *mA#dk"2"222mA#dk"o"6"666##E** *''.. .rc>td|jDS)Nc,g|]}t|S)rNrs rrz1BufferedStream._bufferedBytes..Ys666$CII666r)sumrHrJs rrSzBufferedStream._bufferedBytesXs!66$+666777rc|j|}|j||jdxxdz cc<t ||jd<|SrM)rGr\rHappendrIrN)rJr[datas rrXzBufferedStream._readStream[sb{&& 4    aAt99 a rcP|}g}|jd}|jd}|t|jkr|dkr|dksJ|j|}|t||z kr|}|||zg|_n-t||z }|t|g|_|dz }|||||z||z}d}|t|jkr|dk|r(|||d|S)Nrr r)rIrNrHrcrXjoin)rJr[remainingBytesrv bufferIndex bufferOffset bufferedData bytesToReads rrYzBufferedStream._readFromBufferbsK mA& }Q' C ,,,,11D1D!A%%%%;{3L\!2!2\!AAA, !,l[.H I !,//,> !,c,.?.? @ q IIl< {0J#JK L L L k )NLC ,,,,11D1D  8 IId&&~66 7 7 7xx||rN) __name__ __module__ __qualname____doc__rKrQrVr\rSrXrYr_rrrErE3s    $$$///888rrEc t|tjs>t|tjjr"t|jtjrd}nNt|dr)t|dt}nt|t}|r-d|D}|rtd|zt|fi|St|fi|S)NFr\rc<g|]}|d|S) _encoding)endswith)rxs rrz#HTMLInputStream..s)BBB1!**[*A*ABQBBBrz3Cannot set an encoding with a unicode input, set %r) isinstancer HTTPResponserresponseaddbasefphasattrr\r TypeErrorHTMLUnicodeInputStreamHTMLBinaryInputStream)sourcekwargs isUnicode encodingss rHTMLInputStreamr}s 6;3442 FFO3 4 42 FI{7 8 82  2v{{1~~y99 vy11 7BBBBB  _QT]]^^ ^%f77777$V66v666rc\eZdZdZdZdZdZdZdZdZ dZ dd Z d Z d Z ddZdZd S)r}Provides a unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding and removing or replacing incorrect byte-sequences and also provides column and line tracking. i(ctjsd|_n,tddkr |j|_n |j|_dg|_tddf|_| ||_ | dS)Initialises the HTMLInputStream. HTMLInputStream(source, [encoding]) -> Normalized stream from source for use by html5lib. source can be either a file-object, local filename or a string. The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) Nu􏿿r rutf-8certain) rsupports_lone_surrogatesreportCharacterErrorsrNcharacterErrorsUCS4characterErrorsUCS2newLineslookupEncoding charEncoding openStream dataStreamreset)rJrs rrKzHTMLUnicodeInputStream.__init__s. B*.D & &   ! # #)-)AD & &)-)AD & +G44i@//&11 rchd|_d|_d|_g|_d|_d|_d|_dS)Nr)rP chunkSize chunkOffseterrors prevNumLines prevNumCols_bufferedCharacterras rrzHTMLUnicodeInputStream.resetsA  #'rcJt|dr|}nt|}|SzvProduces a file object from source. source can be either a file object, local filename or a string. r\)r{r rJrrGs rrz!HTMLUnicodeInputStream.openStreams/ 66 " " &FFf%%F rc|j}|dd|}|j|z}|dd|}|dkr |j|z}n||dzz }||fS)N rr"r )rPcountrrfindr)rJrTrPnLines positionLine lastLinePospositionColumns r _positionz HTMLUnicodeInputStream._positionss T1f--(61 kk$622 "  !-6NN#{Q7Nn--rcJ||j\}}|dz|fS)z:Returns (line, col) of the current position in the stream.r )rr)rJlinecols rrIzHTMLUnicodeInputStream.positions(NN4#344 cq#rc|j|jkr|stS|j}|j|}|dz|_|S)zo Read one character from the stream or queue if available. Return EOF when EOF is reached. r )rr readChunkr rP)rJrchars rrzHTMLUnicodeInputStream.charsP  t~ - ->>##  & z+&&? rNcn||j}||j\|_|_d|_d|_d|_|j|}|j r|j |z}d|_ n|sdSt|dkrBt|d}|dksd|cxkrdkrnn|d|_ |dd}|j r| || d d }| d d }||_t||_d S) NrrFr r" iz r T)_defaultChunkSizerrrrrPrrr\rrNordrreplace)rJrrdlastvs rrz HTMLUnicodeInputStream.readChunksL  .I.2nnT^.L.L+4+ ##I..  " *T1D&*D # # 5 t99q==RMME}}% 9 9 9 96 9 9 9 9 9*.r('CRCy  % -  & &t , , ,||FD))||D$'' Ttrcttt|D]}|jddS)Ninvalid-codepoint)rangerNinvalid_unicode_refindallrrc)rJrd_s rrz*HTMLUnicodeInputStream.characterErrorsUCS4sTs-55d;;<<== 4 4A K  2 3 3 3 3 4 4rc.d}t|D]}|rt|}|}t j|||dzrEt j|||dz}|tvr|j dd}|dkr7|dkr1|t|dz kr|j dd}|j ddS)NFrTrir ) rfinditerrgroupstartrisSurrogatePairsurrogatePairToCodepointnon_bmp_invalid_codepointsrrcrN)rJrdskipmatch codepointrOchar_vals rrz*HTMLUnicodeInputStream.characterErrorsUCS2#s''0066 8 8E EKKMM**I++--C%d3sQw;&788 8!:4C!G ;LMM999K&&':;;;v%%)v*=*=TQ&& ""#67777 ""#67777# 8 8rFc t||f}nt#t$rg |D]}t|dksJdd|D}|sd|z}t jd|zx}t||f<YnwxYwg} ||j|j}||j|j krnnN| }||j kr/| |j|j|||_n=| |j|jd| snd|} | S)z Returns a string of characters from the stream up to but not including any character in 'characters' or EOF. 'characters' must be a container that supports the 'in' method and iteration over its characters. Trc2g|]}dt|zS)z\x%02x)r)rcs rrz5HTMLUnicodeInputStream.charsUntil..Hs#DDDAYQ/DDDrz^%sz[%s]+N) charsUntilRegExKeyErrorrrfrecompilerrPrrendrcr) rJ charactersoppositecharsrregexrhmrrs r charsUntilz!HTMLUnicodeInputStream.charsUntil:s Z#Z$:;EE Z Z Z )#))Aq66C<<<<<GGDDDDDEEE & >@jSX>Y>Y YEOZ$:;;; Z  DJ(899Ay#t~556eegg$.((IIdj)9#)=>???'*D$ IIdj!1!2!23 4 4 4>>## + . GGBKKsA.BBc|turT|jdkr!||jz|_|xjdz c_dS|xjdzc_|j|j|ksJdSdSrM)r rrPr)rJrs rungetzHTMLUnicodeInputStream.ungetis} s??1$$ "DJ. !#  A%  z$"23t;;;; ?<;rN)F)rmrnrorprrKrrrrIrrrrrrr_rrr}r}s@ ' ' '    . . .   $$$$L444888.----^<<<<> #/// rc|jdj|jd|_t |dS)Nrr)r codec_info streamreaderrrr}rras rrzHTMLBinaryInputStream.resetsA+A.9FFt~W`aa$$T*****rct|dr|}nt|} ||n#t$rt |}YnwxYw|Sr)r{r rVrQ ExceptionrErs rrz HTMLBinaryInputStream.openStreams} 66 " " %FFV__F , KK & & & & , , ,#F++FFF , s'A A('A(c|df}|d|St|jdf}|d|St|jdf}|d|S|df}|d|St|jdf}|d"|djds|St|jdf}|d|S|r ddl m }g}|}|j sj|j |j}t|t sJ|sn1|||||j j|t|jd}|j d||dfSn#t,$rYnwxYwt|jdf}|d|StddfS)Nrr tentativezutf-16)UniversalDetectorencodingr) detectBOMrrrdetectEncodingMetarname startswithr%pip._vendor.chardet.universaldetectorrdonerr\rrvr[rcfeedcloseresultrV ImportErrorr)rJchardetrrbuffersdetectorrHrs rrz'HTMLBinaryInputStream.determineEncodings_~~''2 ? & &d&<==yH ? & &d&=>> I ? & ..00+= ? & &d&FGGT ? &|A/C/N/Nx/X/X & &d&:;;[H ? &   1 1SSSSSS,,.."-*!^001EFFF%fe44444!NN6***MM&))) #-*   )(/**EFF##A&&&'#[00(    &&d&;<>  UUOAa!eG}rc*|Sr)r$ras rnextzEncodingBytes.nextYs}}rc|j}|t|krt|dkrt|dz x|_}|||dzSrMr r"s rpreviouszEncodingBytes.previous]sN N D >>  UUOU"Aa!eG}rcR|jt|krt||_dSrrrNr!)rJrIs r setPositionzEncodingBytes.setPositionfs& >SYY & & !rch|jt|krt|jdkr|jSdS)Nrr*ras r getPositionzEncodingBytes.getPositionks5 >SYY & &  >Q  > !4rc0||j|jdzSNr )rIras rgetCurrentBytezEncodingBytes.getCurrentByteusDM$-!"3344rc|j}|t|kr2|||dz}||vr ||_|S|dz }|t|k2||_dS)zSkip past a list of charactersr NrIrNrrJrr#rs rrzEncodingBytes.skipzsf M#d))mmQq1uW A~~!" FA #d))mm trc|j}|t|kr2|||dz}||vr ||_|S|dz }|t|k2||_dSr/r2r3s r skipUntilzEncodingBytes.skipUntilsf M#d))mmQq1uW AEzz!" FA #d))mm trcz|||j}|r|xjt|z c_|S)zLook for a sequence of bytes at the start of a string. If the bytes are found return True and advance the position to the byte after the match. Otherwise return False and leave the position alone)rrIrN)rJr[rhs r matchByteszEncodingBytes.matchBytess<__UDM 2 2  ( MMSZZ 'MM rc |||jt|zdz |_n#t$rt wxYwdS)zLook for the next sequence of bytes matching a given sequence. If a match is found advance the position to the last byte of the matchr T)indexrIrNr ValueErrorr!rZs rjumpTozEncodingBytes.jumpTosV !ZZt}==E JQNDNN     ts 36AN)rmrnrorprrKrr$r&r(r+r-propertyrIr0 currentBytespaceCharactersBytesrr5r7r;r_rrrrBs 222""" x [11H555(>**K-       rrcHeZdZdZdZdZdZdZdZdZ dZ d Z d Z d S) rz?Mini parser for detecting character encoding from meta elementsc<t||_d|_dS)z3string - the data to work on for encoding detectionN)rrdrrJrds rrKzEncodingParser.__init__s!$''  rcd|jvrdSd|jfd|jfd|jfd|jfd|jfd|jff}|jD]w}d} |jdn#t$rYnKwxYw|D]?\}}|j|r |}n#t$rd}YnwxYw@|snx|j S) Nsrdr;ras rrCzEncodingParser.handleCommentsy'''rc|jjtvrdSd}d} |}|dS|ddkr|ddk}|r | ||_dSn|ddkr#|d}t |}| ||_dSnb|ddkrVt t|d}|}|t |}| |r ||_dS|}) NTFrs http-equivr s content-typecharsetscontent) rdr=r> getAttributerrContentAttrParserrparse)rJ hasPragmapendingEncodingattrtentativeEncodingcodec contentParsers rrDzEncodingParser.handleMetas+ 9 (< < <4  8$$&&D|t7m++ $Q? :I %_%@(7 $u!W **(,Q%*+<==E((- $u)!W **$5mDG6L6L$M$MM(5(;(;(=(=%(4 ./@ A A ,(805 ',u277 8rc,|dS)NF)handlePossibleTagras rrGz%EncodingParser.handlePossibleStartTags%%e,,,rcTt|j|dS)NT)r&rdrZras rrEz#EncodingParser.handlePossibleEndTags# TY%%d+++rcJ|j}|jtvr,|r(||dS|t }|dkr|n,|}||}|dS)NTr!)rdr=asciiLettersBytesr(rFr5spacesAngleBracketsrP)rJendTagrdrrUs rrZz EncodingParser.handlePossibleTagsy  #4 4 4 #   """4 NN. / / 99 MMOOOO$$&&D"((**"trc6|jdS)Nr rMras rrFzEncodingParser.handleOthersy%%%rc|j}|ttdgz}|t |dksJ|dvrdSg}g} |dkr|rn|tvr|}nu|dvrd|dfS|t vr(||n|dS||t|}|dkr+| d|dfSt||}|d vr|} t|}||kr9t|d|d|fS|t vr(||n|||d krd|dfS|t vr(||n|dS|| t|}|tvr*d|d|fS|t vr(||n|dS||) z_Return a name,value pair for the next attribute in the stream, if one is found, or None/Nr )r NT=)rbr r)'"r ) rdrr> frozensetrNrfasciiUppercaseBytesrcrr&r(r^)rJrdrattrName attrValue quoteChars rrPzEncodingParser.getAttributesy II*Yv->->> ? ?yCFFaKKK'  4  DyyXy***IIKKl""xx))3..))) ****t"""T A! $ 99 MMOOO88H%%s* * T IIKK  I (JJ >>JJJ88H--sxx /B/BBB---$$QWWYY////$$Q''' ($YY88H%%s* * % % %   QWWYY ' ' ' ' Y4   Q    $T A'''xx))388I+>+>>>)))  ++++t  ### $rN) rmrnrorprKrrCrDrGrErZrFrPr_rrrrsII <((("8"8"8H---,,,.&&&J$J$J$J$J$rrceZdZdZdZdS)rQcBt|tsJ||_dSr)rvr[rdrAs rrKzContentAttrParser.__init__as#$&&&&& rc |jd|jxjdz c_|j|jjdksdS|jxjdz c_|j|jjdvrb|jj}|jxjdz c_|jj}|j|r|j||jjSdS|jj} |jt |j||jjS#t$r|j|dcYSwxYw#t$rYdSwxYw)NrOr rc)rerd)rdr;rIrr=r5r>r!)rJ quoteMark oldPositions rrRzContentAttrParser.parsees  I  Z ( ( ( I  ! #   INN   9(D00t I  ! #   INN   y$ 44 I1  ""a'"""i0 9##I.. 9[1C%CDD4#i0 3I''(<===9[1C%CDD$3339[\\22223   44 s=AEBE: E7D??EEEE E,+E,N)rmrnrorKrRr_rrrQrQ`s2rrQct|tr( |d}n#t$rYdSwxYw|& t j|S#t $rYdSwxYwdS)z{Return the python codec name corresponding to an encoding or None if the string doesn't correspond to a valid encoding.rN)rvr[decodeUnicodeDecodeErrorr lookupAttributeError)rs rrrs(E"" w//HH!   44  &x00 0   44 ts- ;;A A#"A#)0 __future__rrrpip._vendor.sixrpip._vendor.six.movesrrrrior r pip._vendorr constantsr rrrrrrrfr>r]rgr^invalid_unicode_no_surrogaterrrevalrrascii_punctuation_rerobjectrErr}r~r[rrrQrr_rrrseBBBBBBBBBB%%%%%%55555555 $$$$$$IIIIIIIIIIII((((((!y!S!S?!S!S!STTIMM MMMNNi Q Q. Q Q QRR*YYd|-D-DD j " B ( +s 2 27S7Y7YZ]7^7^bc7c7c7c c#$@"$E$(D)<$=$=%>$'%())$$@AA222"rz"mnnGGGGGVGGGT777.c<c<c<c<c