ر4zdZddlmZddlZddlZddlZddlZddlZddl Z ddl Z ddl Z ddl Z ddlmZmZmZddlmZddlmZddlmZddlmZmZmZdd lmZdd lmZdd l m!Z!m"Z"dd l#m$Z$dd l%m&Z&ddl'm(Z(ddl)m*Z*ddl+m,Z,ddl-m.Z.ddl/m0Z0ddl1m2Z2ddl3m4Z4m5Z5m6Z6e j7e8Z9ee:e:fZ;dIdZ<Gdde=Z>dJd Z?Gd!d"e=Z@dKd%ZAdLd&ZBdMd)ZCGd*d+ZDGd,d-eZEdNd/ZFeFdOd3ZGed45Gd6d1ZHGd7d8eZI dPdQd?ZJ dRdSdBZKdTdDZLGdEdFeZMGdGdHZNdS)UzO The main purpose of this module is to expose LinkCollector.collect_sources(). ) annotationsN)IterableMutableMappingSequence) dataclass) HTMLParser)Values)Callable NamedTupleProtocol)requests)Response) RetryErrorSSLError)NetworkConnectionError)Link) SearchScope) PipSession)raise_for_status)is_archive_fileredact_auth_from_url)vcs)CandidatesFromPage LinkSource build_sourceurlstrreturn str | NonectjD]D}||r|t |dvr|cSEdS)zgLook for VCS schemes in the URL. Returns the matched VCS scheme, or None if there's no match. z+:N)rschemeslower startswithlen)rschemes o/builddir/build/BUILD/cloudlinux-venv-1.0.10/venv/lib/python3.11/site-packages/pip/_internal/index/collector.py_match_vcs_schemer).sW + 99;; ! !& ) ) c#f++.>$.F.FMMM 4c eZdZdfd ZxZS)_NotAPIContent content_typer request_descr Nonecht||||_||_dSN)super__init__r-r.)selfr-r. __class__s r(r3z_NotAPIContent.__init__:s3 |444((r*)r-rr.rr r/)__name__ __module__ __qualname__r3 __classcell__r5s@r(r,r,9s=))))))))))r*r,responserr/c|jdd}|}|drdSt ||jj)z Check the Content-Type header to ensure the response contains a Simple API Response. Raises `_NotAPIContent` if the content type is not a valid content-type. Content-TypeUnknown)z text/htmlz#application/vnd.pypi.simple.v1+html#application/vnd.pypi.simple.v1+jsonN)headersgetr$r%r,requestmethod)r;r-content_type_ls r(_ensure_api_headerrE@sg#'' BBL!''))N     x'7'> ? ??r*ceZdZdS)_NotHTTPN)r6r7r8r*r(rGrGVsDr*rGsessionrctj|\}}}}}|dvrt||d}t |t |dS)z Send a HEAD request to the URL, and ensure the response contains a simple API Response. Raises `_NotHTTP` if the URL is not available for a HEAD request, or `_NotAPIContent` if the content type is not a valid content type. >httphttpsT)allow_redirectsN)urllibparseurlsplitrGheadrrE)rrIr'netlocpathqueryfragmentresps r(_ensure_api_responserWZsq-3L,A,A#,F,F)FFD% &&&jj <<T< 2 2DTtr*ctt|jrt||tdt |||dgddd}t|t|tdt ||j d d |S) aYAccess an Simple API response with GET, and return the response. This consists of three parts: 1. If the URL looks suspiciously like an archive, send a HEAD first to check the Content-Type is HTML or Simple API, to avoid downloading a large file. Raise `_NotHTTP` if the content type cannot be determined, or `_NotAPIContent` if it is not HTML or a Simple API. 2. Actually perform the request. Raise HTTP exceptions on network failures. 3. Check the Content-Type header to make sure we got a Simple API response, and raise `_NotAPIContent` otherwise. rIzGetting page %sz, )r?z*application/vnd.pypi.simple.v1+html; q=0.1ztext/html; q=0.01z max-age=0)Acceptz Cache-Control)r@zFetched page %s as %sr=r>) rrfilenamerWloggerdebugrrAjoinrrEr@)rrIrVs r(_get_simple_responser_lstCyy)**3S'2222 LL"$8$=$=>>> ;; ii()+     D4Tt LLS!! 33 Kr*r@ResponseHeadersc|rSd|vrOtj}|d|d<|d}|rt |SdS)z=Determine if we have any encoding information in our headers.r=z content-typecharsetN)emailmessageMessage get_paramr)r@mrbs r(_get_encoding_from_headersrhs` >W,, M ! ! # ##N3.++i((  w<<  4r*c&eZdZd dZdd Zdd Zd S)CacheablePageContentpage IndexContentr r/c&|jsJ||_dSr1)cache_link_parsingrkr4rks r(r3zCacheablePageContent.__init__s&&&& r*otherobjectboolcpt|t|o|jj|jjkSr1) isinstancetyperkr)r4rps r(__eq__zCacheablePageContent.__eq__s*%d,,P%*.1PPr*intc4t|jjSr1)hashrkrr4s r(__hash__zCacheablePageContent.__hash__sDIM"""r*N)rkrlr r/)rprqr rr)r rw)r6r7r8r3rvr{rHr*r(rjrjsTQQQQ######r*rjceZdZddZdS) ParseLinksrkrlr Iterable[Link]cdSr1rHros r(__call__zParseLinks.__call__sr*Nrkrlr r~)r6r7r8rrHr*r(r}r}sAAAAAAr*r}fnc|tjd fd tjd fd }|S) z Given a function that parses an Iterable[Link] from an IndexContent, cache the function's result (keyed by CacheablePageContent), unless the IndexContent `page` has `page.cache_link_parsing == False`. cacheable_pagerjr list[Link]c>t|jSr1)listrk)rrs r(wrapperz*with_cached_index_content..wrappersBB~*++,,,r*rkrlcr|jrt|St|Sr1)rnrjr)rkrrs r(wrapper_wrapperz2with_cached_index_content..wrapper_wrappers;  " 77/5566 6BBtHH~~r*)rrjr r)rkrlr r) functoolscachewraps)rrrs` @r(with_cached_index_contentrsr_-----_-_R r*rkrlr~c#K|j}|drUtj|j}|dgD]#}tj||j }||V$dSt|j }|j pd}| |j ||j }|jp|}|jD] } tj| ||}||V!dS)z\ Parse a Simple API's Index Content, and yield its anchor elements as Link objects. r?filesNzutf-8)page_urlbase_url)r-r$r%jsonloadscontentrAr from_jsonrHTMLLinkParserencodingfeeddecoderanchors from_element) rkrDdatafilelinkparserrrranchors r( parse_linksrs! &,,..N  !FGGz$,''HHWb))  D>$11D|JJJJ DH % %F}'H KK ##H--... (C%#H. #III <  r*T)frozencReZdZUdZded<ded<ded<ded<d Zd ed <dd ZdS)rlaRepresents one response (or page), along with its URL. :param encoding: the encoding to decode the given content. :param url: the URL from which the HTML was downloaded. :param cache_link_parsing: whether links parsed from this page's url should be cached. PyPI index urls should have this set to False, for example. bytesrrr-r!rrTrrrnr c*t|jSr1)rrrzs r(__str__zIndexContent.__str__s#DH---r*N)r r)r6r7r8__doc____annotations__rnrrHr*r(rlrlsrNNN HHH#####......r*c4eZdZdZd fd Zdd Zdd ZxZS)rzf HTMLParser that keeps the first base HREF and a list of all anchor elements' attributes. rrr r/cvtd||_d|_g|_dS)NT)convert_charrefs)r2r3rrr)r4rr5s r(r3zHTMLLinkParser.__init__s7 $///$( 46 r*tagattrslist[tuple[str, str | None]]c|dkr)|j"||}| ||_dSdS|dkr)|jt |dSdS)Nbasea)rget_hrefrappenddict)r4rrhrefs r(handle_starttagzHTMLLinkParser.handle_starttagsn &==T]2==''D $   CZZ L  U , , , , ,Zr*r!c*|D]\}}|dkr|cSdS)NrrH)r4rnamevalues r(rzHTMLLinkParser.get_href!s1   KD%v~~ tr*)rrr r/)rrrrr r/)rrr r!)r6r7r8rr3rrr9r:s@r(rr so 777777----r*rrrreasonstr | ExceptionmethCallable[..., None] | Nonec<| tj}|d||dS)Nz%Could not fetch URL %s: %s - skipping)r\r])rrrs r(_handle_get_simple_failr(s+  ||D 0$?????r*rnrrc|t|j}t|j|jd||j|S)Nr=)rrrn)rhr@rlrr)r;rnrs r(_make_index_contentr2sG*(*:;;H ( L-    r*IndexContent | Nonec|jddd}t|}|rtd||dSt j|\}}}}}}|dkrtj t j |rU| ds|dz }t j|d}td| t!|| }t#||j S#t&$rtd |Yn t($r1}td ||j|jYd}~nd}~wt.$r}t1||Yd}~nd}~wt2$r}t1||Yd}~nd}~wt4$r:}d } | t7|z } t1|| tjYd}~nRd}~wt:j$r}t1|d|Yd}~n(d}~wt:j$rt1|dYnwxYwdS)N#rrzICannot look at %s URL %s because it does not support lookup as web pages.r/z index.htmlz# file: URL is directory, getting %srY)rnz`Skipping page %s because it looks like an archive, and cannot be checked by a HTTP HEAD request.zSkipping page %s because the %s request got Content-Type: %s. The only supported Content-Types are application/vnd.pypi.simple.v1+json, application/vnd.pypi.simple.v1+html, and text/htmlz4There was a problem confirming the ssl certificate: )rzconnection error: z timed out) rsplitr)r\warningrNrOurlparseosrSisdirrB url2pathnameendswithurljoinr]r_rrnrGr,r.r-rrrrrinfor ConnectionErrorTimeout) rrIr vcs_schemer'_rSrVexcrs r(_get_index_contentr?s (..a  #C#3''J W     t &|44S99FAtQ1 BGMM&.*E*Ed*K*KLL||C    3JC l""3 55 :C@@@U#C999:#4Dz'LinkCollector.create..s+IIs-c22IIIIIIr*)rrno_index)rIr) index_urlextra_index_urlsrr\r]r^rrcreater)clsrIrrrrrlink_collectors r(rzLinkCollector.creates'(7+CC   $5  LL&IIjIIIII   J'-2 ")!!%   '%   r* list[str]c|jjSr1)rrrzs r(rzLinkCollector.find_linkss ++r*locationrrc.t||jS)z> Fetch an HTML page containing package links. rY)rrI)r4rs r(fetch_responsezLinkCollector.fetch_responses"(DLAAAAr* project_namercandidates_from_pagerrcztjfdjD}tjfdjD}t tj redtj ||D}t|ddg|z}t d|tt!|t!|S)Nc 3VK|]#}t|jjddV$dS)Frpage_validator expand_dirrnrNrrIis_secure_originrlocrrr4s r(rz0LinkCollector.collect_sources..s_ 4 4  %9#|< #()     4 4 4 4 4 4 r*c 3VK|]#}t|jjddV$dS)TrNrrs r(rz0LinkCollector.collect_sources..s_ 5 5  %9#|<#')     5 5 5 5 5 5 r*c4g|]}||j d|jS)Nz* )r)rss r( z1LinkCollector.collect_sources..s6=QV%7QV %7%7%7r*z' location(s) to search for versions of : )rr) collections OrderedDictrget_index_urls_locationsvaluesrr\ isEnabledForloggingDEBUG itertoolschainr&r]r^rr)r4rrindex_url_sourcesfind_links_sourcesliness``` r(collect_sourceszLinkCollector.collect_sourcess (3 4 4 4 4 4 4 (AA,OO 4 4 4    &(( )4 5 5 5 5 5 5  5 5 5    &((    w} - - +");=NOOE u::33#/333E LL5)) * * *.//-..    r*N)rIrrrr r/)F)rIrrr rrrr r)r r)rrr r)rrrrr r) r6r7r8rr3 classmethodrpropertyrrr rHr*r(rrs #( [B,,,X,BBBB , , , , , , r*r)rrr r!)r;rr r/)rrrIrr r/)rrrIrr r)r@r`r r!)rr}r r}rr1)rrrrrrr r/)T)r;rrnrrr rl)rrrIrr r)Or __future__rr email.messagercrrrrr urllib.parserNurllib.requestcollections.abcrrr dataclassesr html.parserroptparser typingr r r pip._vendorr pip._vendor.requestsrpip._vendor.requests.exceptionsrrpip._internal.exceptionsrpip._internal.models.linkr!pip._internal.models.search_scoperpip._internal.network.sessionrpip._internal.network.utilsrpip._internal.utils.filetypesrpip._internal.utils.miscrpip._internal.vcsrsourcesrrr getLoggerr6r\rr`r) Exceptionr,rErGrWr_rhrjr}rrrlrrrrrrrHr*r(r's#"""""  >>>>>>>>>>!!!!!!"""""" ! ))))))@@@@@@@@;;;;;;******999999444444888888999999999999!!!!!!AAAAAAAAAA  8 $ $ c*)))))Y)))@@@@,     y   $<<<<~ # # # # # # # #BBBBBBBB(8 $........(Z>(,@@@@@48     ::::z,,,,,z,,, h h h h h h h h h h r*