=dZddlmZddlZddlZddlZddlZddlZddl Z ddl Z ddl Z ddl mZmZmZddlmZddlmZddlmZddlmZmZmZdd lmZdd lmZdd lm Z m!Z!dd l"m#Z#dd l$m%Z%ddl&m'Z'ddl(m)Z)ddl*m+Z+ddl,m-Z-ddl.m/Z/ddl0m1Z1ddl2m3Z3ddl4m5Z5m6Z6m7Z7e j8e9Z:ee;e;fZZ?dKd!Z@Gd"d#e>ZAdLd&ZBdMd'ZCdNd*ZDGd+d,ZEGd-d.eZFdOd0ZGeGdPd4ZHed56Gd7d2ZIGd8d9eZJ dQdRd@ZK dSdTdCZLdUdEZMGdFdGeZNGdHdIZOdS)VzO The main purpose of this module is to expose LinkCollector.collect_sources(). ) annotationsN)IterableMutableMappingSequence) dataclass) HTMLParser)Values)Callable NamedTupleProtocol)requests)Response) RetryErrorSSLError)NetworkConnectionError)Link) SearchScope) PipSession)raise_for_status)is_archive_fileredact_auth_from_url) url_to_path)vcs)CandidatesFromPage LinkSource build_sourceurlstrreturn str | NonectjD]D}||r|t |dvr|cSEdS)zgLook for VCS schemes in the URL. Returns the matched VCS scheme, or None if there's no match. z+:N)rschemeslower startswithlen)rschemes }/builddir/build/BUILD/imunify360-venv-2.6.2/opt/imunify360/venv/lib/python3.11/site-packages/pip/_internal/index/collector.py_match_vcs_schemer*.sW + 99;; ! !& ) ) c#f++.>$.F.FMMM 4c eZdZdfd ZxZS)_NotAPIContent content_typer request_descr!Nonecht||||_||_dSN)super__init__r.r/)selfr.r/ __class__s r)r4z_NotAPIContent.__init__:s3 |444((r+)r.r r/r r!r0)__name__ __module__ __qualname__r4 __classcell__r6s@r)r-r-9s=))))))))))r+r-responserr0c|jdd}|}|drdSt ||jj)z Check the Content-Type header to ensure the response contains a Simple API Response. Raises `_NotAPIContent` if the content type is not a valid content-type. Content-TypeUnknown)z text/htmlz#application/vnd.pypi.simple.v1+html#application/vnd.pypi.simple.v1+jsonN)headersgetr%r&r-requestmethod)r<r.content_type_ls r)_ensure_api_headerrF@sg#'' BBL!''))N     x'7'> ? ??r+ceZdZdS)_NotHTTPN)r7r8r9r+r)rHrHVsDr+rHsessionrctj|\}}}}}|dvrt||d}t |t |dS)z Send a HEAD request to the URL, and ensure the response contains a simple API Response. Raises `_NotHTTP` if the URL is not available for a HEAD request, or `_NotAPIContent` if the content type is not a valid content type. >httphttpsT)allow_redirectsN)urllibparseurlsplitrHheadrrF)rrJr(netlocpathqueryfragmentresps r)_ensure_api_responserXZsq-3L,A,A#,F,F)FFD% &&&jj <<T< 2 2DTtr+ctt|jrt||tdt |||dgddd}t|t|tdt ||j d d |S) aYAccess an Simple API response with GET, and return the response. This consists of three parts: 1. If the URL looks suspiciously like an archive, send a HEAD first to check the Content-Type is HTML or Simple API, to avoid downloading a large file. Raise `_NotHTTP` if the content type cannot be determined, or `_NotAPIContent` if it is not HTML or a Simple API. 2. Actually perform the request. Raise HTTP exceptions on network failures. 3. Check the Content-Type header to make sure we got a Simple API response, and raise `_NotAPIContent` otherwise. rJzGetting page %sz, )r@z*application/vnd.pypi.simple.v1+html; q=0.1ztext/html; q=0.01z max-age=0)Acceptz Cache-Control)rAzFetched page %s as %sr>r?) rrfilenamerXloggerdebugrrBjoinrrFrA)rrJrWs r)_get_simple_responser`lstCyy)**3S'2222 LL"$8$=$=>>> ;; ii()+     D4Tt LLS!! 33 Kr+rAResponseHeadersc|rSd|vrOtj}|d|d<|d}|rt |SdS)z=Determine if we have any encoding information in our headers.r>z content-typecharsetN)emailmessageMessage get_paramr )rAmrcs r)_get_encoding_from_headersris` >W,, M ! ! # ##N3.++i((  w<<  4r+c&eZdZd dZdd Zdd Zd S)CacheablePageContentpage IndexContentr!r0c&|jsJ||_dSr2)cache_link_parsingrlr5rls r)r4zCacheablePageContent.__init__s&&&& r+otherobjectboolcpt|t|o|jj|jjkSr2) isinstancetyperlr)r5rqs r)__eq__zCacheablePageContent.__eq__s*%d,,P%*.1PPr+intc4t|jjSr2)hashrlrr5s r)__hash__zCacheablePageContent.__hash__sDIM"""r+N)rlrmr!r0)rqrrr!rs)r!rx)r7r8r9r4rwr|rIr+r)rkrksTQQQQ######r+rkceZdZddZdS) ParseLinksrlrmr!Iterable[Link]cdSr2rIrps r)__call__zParseLinks.__call__sr+Nrlrmr!r)r7r8r9rrIr+r)r~r~sAAAAAAr+r~fnc|tjd fd tjd fd }|S) z Given a function that parses an Iterable[Link] from an IndexContent, cache the function's result (keyed by CacheablePageContent), unless the IndexContent `page` has `page.cache_link_parsing == False`. cacheable_pagerkr! list[Link]c>t|jSr2)listrl)rrs r)wrapperz*with_cached_index_content..wrappersBB~*++,,,r+rlrmcr|jrt|St|Sr2)rorkr)rlrrs r)wrapper_wrapperz2with_cached_index_content..wrapper_wrappers;  " 77/5566 6BBtHH~~r+)rrkr!r)rlrmr!r) functoolscachewraps)rrrs` @r)with_cached_index_contentrsr_-----_-_R r+rlrmrc#K|j}|drUtj|j}|dgD]#}tj||j }||V$dSt|j }|j pd}| |j ||j }|jp|}|jD] } tj| ||}||V!dS)z\ Parse a Simple API's Index Content, and yield its anchor elements as Link objects. r@filesNzutf-8)page_urlbase_url)r.r%r&jsonloadscontentrBr from_jsonrHTMLLinkParserencodingfeeddecoderanchors from_element) rlrEdatafilelinkparserrrranchors r) parse_linksrs! &,,..N  !FGGz$,''HHWb))  D>$11D|JJJJ DH % %F}'H KK ##H--... (C%#H. #III <  r+T)frozencReZdZUdZded<ded<ded<ded<d Zd ed <dd ZdS)rmaRepresents one response (or page), along with its URL. :param encoding: the encoding to decode the given content. :param url: the URL from which the HTML was downloaded. :param cache_link_parsing: whether links parsed from this page's url should be cached. PyPI index urls should have this set to False, for example. bytesrr r.r"rrTrsror!c*t|jSr2)rrr{s r)__str__zIndexContent.__str__s#DH---r+N)r!r )r7r8r9__doc____annotations__rorrIr+r)rmrmsrNNN HHH#####......r+c4eZdZdZd fd Zdd Zdd ZxZS)rzf HTMLParser that keeps the first base HREF and a list of all anchor elements' attributes. rr r!r0cvtd||_d|_g|_dS)NT)convert_charrefs)r3r4rrr)r5rr6s r)r4zHTMLLinkParser.__init__s7 $///$( 46 r+tagattrslist[tuple[str, str | None]]c|dkr)|j"||}| ||_dSdS|dkr)|jt |dSdS)Nbasea)rget_hrefrappenddict)r5rrhrefs r)handle_starttagzHTMLLinkParser.handle_starttagsn &==T]2==''D $   CZZ L  U , , , , ,Zr+r"c*|D]\}}|dkr|cSdS)NrrI)r5rnamevalues r)rzHTMLLinkParser.get_href!s1   KD%v~~ tr+)rr r!r0)rr rrr!r0)rrr!r")r7r8r9rr4rrr:r;s@r)rr so 777777----r+rrrreasonstr | ExceptionmethCallable[..., None] | Nonec<| tj}|d||dS)Nz%Could not fetch URL %s: %s - skipping)r]r^)rrrs r)_handle_get_simple_failr(s+  ||D 0$?????r+rorsc|t|j}t|j|jd||j|S)Nr>)rrro)rirArmrr)r<rors r)_make_index_contentr2sG*(*:;;H ( L-    r+IndexContent | Nonec|jddd}t|}|rtd||dS|drt jt|rU| ds|dz }tj |d}td| t|| }t!||j S#t$$rtd |Yn t&$r1}td ||j|jYd}~nd}~wt,$r}t/||Yd}~nd}~wt0$r}t/||Yd}~nd}~wt2$r:}d }|t5|z }t/||tjYd}~nRd}~wt8j$r}t/|d|Yd}~n(d}~wt8j$rt/|dYnwxYwdS)N#rrzICannot look at %s URL %s because it does not support lookup as web pages.zfile:/z index.htmlz# file: URL is directory, getting %srZ)roz`Skipping page %s because it looks like an archive, and cannot be checked by a HTTP HEAD request.zSkipping page %s because the %s request got Content-Type: %s. The only supported Content-Types are application/vnd.pypi.simple.v1+json, application/vnd.pypi.simple.v1+html, and text/htmlz4There was a problem confirming the ssl certificate: )rzconnection error: z timed out)rsplitr*r]warningr&osrTisdirrendswithrOrPurljoinr^r`rrorHr-r/r.rrrrr infor ConnectionErrorTimeout)rrJr vcs_schemerWexcrs r)_get_index_contentr?s (..a  #C#3''J W     t ~~g A27==S1A1A#B#B A||C    3JC l""3 55 :C@@@U#C999:#4Dz'LinkCollector.create..s+IIs-c22IIIIIIr+)rrno_index)rJr) index_urlextra_index_urlsrr]r^r_rrcreater)clsrJrrrrrlink_collectors r)rzLinkCollector.creates'(7+CC   $5  LL&IIjIIIII   J'-2 ")!!%   '%   r+ list[str]c|jjSr2)rrr{s r)rzLinkCollector.find_linkss ++r+locationrrc.t||jS)z> Fetch an HTML page containing package links. rZ)rrJ)r5rs r)fetch_responsezLinkCollector.fetch_responses"(DLAAAAr+ project_namer candidates_from_pagerrcztjfdjD}tjfdjD}t tj redtj ||D}t|ddg|z}t d|tt!|t!|S)Nc 3VK|]#}t|jjddV$dS)Frpage_validator expand_dirrorNrrJis_secure_originrlocrrr5s r)rz0LinkCollector.collect_sources..s_ 4 4  %9#|< #()     4 4 4 4 4 4 r+c 3VK|]#}t|jjddV$dS)TrNrrs r)rz0LinkCollector.collect_sources..s_ 5 5  %9#|<#')     5 5 5 5 5 5 r+c4g|]}||j d|jS)Nz* )r)rss r) z1LinkCollector.collect_sources..s6=QV%7QV %7%7%7r+z' location(s) to search for versions of : )rr) collections OrderedDictrget_index_urls_locationsvaluesrr] isEnabledForloggingDEBUG itertoolschainr'r^r_rr)r5rrindex_url_sourcesfind_links_sourcesliness``` r)collect_sourceszLinkCollector.collect_sourcess (3 4 4 4 4 4 4 (AA,OO 4 4 4    &(( )4 5 5 5 5 5 5  5 5 5    &((    w} - - +");=NOOE u::33#/333E LL5)) * * *.//-..    r+N)rJrrrr!r0)F)rJrrr rrsr!r)r!r)rrr!r)rr rrr!r) r7r8r9rr4 classmethodrpropertyrrr rIr+r)rrs #( [B,,,X,BBBB , , , , , , r+r)rr r!r")r<rr!r0)rr rJrr!r0)rr rJrr!r)rArar!r")rr~r!r~rr2)rrrrrrr!r0)T)r<rrorsr!rm)rrrJrr!r)Pr __future__rr email.messagerdrrrrr urllib.parserOcollections.abcrrr dataclassesr html.parserroptparser typingr r r pip._vendorr pip._vendor.requestsrpip._vendor.requests.exceptionsrrpip._internal.exceptionsrpip._internal.models.linkr!pip._internal.models.search_scoperpip._internal.network.sessionrpip._internal.network.utilsrpip._internal.utils.filetypesrpip._internal.utils.miscrpip._internal.utils.urlsrpip._internal.vcsrsourcesrrr getLoggerr7r]r rar* Exceptionr-rFrHrXr`rirkr~rrrmrrrrrrrIr+r)r%s#"""""  >>>>>>>>>>!!!!!!"""""" ! ))))))@@@@@@@@;;;;;;******999999444444888888999999999999000000!!!!!!AAAAAAAAAA  8 $ $ c*)))))Y)))@@@@,     y   $<<<<~ # # # # # # # #BBBBBBBB(8 $........(Z>(,@@@@@48     9999x,,,,,z,,, h h h h h h h h h h r+