#
# Copyright (C) 2012-2017 The Python Software Foundation.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
import codecs
from collections import deque
import contextlib
import csv
from glob import iglob as std_iglob
import io
import json
import logging
import os
import py_compile
import re
import socket
try:
import ssl
except ImportError: # pragma: no cover
ssl = None
import subprocess
import sys
import tarfile
import tempfile
import textwrap
try:
import threading
except ImportError: # pragma: no cover
import dummy_threading as threading
import time
from . import DistlibException
from .compat import (string_types, text_type, shutil, raw_input, StringIO,
cache_from_source, urlopen, urljoin, httplib, xmlrpclib,
splittype, HTTPHandler, BaseConfigurator, valid_ident,
Container, configparser, URLError, ZipFile, fsdecode,
unquote, urlparse)
logger = logging.getLogger(__name__)
#
# Requirement parsing code as per PEP 508
#
IDENTIFIER = re.compile(r'^([\w\.-]+)\s*')
VERSION_IDENTIFIER = re.compile(r'^([\w\.*+-]+)\s*')
COMPARE_OP = re.compile(r'^(<=?|>=?|={2,3}|[~!]=)\s*')
MARKER_OP = re.compile(r'^((<=?)|(>=?)|={2,3}|[~!]=|in|not\s+in)\s*')
OR = re.compile(r'^or\b\s*')
AND = re.compile(r'^and\b\s*')
NON_SPACE = re.compile(r'(\S+)\s*')
STRING_CHUNK = re.compile(r'([\s\w\.{}()*+#:;,/?!~`@$%^&=|<>\[\]-]+)')
def parse_marker(marker_string):
"""
Parse a marker string and return a dictionary containing a marker expression.
The dictionary will contain keys "op", "lhs" and "rhs" for non-terminals in
the expression grammar, or strings. A string contained in quotes is to be
interpreted as a literal string, and a string not contained in quotes is a
variable (such as os_name).
"""
def marker_var(remaining):
# either identifier, or literal string
m = IDENTIFIER.match(remaining)
if m:
result = m.groups()[0]
remaining = remaining[m.end():]
elif not remaining:
raise SyntaxError('unexpected end of input')
else:
q = remaining[0]
if q not in '\'"':
raise SyntaxError('invalid expression: %s' % remaining)
oq = '\'"'.replace(q, '')
remaining = remaining[1:]
parts = [q]
while remaining:
# either a string chunk, or oq, or q to terminate
if remaining[0] == q:
break
elif remaining[0] == oq:
parts.append(oq)
remaining = remaining[1:]
else:
m = STRING_CHUNK.match(remaining)
if not m:
raise SyntaxError('error in string literal: %s' % remaining)
parts.append(m.groups()[0])
remaining = remaining[m.end():]
else:
s = ''.join(parts)
raise SyntaxError('unterminated string: %s' % s)
parts.append(q)
result = ''.join(parts)
remaining = remaining[1:].lstrip() # skip past closing quote
return result, remaining
def marker_expr(remaining):
if remaining and remaining[0] == '(':
result, remaining = marker(remaining[1:].lstrip())
if remaining[0] != ')':
raise SyntaxError('unterminated parenthesis: %s' % remaining)
remaining = remaining[1:].lstrip()
else:
lhs, remaining = marker_var(remaining)
while remaining:
m = MARKER_OP.match(remaining)
if not m:
break
op = m.groups()[0]
remaining = remaining[m.end():]
rhs, remaining = marker_var(remaining)
lhs = {'op': op, 'lhs': lhs, 'rhs': rhs}
result = lhs
return result, remaining
def marker_and(remaining):
lhs, remaining = marker_expr(remaining)
while remaining:
m = AND.match(remaining)
if not m:
break
remaining = remaining[m.end():]
rhs, remaining = marker_expr(remaining)
lhs = {'op': 'and', 'lhs': lhs, 'rhs': rhs}
return lhs, remaining
def marker(remaining):
lhs, remaining = marker_and(remaining)
while remaining:
m = OR.match(remaining)
if not m:
break
remaining = remaining[m.end():]
rhs, remaining = marker_and(remaining)
lhs = {'op': 'or', 'lhs': lhs, 'rhs': rhs}
return lhs, remaining
return marker(marker_string)
def parse_requirement(req):
"""
Parse a requirement passed in as a string. Return a Container
whose attributes contain the various parts of the requirement.
"""
remaining = req.strip()
if not remaining or remaining.startswith('#'):
return None
m = IDENTIFIER.match(remaining)
if not m:
raise SyntaxError('name expected: %s' % remaining)
distname = m.groups()[0]
remaining = remaining[m.end():]
extras = mark_expr = versions = uri = None
if remaining and remaining[0] == '[':
i = remaining.find(']', 1)
if i < 0:
raise SyntaxError('unterminated extra: %s' % remaining)
s = remaining[1:i]
remaining = remaining[i + 1:].lstrip()
extras = []
while s:
m = IDENTIFIER.match(s)
if not m:
raise SyntaxError('malformed extra: %s' % s)
extras.append(m.groups()[0])
s = s[m.end():]
if not s:
break
if s[0] != ',':
raise SyntaxError('comma expected in extras: %s' % s)
s = s[1:].lstrip()
if not extras:
extras = None
if remaining:
if remaining[0] == '@':
# it's a URI
remaining = remaining[1:].lstrip()
m = NON_SPACE.match(remaining)
if not m:
raise SyntaxError('invalid URI: %s' % remaining)
uri = m.groups()[0]
t = urlparse(uri)
# there are issues with Python and URL parsing, so this test
# is a bit crude. See bpo-20271, bpo-23505. Python doesn't
# always parse invalid URLs correctly - it should raise
# exceptions for malformed URLs
if not (t.scheme and t.netloc):
raise SyntaxError('Invalid URL: %s' % uri)
remaining = remaining[m.end():].lstrip()
else:
def get_versions(ver_remaining):
"""
Return a list of operator, version tuples if any are
specified, else None.
"""
m = COMPARE_OP.match(ver_remaining)
versions = None
if m:
versions = []
while True:
op = m.groups()[0]
ver_remaining = ver_remaining[m.end():]
m = VERSION_IDENTIFIER.match(ver_remaining)
if not m:
raise SyntaxError('invalid version: %s' % ver_remaining)
v = m.groups()[0]
versions.append((op, v))
ver_remaining = ver_remaining[m.end():]
if not ver_remaining or ver_remaining[0] != ',':
break
ver_remaining = ver_remaining[1:].lstrip()
m = COMPARE_OP.match(ver_remaining)
if not m:
raise SyntaxError('invalid constraint: %s' % ver_remaining)
if not versions:
versions = None
return versions, ver_remaining
if remaining[0] != '(':
versions, remaining = get_versions(remaining)
else:
i = remaining.find(')', 1)
if i < 0:
raise SyntaxError('unterminated parenthesis: %s' % remaining)
s = remaining[1:i]
remaining = remaining[i + 1:].lstrip()
# As a special diversion from PEP 508, allow a version number
# a.b.c in parentheses as a synonym for ~= a.b.c (because this
# is allowed in earlier PEPs)
if COMPARE_OP.match(s):
versions, _ = get_versions(s)
else:
m = VERSION_IDENTIFIER.match(s)
if not m:
raise SyntaxError('invalid constraint: %s' % s)
v = m.groups()[0]
s = s[m.end():].lstrip()
if s:
raise SyntaxError('invalid constraint: %s' % s)
versions = [('~=', v)]
if remaining:
if remaining[0] != ';':
raise SyntaxError('invalid requirement: %s' % remaining)
remaining = remaining[1:].lstrip()
mark_expr, remaining = parse_marker(remaining)
if remaining and remaining[0] != '#':
raise SyntaxError('unexpected trailing data: %s' % remaining)
if not versions:
rs = distname
else:
rs = '%s %s' % (distname, ', '.join(['%s %s' % con for con in versions]))
return Container(name=distname, extras=extras, constraints=versions,
marker=mark_expr, url=uri, requirement=rs)
def get_resources_dests(resources_root, rules):
"""Find destinations for resources files"""
def get_rel_path(root, path):
# normalizes and returns a lstripped-/-separated path
root = root.replace(os.path.sep, '/')
path = path.replace(os.path.sep, '/')
assert path.startswith(root)
return path[len(root):].lstrip('/')
destinations = {}
for base, suffix, dest in rules:
prefix = os.path.join(resources_root, base)
for abs_base in iglob(prefix):
abs_glob = os.path.join(abs_base, suffix)
for abs_path in iglob(abs_glob):
resource_file = get_rel_path(resources_root, abs_path)
if dest is None: # remove the entry if it was here
destinations.pop(resource_file, None)
else:
rel_path = get_rel_path(abs_base, abs_path)
rel_dest = dest.replace(os.path.sep, '/').rstrip('/')
destinations[resource_file] = rel_dest + '/' + rel_path
return destinations
def in_venv():
if hasattr(sys, 'real_prefix'):
# virtualenv venvs
result = True
else:
# PEP 405 venvs
result = sys.prefix != getattr(sys, 'base_prefix', sys.prefix)
return result
def get_executable():
# The __PYVENV_LAUNCHER__ dance is apparently no longer needed, as
# changes to the stub launcher mean that sys.executable always points
# to the stub on OS X
# if sys.platform == 'darwin' and ('__PYVENV_LAUNCHER__'
# in os.environ):
# result = os.environ['__PYVENV_LAUNCHER__']
# else:
# result = sys.executable
# return result
result = os.path.normcase(sys.executable)
if not isinstance(result, text_type):
result = fsdecode(result)
return result
def proceed(prompt, allowed_chars, error_prompt=None, default=None):
p = prompt
while True:
s = raw_input(p)
p = prompt
if not s and default:
s = default
if s:
c = s[0].lower()
if c in allowed_chars:
break
if error_prompt:
p = '%c: %s\n%s' % (c, error_prompt, prompt)
return c
def extract_by_key(d, keys):
if isinstance(keys, string_types):
keys = keys.split()
result = {}
for key in keys:
if key in d:
result[key] = d[key]
return result
def read_exports(stream):
if sys.version_info[0] >= 3:
# needs to be a text stream
stream = codecs.getreader('utf-8')(stream)
# Try to load as JSON, falling back on legacy format
data = stream.read()
stream = StringIO(data)
try:
jdata = json.load(stream)
result = jdata['extensions']['python.exports']['exports']
for group, entries in result.items():
for k, v in entries.items():
s = '%s = %s' % (k, v)
entry = get_export_entry(s)
assert entry is not None
entries[k] = entry
return result
except Exception:
stream.seek(0, 0)
def read_stream(cp, stream):
if hasattr(cp, 'read_file'):
cp.read_file(stream)
else:
cp.readfp(stream)
cp = configparser.ConfigParser()
try:
read_stream(cp, stream)
except configparser.MissingSectionHeaderError:
stream.close()
data = textwrap.dedent(data)
stream = StringIO(data)
read_stream(cp, stream)
result = {}
for key in cp.sections():
result[key] = entries = {}
for name, value in cp.items(key):
s = '%s = %s' % (name, value)
entry = get_export_entry(s)
assert entry is not None
#entry.dist = self
entries[name] = entry
return result
def write_exports(exports, stream):
if sys.version_info[0] >= 3:
# needs to be a text stream
stream = codecs.getwriter('utf-8')(stream)
cp = configparser.ConfigParser()
for k, v in exports.items():
# TODO check k, v for valid values
cp.add_section(k)
for entry in v.values():
if entry.suffix is None:
s = entry.prefix
else:
s = '%s:%s' % (entry.prefix, entry.suffix)
if entry.flags:
s = '%s [%s]' % (s, ', '.join(entry.flags))
cp.set(k, entry.name, s)
cp.write(stream)
@contextlib.contextmanager
def tempdir():
td = tempfile.mkdtemp()
try:
yield td
finally:
shutil.rmtree(td)
@contextlib.contextmanager
def chdir(d):
cwd = os.getcwd()
try:
os.chdir(d)
yield
finally:
os.chdir(cwd)
@contextlib.contextmanager
def socket_timeout(seconds=15):
cto = socket.getdefaulttimeout()
try:
socket.setdefaulttimeout(seconds)
yield
finally:
socket.setdefaulttimeout(cto)
class cached_property(object):
def __init__(self, func):
self.func = func
#for attr in ('__name__', '__module__', '__doc__'):
# setattr(self, attr, getattr(func, attr, None))
def __get__(self, obj, cls=None):
if obj is None:
return self
value = self.func(obj)
object.__setattr__(obj, self.func.__name__, value)
#obj.__dict__[self.func.__name__] = value = self.func(obj)
return value
def convert_path(pathname):
"""Return 'pathname' as a name that will work on the native filesystem.
The path is split on '/' and put back together again using the current
directory separator. Needed because filenames in the setup script are
always supplied in Unix style, and have to be converted to the local
convention before we can actually use them in the filesystem. Raises
ValueError on non-Unix-ish systems if 'pathname' either starts or
ends with a slash.
"""
if os.sep == '/':
return pathname
if not pathname:
return pathname
if pathname[0] == '/':
raise ValueError("path '%s' cannot be absolute" % pathname)
if pathname[-1] == '/':
raise ValueError("path '%s' cannot end with '/'" % pathname)
paths = pathname.split('/')
while os.curdir in paths:
paths.remove(os.curdir)
if not paths:
return os.curdir
return os.path.join(*paths)
class FileOperator(object):
def __init__(self, dry_run=False):
self.dry_run = dry_run
self.ensured = set()
self._init_record()
def _init_record(self):
self.record = False
self.files_written = set()
self.dirs_created = set()
def record_as_written(self, path):
if self.record:
self.files_written.add(path)
def newer(self, source, target):
"""Tell if the target is newer than the source.
Returns true if 'source' exists and is more recently modified than
'target', or if 'source' exists and 'target' doesn't.
Returns false if both exist and 'target' is the same age or younger
than 'source'. Raise PackagingFileError if 'source' does not exist.
Note that this test is not very accurate: files created in the same
second will have the same "age".
"""
if not os.path.exists(source):
raise DistlibException("file '%r' does not exist" %
os.path.abspath(source))
if not os.path.exists(target):
return True
return os.stat(source).st_mtime > os.stat(target).st_mtime
def copy_file(self, infile, outfile, check=True):
"""Copy a file respecting dry-run and force flags.
"""
self.ensure_dir(os.path.dirname(outfile))
logger.info('Copying %s to %s', infile, outfile)
if not self.dry_run:
msg = None
if check:
if os.path.islink(outfile):
msg = '%s is a symlink' % outfile
elif os.path.exists(outfile) and not os.path.isfile(outfile):
msg = '%s is a non-regular file' % outfile
if msg:
raise ValueError(msg + ' which would be overwritten')
shutil.copyfile(infile, outfile)
self.record_as_written(outfile)
def copy_stream(self, instream, outfile, encoding=None):
assert not os.path.isdir(outfile)
self.ensure_dir(os.path.dirname(outfile))
logger.info('Copying stream %s to %s', instream, outfile)
if not self.dry_run:
if encoding is None:
outstream = open(outfile, 'wb')
else:
outstream = codecs.open(outfile, 'w', encoding=encoding)
try:
shutil.copyfileobj(instream, outstream)
finally:
outstream.close()
self.record_as_written(outfile)
def write_binary_file(self, path, data):
self.ensure_dir(os.path.dirname(path))
if not self.dry_run:
if os.path.exists(path):
os.remove(path)
with open(path, 'wb') as f:
f.write(data)
self.record_as_written(path)
def write_text_file(self, path, data, encoding):
self.write_binary_file(path, data.encode(encoding))
def set_mode(self, bits, mask, files):
if os.name == 'posix' or (os.name == 'java' and os._name == 'posix'):
# Set the executable bits (owner, group, and world) on
# all the files specified.
for f in files:
if self.dry_run:
logger.info("changing mode of %s", f)
else:
mode = (os.stat(f).st_mode | bits) & mask
logger.info("changing mode of %s to %o", f, mode)
os.chmod(f, mode)
set_executable_mode = lambda s, f: s.set_mode(0o555, 0o7777, f)
def ensure_dir(self, path):
path = os.path.abspath(path)
if path not in self.ensured and not os.path.exists(path):
self.ensured.add(path)
d, f = os.path.split(path)
self.ensure_dir(d)
logger.info('Creating %s' % path)
if not self.dry_run:
os.mkdir(path)
if self.record:
self.dirs_created.add(path)
def byte_compile(self, path, optimize=False, force=False, prefix=None, hashed_invalidation=False):
dpath = cache_from_source(path, not optimize)
logger.info('Byte-compiling %s to %s', path, dpath)
if not self.dry_run:
if force or self.newer(path, dpath):
if not prefix:
diagpath = None
else:
assert path.startswith(prefix)
diagpath = path[len(prefix):]
compile_kwargs = {}
if hashed_invalidation and hasattr(py_compile, 'PycInvalidationMode'):
compile_kwargs['invalidation_mode'] = py_compile.PycInvalidationMode.CHECKED_HASH
py_compile.compile(path, dpath, diagpath, True, **compile_kwargs) # raise error
self.record_as_written(dpath)
return dpath
def ensure_removed(self, path):
if os.path.exists(path):
if os.path.isdir(path) and not os.path.islink(path):
logger.debug('Removing directory tree at %s', path)
if not self.dry_run:
shutil.rmtree(path)
if self.record:
if path in self.dirs_created:
self.dirs_created.remove(path)
else:
if os.path.islink(path):
s = 'link'
else:
s = 'file'
logger.debug('Removing %s %s', s, path)
if not self.dry_run:
os.remove(path)
if self.record:
if path in self.files_written:
self.files_written.remove(path)
def is_writable(self, path):
result = False
while not result:
if os.path.exists(path):
result = os.access(path, os.W_OK)
break
parent = os.path.dirname(path)
if parent == path:
break
path = parent
return result
def commit(self):
"""
Commit recorded changes, turn off recording, return
changes.
"""
assert self.record
result = self.files_written, self.dirs_created
self._init_record()
return result
def rollback(self):
if not self.dry_run:
for f in list(self.files_written):
if os.path.exists(f):
os.remove(f)
# dirs should all be empty now, except perhaps for
# __pycache__ subdirs
# reverse so that subdirs appear before their parents
dirs = sorted(self.dirs_created, reverse=True)
for d in dirs:
flist = os.listdir(d)
if flist:
assert flist == ['__pycache__']
sd = os.path.join(d, flist[0])
os.rmdir(sd)
os.rmdir(d) # should fail if non-empty
self._init_record()
def resolve(module_name, dotted_path):
if module_name in sys.modules:
mod = sys.modules[module_name]
else:
mod = __import__(module_name)
if dotted_path is None:
result = mod
else:
parts = dotted_path.split('.')
result = getattr(mod, parts.pop(0))
for p in parts:
result = getattr(result, p)
return result
class ExportEntry(object):
def __init__(self, name, prefix, suffix, flags):
self.name = name
self.prefix = prefix
self.suffix = suffix
self.flags = flags
@cached_property
def value(self):
return resolve(self.prefix, self.suffix)
def __repr__(self): # pragma: no cover
return '