blob: 1f85acb42c6ccf3ba3da1897c8ad9126bc517250 [file] [log] [blame]
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Provides functions: get_native_path_case(), isabs() and safe_join()."""
import logging
import os
import re
import sys
import unicodedata
## OS-specific imports
if sys.platform == 'win32':
from ctypes.wintypes import create_unicode_buffer
from ctypes.wintypes import windll, FormatError # pylint: disable=E0611
from ctypes.wintypes import GetLastError # pylint: disable=E0611
elif sys.platform == 'darwin':
import Carbon.File # pylint: disable=F0401
import MacOS # pylint: disable=F0401
if sys.platform == 'win32':
def QueryDosDevice(drive_letter):
"""Returns the Windows 'native' path for a DOS drive letter."""
assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter
assert isinstance(drive_letter, unicode)
# Guesswork. QueryDosDeviceW never returns the required number of bytes.
chars = 1024
drive_letter = drive_letter
p = create_unicode_buffer(chars)
if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars):
err = GetLastError()
if err:
# pylint: disable=E0602
msg = u'QueryDosDevice(%s): %s (%d)' % (
drive_letter, FormatError(err), err)
raise WindowsError(err, msg.encode('utf-8'))
return p.value
def GetShortPathName(long_path):
"""Returns the Windows short path equivalent for a 'long' path."""
assert isinstance(long_path, unicode), repr(long_path)
# Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
# not enforced.
if os.path.isabs(long_path) and not long_path.startswith('\\\\?\\'):
long_path = '\\\\?\\' + long_path
chars = windll.kernel32.GetShortPathNameW(long_path, None, 0)
if chars:
p = create_unicode_buffer(chars)
if windll.kernel32.GetShortPathNameW(long_path, p, chars):
return p.value
err = GetLastError()
if err:
# pylint: disable=E0602
msg = u'GetShortPathName(%s): %s (%d)' % (
long_path, FormatError(err), err)
raise WindowsError(err, msg.encode('utf-8'))
def GetLongPathName(short_path):
"""Returns the Windows long path equivalent for a 'short' path."""
assert isinstance(short_path, unicode)
# Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
# not enforced.
if os.path.isabs(short_path) and not short_path.startswith('\\\\?\\'):
short_path = '\\\\?\\' + short_path
chars = windll.kernel32.GetLongPathNameW(short_path, None, 0)
if chars:
p = create_unicode_buffer(chars)
if windll.kernel32.GetLongPathNameW(short_path, p, chars):
return p.value
err = GetLastError()
if err:
# pylint: disable=E0602
msg = u'GetLongPathName(%s): %s (%d)' % (
short_path, FormatError(err), err)
raise WindowsError(err, msg.encode('utf-8'))
class DosDriveMap(object):
"""Maps \Device\HarddiskVolumeN to N: on Windows."""
# Keep one global cache.
_MAPPING = {}
def __init__(self):
"""Lazy loads the cache."""
if not self._MAPPING:
# This is related to UNC resolver on windows. Ignore that.
self._MAPPING[u'\\Device\\Mup'] = None
self._MAPPING[u'\\SystemRoot'] = os.environ[u'SystemRoot']
for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)):
try:
letter = u'%s:' % letter
mapped = QueryDosDevice(letter)
if mapped in self._MAPPING:
logging.warn(
('Two drives: \'%s\' and \'%s\', are mapped to the same disk'
'. Drive letters are a user-mode concept and the kernel '
'traces only have NT path, so all accesses will be '
'associated with the first drive letter, independent of the '
'actual letter used by the code') % (
self._MAPPING[mapped], letter))
else:
self._MAPPING[mapped] = letter
except WindowsError: # pylint: disable=E0602
pass
def to_win32(self, path):
"""Converts a native NT path to Win32/DOS compatible path."""
match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path)
if not match:
raise ValueError(
'Can\'t convert %s into a Win32 compatible path' % path,
path)
if not match.group(1) in self._MAPPING:
# Unmapped partitions may be accessed by windows for the
# fun of it while the test is running. Discard these.
return None
drive = self._MAPPING[match.group(1)]
if not drive or not match.group(2):
return drive
return drive + match.group(2)
def isabs(path):
"""Accepts X: as an absolute path, unlike python's os.path.isabs()."""
return os.path.isabs(path) or len(path) == 2 and path[1] == ':'
def find_item_native_case(root, item):
"""Gets the native path case of a single item based at root_path."""
if item == '..':
return item
root = get_native_path_case(root)
return os.path.basename(get_native_path_case(os.path.join(root, item)))
def get_native_path_case(p):
"""Returns the native path case for an existing file.
On Windows, removes any leading '\\?\'.
"""
assert isinstance(p, unicode), repr(p)
if not isabs(p):
raise ValueError(
'get_native_path_case(%r): Require an absolute path' % p, p)
# Make sure it is normalized to os.path.sep. Do not do it here to keep the
# function fast
assert '/' not in p, p
suffix = ''
count = p.count(':')
if count > 1:
# This means it has an alternate-data stream. There could be 3 ':', since
# it could be the $DATA datastream of an ADS. Split the whole ADS suffix
# off and add it back afterward. There is no way to know the native path
# case of an alternate data stream.
items = p.split(':')
p = ':'.join(items[0:2])
suffix = ''.join(':' + i for i in items[2:])
# TODO(maruel): Use os.path.normpath?
if p.endswith('.\\'):
p = p[:-2]
# Windows used to have an option to turn on case sensitivity on non Win32
# subsystem but that's out of scope here and isn't supported anymore.
# Go figure why GetShortPathName() is needed.
try:
out = GetLongPathName(GetShortPathName(p))
except OSError, e:
if e.args[0] in (2, 3, 5):
# The path does not exist. Try to recurse and reconstruct the path.
base = os.path.dirname(p)
rest = os.path.basename(p)
return os.path.join(get_native_path_case(base), rest)
raise
if out.startswith('\\\\?\\'):
out = out[4:]
# Always upper case the first letter since GetLongPathName() will return the
# drive letter in the case it was given.
return out[0].upper() + out[1:] + suffix
elif sys.platform == 'darwin':
# On non-windows, keep the stdlib behavior.
isabs = os.path.isabs
def _native_case(p):
"""Gets the native path case. Warning: this function resolves symlinks."""
try:
rel_ref, _ = Carbon.File.FSPathMakeRef(p.encode('utf-8'))
# The OSX underlying code uses NFD but python strings are in NFC. This
# will cause issues with os.listdir() for example. Since the dtrace log
# *is* in NFC, normalize it here.
out = unicodedata.normalize(
'NFC', rel_ref.FSRefMakePath().decode('utf-8'))
if p.endswith(os.path.sep) and not out.endswith(os.path.sep):
return out + os.path.sep
return out
except MacOS.Error, e:
if e.args[0] in (-43, -120):
# The path does not exist. Try to recurse and reconstruct the path.
# -43 means file not found.
# -120 means directory not found.
base = os.path.dirname(p)
rest = os.path.basename(p)
return os.path.join(_native_case(base), rest)
raise OSError(
e.args[0], 'Failed to get native path for %s' % p, p, e.args[1])
def _split_at_symlink_native(base_path, rest):
"""Returns the native path for a symlink."""
base, symlink, rest = split_at_symlink(base_path, rest)
if symlink:
if not base_path:
base_path = base
else:
base_path = safe_join(base_path, base)
symlink = find_item_native_case(base_path, symlink)
return base, symlink, rest
def find_item_native_case(root_path, item):
"""Gets the native path case of a single item based at root_path.
There is no API to get the native path case of symlinks on OSX. So it
needs to be done the slow way.
"""
if item == '..':
return item
item = item.lower()
for element in os.listdir(root_path):
if element.lower() == item:
return element
def get_native_path_case(path):
"""Returns the native path case for an existing file.
Technically, it's only HFS+ on OSX that is case preserving and
insensitive. It's the default setting on HFS+ but can be changed.
"""
assert isinstance(path, unicode), repr(path)
if not isabs(path):
raise ValueError(
'get_native_path_case(%r): Require an absolute path' % path, path)
if path.startswith('/dev'):
# /dev is not visible from Carbon, causing an exception.
return path
# Starts assuming there is no symlink along the path.
resolved = _native_case(path)
if path.lower() in (resolved.lower(), resolved.lower() + './'):
# This code path is incredibly faster.
logging.debug('get_native_path_case(%s) = %s' % (path, resolved))
return resolved
# There was a symlink, process it.
base, symlink, rest = _split_at_symlink_native(None, path)
assert symlink, (path, base, symlink, rest, resolved)
prev = base
base = safe_join(_native_case(base), symlink)
assert len(base) > len(prev)
while rest:
prev = base
relbase, symlink, rest = _split_at_symlink_native(base, rest)
base = safe_join(base, relbase)
assert len(base) > len(prev), (prev, base, symlink)
if symlink:
base = safe_join(base, symlink)
assert len(base) > len(prev), (prev, base, symlink)
# Make sure no symlink was resolved.
assert base.lower() == path.lower(), (base, path)
logging.debug('get_native_path_case(%s) = %s' % (path, base))
return base
else: # OSes other than Windows and OSX.
# On non-windows, keep the stdlib behavior.
isabs = os.path.isabs
def find_item_native_case(root, item):
"""Gets the native path case of a single item based at root_path."""
if item == '..':
return item
root = get_native_path_case(root)
return os.path.basename(get_native_path_case(os.path.join(root, item)))
def get_native_path_case(path):
"""Returns the native path case for an existing file.
On OSes other than OSX and Windows, assume the file system is
case-sensitive.
TODO(maruel): This is not strictly true. Implement if necessary.
"""
assert isinstance(path, unicode), repr(path)
if not isabs(path):
raise ValueError(
'get_native_path_case(%r): Require an absolute path' % path, path)
# Give up on cygwin, as GetLongPathName() can't be called.
# Linux traces tends to not be normalized so use this occasion to normalize
# it. This function implementation already normalizes the path on the other
# OS so this needs to be done here to be coherent between OSes.
out = os.path.normpath(path)
if path.endswith(os.path.sep) and not out.endswith(os.path.sep):
return out + os.path.sep
return out
if sys.platform != 'win32': # All non-Windows OSes.
def safe_join(*args):
"""Joins path elements like os.path.join() but doesn't abort on absolute
path.
os.path.join('foo', '/bar') == '/bar'
but safe_join('foo', '/bar') == 'foo/bar'.
"""
out = ''
for element in args:
if element.startswith(os.path.sep):
if out.endswith(os.path.sep):
out += element[1:]
else:
out += element
else:
if out.endswith(os.path.sep):
out += element
else:
out += os.path.sep + element
return out
def split_at_symlink(base_dir, relfile):
"""Scans each component of relfile and cut the string at the symlink if
there is any.
Returns a tuple (base_path, symlink, rest), with symlink == rest == None if
not symlink was found.
"""
if base_dir:
assert relfile
assert os.path.isabs(base_dir)
index = 0
else:
assert os.path.isabs(relfile)
index = 1
def at_root(rest):
if base_dir:
return safe_join(base_dir, rest)
return rest
while True:
try:
index = relfile.index(os.path.sep, index)
except ValueError:
index = len(relfile)
full = at_root(relfile[:index])
if os.path.islink(full):
# A symlink!
base = os.path.dirname(relfile[:index])
symlink = os.path.basename(relfile[:index])
rest = relfile[index:]
logging.debug(
'split_at_symlink(%s, %s) -> (%s, %s, %s)' %
(base_dir, relfile, base, symlink, rest))
return base, symlink, rest
if index == len(relfile):
break
index += 1
return relfile, None, None