Функция для нахождения наибольшего общего пути и форматирования наряду с именами


Это функция, которую я написал вчера и проверили с большим количеством информации. Это было совершено и находится в использовании, но нет обзора было сделано, поэтому он кажется хорошим кандидатом для проверки кода: это может быть более читабельным и может быть переработан на более простые функции.

Его цель-дать самый длинный совместный путь и строка, в которой перечислены имена файлов списков словарей с полных путей к файлам. Он используется для обработки API в поисковой системе Bitbucket.

import os.path
import py # Used for other functions, listed for possible use in refactoring

def getpaths(files, listfiles=False):
    """For a list of files, a common path prefix and optionally list filenames

    Returns a tuple (common_prefix, filenames):
        common_prefix: Longest common path to all files in the input. If input
                       is a single file, contains full file path.
                       Slash-terminated if present and directory, empty string
                       otherwise.
        filenames: String containing the names of modified files, in the format
                   " M(file1, file2)" if listfiles=True, empty string
                   if either listfiles=False or no file was modified.
    """

    # Handle empty input
    if not files:
        return '', ''
    files = [f['file'] for f in files]
    if not any(files):
        return '', ''

    dirname = os.path.dirname
    basename = os.path.basename

    common_prefix = [dirname(f) for f in files]

    # Single file, show its full path
    if len(files) == 1:
        common_prefix = files[0]
        listfiles = False

    else:
        common_prefix = [path.split(os.sep) for path in common_prefix]
        common_prefix = os.sep.join(os.path.commonprefix(common_prefix))
        if common_prefix and not common_prefix.endswith('/'):
            common_prefix += '/'

    if listfiles:
        filenames = [basename(f) for f in files if f and basename(f)]
        filenames = ' M(%s)' % ', '.join(filenames)
    else:
        filenames = ''
    return common_prefix, filenames

# Test suite    
# Don't worry too much about test style, this is informative enough with py.test.
# Adequate coverage and readibility are the main concern.
# However, if you think a doctest or classic unittest is better, I'm open to change.

def test_getpaths():
    d = dict

    barefile = [d(file='file')]
    distinct = [d(file='path1/file1'), d(file='path2/file2'),
                d(file='path3/file')]
    shared = [d(file='path/file1'), d(file='path/file2'),
              d(file='path/file')]

    deepfile = [d(file='a/long/path/to/deepfile.py')]
    slashesfile = [d(file='/slashesfile/')]
    slashleft = [d(file='/slashleft')]
    slashright = [d(file='slashright/')]


    nocommon = distinct + [d(file='path4/file')]
    nocommonplusroot = distinct + barefile

    common = [d(file='some/path/to/file'), d(file='some/path/to/deeper/file'),
              d(file='some/path/to/anotherfile'), d(file='some/path/to/afile')]
    commonplusroot = shared + barefile

    empty = d(file='')
    nocommonplusempty = distinct + [empty]
    commonplusempty = shared + [empty]
    nocommonplusslash = distinct + [d(file='path4/dir/')]
    commonplusslash = shared + [d(file='path/dir/')]

    pypydoubleslash = [d(file='pypy/jit/metainterp/opt/u.py'),
                       d(file='pypy/jit/metainterp/test/test_c.py'),
                       d(file='pypy/jit/metainterp/test/test_o.py')]
    nothing = ('', '')

    # (input, expected output) for listfiles=False
    files_expected = [([], nothing),
                      ([empty], nothing),
                      ([empty, empty], nothing),
                      (barefile, ('file', '')),
                      (deepfile, ('a/long/path/to/deepfile.py', '')),
                      (slashesfile, ('/slashesfile/', '')),
                      (slashleft, ('/slashleft', '')),
                      (slashright, ('slashright/', '')),
                      (nocommon, nothing),
                      (nocommonplusroot, nothing),
                      (nocommonplusempty, nothing),
                      (common, ('some/path/to/', '')),
                      (commonplusroot, nothing),
                      (commonplusempty, nothing),
                      (nocommonplusslash, nothing),
                      (commonplusslash, ('path/', '')),
                      (pypydoubleslash, ('pypy/jit/metainterp/', '')),
                      ]

    for f, wanted in files_expected:
        assert getpaths(f) == wanted

    # (input, expected output) for listfiles=True
    files_expected = [([], nothing),
                      ([empty], nothing),
                      ([empty, empty], nothing),
                      (barefile, ('file', '')),
                      (deepfile, ('a/long/path/to/deepfile.py', '')),
                      (slashesfile, ('/slashesfile/', '')),
                      (slashleft, ('/slashleft', '')),
                      (slashright, ('slashright/', '')),
                      (nocommon, ('', ' M(file1, file2, file, file)')),
                      (nocommonplusroot, ('', ' M(file1, file2, file, file)')),
                      (nocommonplusempty, ('',' M(file1, file2, file)')),
                      (common, ('some/path/to/',
                                ' M(file, file, anotherfile, afile)')),
                      (commonplusroot, ('', ' M(file1, file2, file, file)')),
                      (commonplusempty, ('',' M(file1, file2, file)')),
                      (nocommonplusslash, ('',' M(file1, file2, file)')),
                      (commonplusslash, ('path/',' M(file1, file2, file)')),
                      (pypydoubleslash, ('pypy/jit/metainterp/',
                                         ' M(u.py, test_c.py, test_o.py)')),
                      ]

    for f, wanted in files_expected:
        assert getpaths(f, listfiles=True) == wanted

Улучшения для функции и ее тесты приветствуются. Для некоторого контекста, это, как он используется:

# Here's the kind of data we want to process. The return for listfiles=False
# should be ('bit/', ''). For True,
# ('bit/', ' M(hook.py, __init__.py, __init__.py, test_hook.py)')

commit = {
    u'files': [{u'file': u'bit/hook.py', u'type':
                u'modified'},
               {u'file': u'bit/__init__.py',
                u'type': u'added'},
               {u'file': u'bit/test/__init__.py',
                u'type': u'added'},
               {u'file': u'bit/test/test_hook.py',
                u'type': u'added'}]
          }

files = commit.get('files', [])
common_prefix, filenames = getpaths(files, True)
common_prefix = '/' + common_prefix
print '%s%s: ' % (common_prefix, filenames)


2022
15
задан 21 января 2011 в 08:01 Источник Поделиться
Комментарии