wake-up-neo.net

Filtern von os.walk()-Verzeichnissen und -Dateien

Ich suche nach einer Möglichkeit, Dateimuster einzuschließen/auszuschließen und Verzeichnisse von einem Aufruf von os.walk() auszuschließen.

Das mache ich jetzt:

import fnmatch
import os

includes = ['*.doc', '*.odt']
excludes = ['/home/paulo-freitas/Documents']


def _filter(paths, include_patterns=None, exclude_patterns=None):
    """Return the entries of *paths* that are included and not excluded.

    A path is included when it is an existing directory or matches at
    least one include glob.  It is then dropped again when it matches an
    exclude glob, or when it is a directory equal to an exclude entry.

    Args:
        paths: Iterable of path strings to test.
        include_patterns: Glob patterns to keep; defaults to the
            module-level ``includes`` list.
        exclude_patterns: Glob patterns / directory paths to drop;
            defaults to the module-level ``excludes`` list.

    Returns:
        A list with the surviving paths, in their original order.
    """
    if include_patterns is None:
        include_patterns = includes
    if exclude_patterns is None:
        exclude_patterns = excludes

    matches = []
    for path in paths:
        # Looked up once per path instead of once per pattern.
        is_dir = os.path.isdir(path)

        # Directories pass the include stage regardless of the globs, but
        # with no include patterns at all nothing is included.
        included = bool(include_patterns) and (
            is_dir
            or any(fnmatch.fnmatch(path, pattern)
                   for pattern in include_patterns)
        )
        if not included:
            continue

        excluded = any(
            (is_dir and path == pattern) or fnmatch.fnmatch(path, pattern)
            for pattern in exclude_patterns
        )
        if not excluded:
            matches.append(path)

    return matches

# Walk the home directory and print every file that survives _filter.
for root, dirs, files in os.walk('/home/paulo-freitas'):
    # Slice assignment changes the lists in place, so the walk only descends
    # into the directories that _filter kept.
    dirs[:] = _filter([os.path.join(root, name) for name in dirs])
    files[:] = _filter([os.path.join(root, name) for name in files])

    for entry in files:
        print(os.path.join(root, entry))

Die Frage ist: Gibt es einen besseren Weg, dies zu tun? Wie?

38
Paulo Freitas

Diese Lösung verwendet fnmatch.translate zum Konvertieren von Glob-Mustern in reguläre Ausdrücke (es wird davon ausgegangen, dass die Includes nur für Dateien verwendet werden):

import fnmatch
import os
import os.path
import re

# Glob patterns: includes apply to files only, excludes to dirs and files.
includes = ['*.doc', '*.odt']
excludes = ['/home/paulo-freitas/Documents']

# Collapse each list of globs into one alternation regex.  When there is
# nothing to exclude, fall back to r'$.' so the exclude regex stays non-empty.
includes = r'|'.join(fnmatch.translate(pattern) for pattern in includes)
excludes = r'|'.join(fnmatch.translate(pattern) for pattern in excludes) or r'$.'

for root, dirs, files in os.walk('/home/paulo-freitas'):
    # Replace the directory list in place with the full paths that are not
    # excluded, so the walk skips the excluded directories.
    dirs[:] = [
        full_path
        for full_path in (os.path.join(root, name) for name in dirs)
        if not re.match(excludes, full_path)
    ]

    # Print the files that are not excluded and match an include pattern.
    for name in files:
        fname = os.path.join(root, name)
        if re.match(excludes, fname) or not re.match(includes, fname):
            continue
        print(fname)
46
Oben Sonne

Von docs.python.org

os.walk(top[, topdown=True[, onerror=None[, followlinks=False]]])

Wenn topdown auf True gesetzt ist, kann der Aufrufer die Verzeichnisnamenliste direkt (in-place) ändern. Dies kann verwendet werden, um die Suche zu beschneiden.

for root, dirs, files in os.walk('/home/paulo-freitas', topdown=True):
    # Prune in place: directory names listed in excludes are not descended
    # into.  (fnmatch.filter plus a set difference would also work, but it
    # is harder to read.)
    dirs[:] = [name for name in dirs if name not in excludes]
    for pat in includes:
        for name in files:
            if fnmatch.fnmatch(name, pat):
                print(os.path.join(root, name))

Ich möchte darauf hinweisen, dass der obige Code davon ausgeht, dass excludes Verzeichnisnamen (Muster) und keine vollständigen Pfade enthält. Um dem Fall des Fragestellers zu entsprechen, müssen Sie die List Comprehension so anpassen, dass sie mit "if os.path.join(root, d) not in excludes" filtert.

21
kojiro

warum fnmatch?

import os
excludes=....
for ROOT, DIR, FILES in os.walk("/path"):
    # Report files by extension.  The leading dot keeps a name such as
    # "mydoc" from being mistaken for a .doc file.
    for name in FILES:
        if name.endswith(('.doc', '.odt')):
            print(name)
    # Report the sub-directories whose name is not listed in excludes.
    # NOTE(review): this only prints them; the walk itself is not pruned.
    for directory in DIR:
        if directory not in excludes:
            print(directory)

nicht ausführlich getestet

7
kurumi

dirtools ist perfekt für Ihren Anwendungsfall:

from dirtools import Dir

# Print the files dirtools reports for the current directory.
# NOTE(review): presumably entries matching the patterns in .gitignore are
# left out (exclude_file) -- confirm against the dirtools documentation.
print(Dir('.', exclude_file='.gitignore').files())
1
michaeljoseph

Dies ist ein Beispiel für das Ausschließen von Verzeichnissen und Dateien mit os.walk():

ignoreDirPatterns = [".git"]
ignoreFilePatterns = [".php"]


def copyTree(src, dest, onerror=None):
    """Copy the files below *src* into *dest*, skipping ignored entries.

    A directory is skipped, together with everything below it, when its
    path contains any substring from ``ignoreDirPatterns``.  A file is
    skipped when its name contains any substring from
    ``ignoreFilePatterns``.  Destination directories are created only for
    source directories that contribute at least one copied file.

    Requires Python 3.2+ because of the ``exist_ok`` argument of
    ``os.makedirs``.

    Args:
        src: Root directory of the tree to copy.
        dest: Directory that receives the copies.
        onerror: Optional callable taking an ``OSError``.  It is passed on
            to ``os.walk`` and is also called when a destination directory
            cannot be created; the files of that directory are then
            skipped instead of being copied to a missing target.
    """
    src = os.path.abspath(src)

    def is_ignored_dir(path):
        # Substring test against the whole path, matching ignoreDirPatterns.
        return any(pattern in path for pattern in ignoreDirPatterns)

    for root, dirs, files in os.walk(src, onerror=onerror):
        if is_ignored_dir(root):
            # Everything below shares this path prefix and would be ignored
            # too, so stop the walk from descending.
            dirs[:] = []
            continue

        # Prune ignored sub-directories up front instead of visiting them.
        dirs[:] = [
            name for name in dirs
            if not is_ignored_dir(os.path.join(root, name))
        ]

        wanted = [
            name for name in files
            if not any(pattern in name for pattern in ignoreFilePatterns)
        ]
        if not wanted:
            continue

        # relpath is safe where slicing off a fixed-length prefix is not
        # (e.g. when src is the filesystem root).
        relative = os.path.relpath(root, src)
        dirpath = dest if relative == os.curdir else os.path.join(dest, relative)

        # Create the destination once per directory, not once per file.
        try:
            os.makedirs(dirpath, exist_ok=True)
        except OSError as e:
            if onerror is not None:
                onerror(e)
            continue

        for name in wanted:
            shutil.copy(os.path.join(root, name), dirpath)

Python >= 3.2 wegen exist_ok in makedirs

0
Jahid

Hier ist eine Möglichkeit, dies zu tun

import fnmatch
import os

excludes = ['/home/paulo-freitas/Documents']
matches = []
for path, dirs, files in os.walk(os.getcwd()):
    # Skip directories whose path contains an excluded path.  A `continue`
    # inside an inner `for` would only advance that inner loop, and its
    # `else` branch runs whenever the loop ends without `break`, so the
    # exclusion has to be tested on the outer loop.
    if any(excluded in path for excluded in excludes):
        # Everything below has the same path prefix; do not descend.
        dirs[:] = []
        continue

    for filename in files:
        if fnmatch.fnmatch(filename, '*.doc') or fnmatch.fnmatch(filename, '*.odt'):
            matches.append(os.path.abspath(os.path.join(path, filename)))
print(matches)
0
Senthil Kumaran
import os
includes = ['*.doc', '*.odt']
excludes = ['/home/paulo-freitas/Documents']


def file_search(path, exe):
    """Print the path of every file below *path* whose name matches *exe*.

    Args:
        path: Directory to walk.
        exe: Glob pattern such as ``'*.doc'``.  A bare four-character
            suffix such as ``'.doc'`` is still accepted, as with the old
            ``name[-4:] == exe`` comparison.
    """
    import fnmatch  # local import: only os is imported above this snippet

    for folder, _subdirs, filenames in os.walk(path):
        for name in filenames:
            # Comparing the last four characters alone can never equal a
            # five-character glob like '*.doc', so match the glob properly.
            if fnmatch.fnmatch(name, exe) or name[-4:] == exe:
                print(os.path.join(folder, name))


# NOTE(review): the search root is excludes[0], as in the original snippet;
# confirm this is the directory that is meant to be searched.
for pattern in includes:
    file_search(excludes[0], pattern)
0
juniour