# Source code for openmdao.utils.file_utils

"""
Utilities for working with files.
"""
import sys
import os
import importlib
import types
from collections.abc import Iterable
from fnmatch import fnmatch
from os.path import join, basename, dirname, isfile, split, splitext, abspath
import pathlib
import shutil

from openmdao.utils.om_warnings import issue_warning


def get_module_path(fpath):
    """
    Return the dotted Python module path corresponding to a module file.

    Enclosing packages are discovered by walking up the directory tree for as long
    as each directory contains an ``__init__.py`` file.

    Parameters
    ----------
    fpath : str
        Pathname of file.

    Returns
    -------
    str or None
        Full module path of the given file.  Returns None if the file is not part of
        a package.
    """
    pkg_dir, fname = split(abspath(fpath))

    # The file's own directory must be a package, otherwise there is no module path.
    if not isfile(join(pkg_dir, '__init__.py')):
        return None

    # A package's __init__ file is named after the package itself, so it contributes
    # no trailing component of its own.
    parts = [] if fname.startswith('__init__.') else [splitext(fname)[0]]

    # Collect package names from the innermost package outward.
    while isfile(join(pkg_dir, '__init__.py')):
        pkg_dir, pkg_name = split(pkg_dir)
        parts.append(pkg_name)

    parts.reverse()
    return '.'.join(parts)
def package_iter(start_dir='.', dir_includes=None, dir_excludes=(), file_includes=None,
                 file_excludes=()):
    """
    Iterate over python files in packages (recursively) starting in start_dir.

    NOTE : all *_includes and *_excludes are applied to *local* directory and file names.

    The patterns ``*.py`` (include), ``_*``, ``test_*``, ``api.py``, ``parallel_api.py``
    (file excludes) and ``test``, ``tests`` (directory excludes) are always added to
    whatever the caller supplies.

    Parameters
    ----------
    start_dir : str
        Starting directory.
    dir_includes : iter of str or None
        Glob patterns for directory inclusion. Be careful here because dir names are local,
        so, for example, if includes=('foo',), then directory 'foo' would be included, but
        any subdirectories of 'foo' that were not also named 'foo' would not.
    dir_excludes : iter of str
        Glob patterns for directory exclusion.
    file_includes : iter of str or None
        Glob patterns for file inclusion.
    file_excludes : iter of str
        Glob patterns for file exclusion.

    Yields
    ------
    str
        Filenames (full path from start_dir).
    """
    # Start from the mandatory patterns and merge in anything the caller provided.
    wanted_files = {'*.py'}
    if file_includes is not None:
        wanted_files.update(file_includes)

    skipped_files = {'_*', 'test_*', 'api.py', 'parallel_api.py'}
    if file_excludes is not None:
        skipped_files.update(file_excludes)

    skipped_dirs = {'test', 'tests'}
    if dir_excludes is not None:
        skipped_dirs.update(dir_excludes)

    yield from files_iter(start_dir, dir_includes=dir_includes, dir_excludes=skipped_dirs,
                          file_includes=wanted_files, file_excludes=skipped_files,
                          package_only=True)
def files_iter(start_dir='.', dir_includes=None, dir_excludes=(), file_includes=None,
               file_excludes=(), package_only=False):
    """
    Iterate over files (recursively) starting in start_dir.

    NOTE : all *_includes and *_excludes are applied to *local* directory and file names.

    Each matching file is yielded exactly once, even if it matches more than one of the
    patterns in file_includes.

    Parameters
    ----------
    start_dir : str
        Starting directory.
    dir_includes : iter of str or None
        Glob patterns for directory inclusion. Be careful here because dir names are local,
        so, for example, if includes=('foo',), then directory 'foo' would be included, but
        any subdirectories of 'foo' that were not also named 'foo' would not.
    dir_excludes : iter of str or None
        Glob patterns for directory exclusion.
    file_includes : iter of str or None
        Glob patterns for file inclusion.
    file_excludes : iter of str or None
        Glob patterns for file exclusion.
    package_only : bool
        If True, only yield files that are contained in a python package.

    Yields
    ------
    str
        Filenames (full path from start_dir).
    """
    # Materialize the pattern collections once.  This tolerates None for any of them and
    # keeps one-shot iterables (e.g. generators) usable for every directory visited.
    dir_includes = tuple(dir_includes or ())
    dir_excludes = tuple(dir_excludes or ())
    file_includes = tuple(file_includes or ())
    file_excludes = tuple(file_excludes or ())

    for root, dirs, files in os.walk(start_dir):
        if package_only and '__init__.py' not in files:
            # Not a package: skip its files and prune the whole subtree.
            dirs[:] = []
            continue

        # os.walk only honors pruning done in place on 'dirs'.
        if dir_excludes:
            dirs[:] = sorted(d for d in dirs
                             if not any(fnmatch(d, pat) for pat in dir_excludes))

        if dir_includes:
            dirs[:] = sorted(d for d in dirs
                             if any(fnmatch(d, pat) for pat in dir_includes))

        for f in files:
            if any(fnmatch(f, pat) for pat in file_excludes):
                continue
            # No include patterns means "include everything that is not excluded".
            if not file_includes or any(fnmatch(f, pat) for pat in file_includes):
                yield join(root, f)
def _to_filename(spec): """ Return the filename part of the given testspec or the full string if the string is a filename. Parameters ---------- spec : str The filename or testspec. Returns ------- str The filename. """ if ':' in spec and not isfile(spec): fname, _ = spec.rsplit(':', 1) if not fname.endswith('.py'): try: mod = importlib.import_module(fname) return mod.__file__ except ImportError: return spec return fname return spec def _load_and_exec(script_name, user_args): """ Load and exec the given script as __main__. Parameters ---------- script_name : str The name of the script to load and exec. user_args : list of str Args to be passed to the user script. """ if ':' in script_name and not isfile(script_name): return _load_and_run_test(script_name) sys.path.insert(0, dirname(script_name)) sys.argv[:] = [script_name] + user_args with open(script_name, 'rb') as fp: code = compile(fp.read(), script_name, 'exec') globals_dict = { '__file__': script_name, '__name__': '__main__', '__package__': None, '__cached__': None, } exec(code, globals_dict) # nosec: private, internal use only
def fname2mod_name(fname):
    """
    Convert a string to a valid python module name.

    The directory part and the '.py' extension are dropped, and every punctuation or
    space character listed below is replaced by an underscore.

    Parameters
    ----------
    fname : str
        The filename to convert.

    Returns
    -------
    str
        A valid module name corresponding to the given filename.

    Raises
    ------
    ValueError
        If fname does not end with '.py'.
    """
    if not fname.endswith('.py'):
        raise ValueError(f"'{fname}' does not end with '.py'")

    # Every character here maps to '_'.  '+' and '!' are separate entries; they must each
    # be replaced on their own, not only when they appear together as '+!'.
    to_replace = "- .()[]{}=+!@#$%^&*~`;:\"'<>?/\\|"
    table = str.maketrans(to_replace, '_' * len(to_replace))

    stem = basename(fname).rsplit('.', 1)[0]
    return stem.translate(table)
def _load_and_run_test(testspec): """ Load and run an individual test function. Parameters ---------- testspec : str <fpath_or_modpath>:<testcase>.<method> OR <fpath_or_modpath>:<function> """ syspath_save = sys.path[:] modpath, funcpath = testspec.rsplit(':', 1) orig_modpath = modpath if modpath.endswith('.py'): modpath = get_module_path(modpath) if modpath is None: # create a module dynamically modpath = fname2mod_name(orig_modpath) mod = types.ModuleType(modpath) sys.modules[modpath] = mod mod.__file__ = modpath mod.__name__ = modpath mod.__package__ = None mod.__cached__ = None with open(orig_modpath, 'rb') as fp: code = compile(fp.read(), orig_modpath, 'exec') exec(code, mod.__dict__) # nosec: private, internal use only else: mod = importlib.import_module(modpath) else: mod = importlib.import_module(modpath) try: return _run_test_func(mod, funcpath) finally: sys.path = syspath_save def _run_test_func(mod, funcpath): """ Run the given TestCase method or test function in the given module. Parameters ---------- mod : module The module where the test resides. funcpath : str Either <testcase>.<method_name> or <func_name>. Returns ------- object In the case of a module level function call, returns whatever the function returns. 
""" parts = funcpath.split('.', 1) if len(parts) == 2: tcase_name, method_name = parts testcase = getattr(mod, tcase_name)(methodName=method_name) setup = getattr(testcase, 'setUp', None) if setup is not None: setup() getattr(testcase, method_name)() teardown = getattr(testcase, 'tearDown', None) if teardown: teardown() else: funcname = parts[0] return getattr(mod, funcname)() if sys.version_info >= (3, 8): from importlib.metadata import entry_points if sys.version_info >= (3, 10): def _eps_get(group): eps = entry_points().select(group=group) for name in eps.names: yield eps[name] else: def _eps_get(group): eps = entry_points() if group in eps: yield from eps[group] def _iter_entry_points(group): # there seems to be a bug currently where entry points can show up more than # once in the iterator, so keep track of the ones we've already seen. # TODO: revisit later to see if we can remove the check seen = set() for ep in _eps_get(group): if ep.name not in seen: seen.add(ep.name) yield ep else: try: import pkg_resources except ImportError: def _iter_entry_points(group): issue_warning("Can't retrieve entry points because pkg_resources is not installed. " "Either install it using 'pip install setuptools' or upgrade to python " "3.8 or newer.") return () else: def _iter_entry_points(group): yield from pkg_resources.iter_entry_points(group)
def text2html(text, title='', style=None):
    """
    Wrap the given text for display as an html file.

    The text is placed inside a <pre> element below a centered <h2> title.  The result
    is an html syntax string that can be written to a file.

    Parameters
    ----------
    text : str
        Text to be displayed.
    title : str
        Title to display above text.
    style : str or None
        If not None, use as the contents of the style block for the enclosing <pre> tag.

    Returns
    -------
    str
        Content string to create an html file.
    """
    if style is None:
        style = """
            display: block;
            font-family: monospace;
            font-size: 1.5em;
            white-space: pre;
            margin: 1em 0;
        """

    # Fixed page fragments; the caller-supplied pieces are spliced in between them.
    head_open = """
<!DOCTYPE html>
<html lang="en">
<head>
<style>
    .center {
        display: block;
        margin-left: auto;
        margin-right: auto;
        width: 90%;
    }
    h2 {text-align: center;}
    pre {"""
    head_close = """
    }
</style>
</head>
<body>
<h2>"""
    pre_open = """</h2>
<pre>
"""
    page_close = """
</pre>
</body>
</html>
"""

    return ''.join((head_open, style, head_close, title, pre_open, text, page_close))
def image2html(imagefile, title='', alt=''):
    """
    Wrap the given image for display as an html file.

    The image is referenced (not embedded) by an <img> element placed below a centered
    <h2> title.  The result is an html syntax string that can be written to a file.

    Parameters
    ----------
    imagefile : str
        Name of image file to be displayed.
    title : str
        The page title.
    alt : str
        Set the alt text for the image.

    Returns
    -------
    str
        Content string to create an html file.
    """
    page_open = """
<!DOCTYPE html>
<html lang="en">
<head>
<style>
    h2 {text-align: center;}
    .center {
        display: block;
        margin-left: auto;
        margin-right: auto;
        width: 80%;
    }
</style>
</head>
<body>
<h2>"""
    image_and_close = f"""
<img src="{imagefile}" alt="{alt}" class="center"></img>
</body>
</html>
"""

    return ''.join((page_open, title, "</h2>", image_and_close))
def get_work_dir():
    """
    Return the value of the OPENMDAO_WORKDIR environment variable, or else os.getcwd().

    The environment variable takes precedence when it is set to a non-empty string.

    Returns
    -------
    str
        The working directory.
    """
    # An unset or empty variable both fall through to the current working directory.
    return os.environ.get('OPENMDAO_WORKDIR') or os.getcwd()
def _get_outputs_dir(obj, *subdirs, mkdir=True): """ Return a pathlib.Path for the outputs directory related to the given problem or system. This path is based on the "problem path" in a hierarchy of problems. The resulting outputs directory will be nested where each problem's output directory contains its own output files and subdirectories as well as any subproblems. This directory also will include a .openmdao_outputs hidden file that marks this directory as being created by OpenMDAO. This makes identifying the directory during cleanup more reliable. Parameters ---------- obj : Problem or System or Solver The problem or system or Solver from which we are opening a file. subdirs : str Additional subdirectories under the top level directory for the relevant problem. Each subdir is passed as a separate positional argument. mkdir : bool If True, force the creation of this directory. """ from openmdao.core.problem import Problem from openmdao.core.system import System from openmdao.solvers.solver import Solver if isinstance(obj, Problem): prob_meta = obj._metadata elif isinstance(obj, System): prob_meta = obj._problem_meta elif isinstance(obj, Solver): system = obj._system if system is None: raise RuntimeError('The output directory for Solvers cannot be accessed ' 'before final_setup.') prob_meta = system()._problem_meta else: raise RuntimeError(f'Cannot get problem metadata for object: {obj}') if prob_meta is None or prob_meta.get('pathname', None) is None: raise RuntimeError('The output directory cannot be accessed before setup.') prob_pathname = prob_meta['pathname'] work_dir = pathlib.Path(get_work_dir()) if mkdir and not work_dir.exists(): work_dir.mkdir(exist_ok=True) outs_dir = work_dir # it's possible that a sub-problem requests the output directory before its parent problem, # so we need to check existence of the parent directories for all parent problems and create # them (ensuring the .openmdao_out file is present) if they don't exist. 
Otherwise they # won't be properly identified during cleanup. # Also, we don't check if rank==0 here because when mkdir is True, we need to ensure that the # directory has been created by the time we return, so just letting any rank create the file # and handling race conditions by using exist_ok=True works better than trying to limit creation # to rank==0 and performing some sort of barrier operation to ensure that the file is created # before returning in all ranks. for p in prob_pathname.split('/'): outs_dir = outs_dir / f'{p}_out' if mkdir and not outs_dir.exists(): outs_dir.mkdir(exist_ok=True) # Touch the .openmdao_out file for the output directory to ease identification. outs_file = outs_dir / '.openmdao_out' if not outs_file.exists(): try: open(outs_file, 'w').close() except OSError: pass if subdirs: dirpath = outs_dir / pathlib.Path(*subdirs) if mkdir and not dirpath.exists(): dirpath.mkdir(parents=True, exist_ok=True) return dirpath return outs_dir def _is_openmdao_output_dir(directory): """ Check if a directory is an OpenMDAO output directory. Parameters ---------- directory : str or Path The directory to check. Returns ------- bool True if the directory is an OpenMDAO output directory, False otherwise. """ directory = pathlib.Path(directory) return directory.is_dir() and (directory / '.openmdao_out').exists() def _find_openmdao_output_dirs(paths, pattern='*_out', recurse=False): """ Find all OpenMDAO output directories in the given path. Parameters ---------- paths : str or Path or Iterable The path to search for OpenMDAO output directories. pattern : str A glob pattern that the output directories are required to match. recurse : bool If True, search recursively. Returns ------- list A list of OpenMDAO output directories. 
""" if isinstance(paths, (str, pathlib.Path)): paths = [paths] elif not isinstance(paths, Iterable): raise ValueError("The 'paths' parameter must be a string, Path, or an iterable of them.") openmdao_dirs = [] for path in paths: path = pathlib.Path(path) if not path.is_dir(): continue for root, dirs, _ in os.walk(path): # Use a copy of the dirs list to avoid modifying it while iterating root_path = pathlib.Path(root) if _is_openmdao_output_dir(root) and fnmatch(root_path.name, pattern): openmdao_dirs.append(root_path) for d in dirs[:]: dir_path = pathlib.Path(root) / d if _is_openmdao_output_dir(dir_path): if fnmatch(dir_path.name, pattern): openmdao_dirs.append(dir_path) dirs.remove(d) # Do not recurse into OpenMDAO output directories if not recurse: break return openmdao_dirs
def clean_outputs(obj='.', recurse=False, prompt=True, pattern='*_out', dryrun=False):
    """
    Remove output directories created by OpenMDAO.

    A directory is determined to be an OpenMDAO output directory if its name
    ends in `_out` and it contains the file `.openmdao_out`.

    Each directory is removed at most once, even if it was found more than once (e.g. via
    duplicate or overlapping search paths) or was already removed together with an enclosing
    output directory.

    Parameters
    ----------
    obj : Problem or System or Solver or str or Path
        The problem or system or solver whose output file should be removed. An iterable
        of path names is also accepted.
    recurse : bool
        If True, and if obj is a string or Path, recurse into it finding and
        removing OpenMDAO output directories along the way. This option is
        ignored if obj is a Problem, System, or Solver.
    prompt : bool
        If True, prompt the user to confirm directories to be removed. This option is
        ignored if obj is a Problem, System, or Solver.
    pattern : str
        A glob pattern used for matching directories.
    dryrun : bool
        If True, report which directories would be removed without actually removing them.
    """
    output_dirs = []

    if isinstance(obj, (str, pathlib.Path, Iterable)):
        # A single pathname or path object, or an iterable of them, was given.
        output_dirs.extend(_find_openmdao_output_dirs(obj, pattern=pattern, recurse=recurse))
    elif hasattr(obj, 'get_outputs_dir'):
        output_dir = obj.get_outputs_dir(mkdir=False)
        # The caller explicitly named the object, so no confirmation is requested.
        prompt = False
        if output_dir and _is_openmdao_output_dir(output_dir):
            output_dirs.append(pathlib.Path(output_dir))

    # Drop duplicates; sorting also places an enclosing directory before its contents.
    output_dirs = sorted(set(output_dirs))

    if not output_dirs:
        print('No OpenMDAO output directories found.')
        return
    else:
        print(f'Found {len(output_dirs)} OpenMDAO output directories:')

    removed_count = 0
    for dir_path in output_dirs:
        if dryrun:
            print(f'Would remove {dir_path} (dryrun = True).')
            continue

        if not dir_path.exists():
            # Already gone, e.g. removed along with an enclosing directory above.
            continue

        if prompt:
            response = input(f"Remove {dir_path}? [y/N] ").strip().lower()
            if response != 'y':
                continue

        shutil.rmtree(dir_path)
        print(f'Removed {dir_path}')
        removed_count += 1

    print(f'Removed {removed_count} OpenMDAO output directories.')