Skip to content

IO

Different IO operations that are used to load and save data throughout the exputils package. They are usually not needed directly to log or load data, as this is done with the functions in the exputils.data module. See the Logging and Loading sections for more information.

makedirs

Creates a directory and all intermediate directories if they do not exist in a file path.

Parameters:

Name Type Description Default
path str

The directory path to create.

required
Source code in exputils/io/general.py
def makedirs(path: str):
    """
    Creates a directory and all intermediate directories if they do not exist in a file path.

    An empty path refers to the current working directory, which already exists,
    so nothing is created for it.

    Parameters:
        path (str): The directory path to create.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """

    # os.path.split() returns '' as directory for a bare file name and
    # os.makedirs('') raises a FileNotFoundError, so there is nothing to create.
    if not path:
        return

    # exist_ok avoids the race between a separate existence check and the
    # creation when several processes create the same directory concurrently.
    os.makedirs(path, exist_ok=True)

makedirs_for_file

Creates the necessary directories for a given file path if they do not already exist.

Parameters:

Name Type Description Default
filepath str

The complete file path for which the directories are to be created.

required
Source code in exputils/io/general.py
def makedirs_for_file(filepath: str):
    """
    Creates the necessary directories for a given file path if they do not already exist.

    If the file path has no directory component (for example a bare file name),
    no directory is created.

    Parameters:
        filepath (str): The complete file path for which the directories are to be created.
    """

    directory_path = os.path.dirname(filepath)

    # A bare file name has an empty directory part; creating '' would fail.
    if directory_path:
        makedirs(directory_path)

load_numpy_files

Loads numpy files from a specified directory into an AttrDict.

Parameters:

Name Type Description Default
directory str

The path to the directory containing the numpy files.

required
allowed_data_filter list

A list of allowed file names to be loaded. If specified, only files with names in this list will be loaded.

None
denied_data_filter list

A list of denied file names to be excluded from loading. If specified, files with names in this list will not be loaded.

None
allow_pickle bool

Whether to allow loading pickled (serialized) objects. Default is True.
⚠ This could allow arbitrary code execution. Only load files you trust!

True

Raises:

Type Description
ValueError

If both allowed_data_filter and denied_data_filter are specified.

FileNotFoundError

If the specified directory does not exist.

Exception

If an error occurs during loading of a file.

Returns:

Name Type Description
data AttrDict

Dictionary with loaded data where the keys are file names without extensions and the values are the respective numpy arrays.

Source code in exputils/io/numpy.py
def _recover_numpy_scalar(value):
    """Converts a numpy array with an empty shape into a scalar of its dtype, other values are returned as is."""
    # numpy encapsulates scalars as ndarrays with an empty shape
    if len(value.shape) == 0:
        return value.dtype.type(value)
    return value


def load_numpy_files(directory: str,
                     allowed_data_filter: Optional[list] = None,
                     denied_data_filter: Optional[list] = None,
                     allow_pickle: bool = True) -> AttrDict:
    """Loads numpy files from a specified directory into an AttrDict.

    Both `*.npy` and `*.npz` files directly inside the directory are loaded.
    The content of a `*.npz` file is stored as a nested AttrDict.

    Parameters:
        directory (str):
            The path to the directory containing the numpy files.
        allowed_data_filter (list, optional):
            A list of allowed file names to be loaded.
            If specified, only files with names in this list will be loaded.
        denied_data_filter (list, optional):
            A list of denied file names to be excluded from loading.
            If specified, files with names in this list will not be loaded.
        allow_pickle (bool):
            Whether to allow loading pickled (serialized) objects.
            Default is True. <br>
            :warning: This could allow arbitrary code execution. Only load files you trust!

    Raises:
        ValueError:
            If both allowed_data_filter and denied_data_filter are specified.
        FileNotFoundError:
            If the specified directory does not exist.
        Exception:
            If an error occurs during loading of a file.

    Returns:
        data (AttrDict):
            Dictionary with loaded data where the keys are file names without extensions
            and the values are the respective numpy arrays.
    """

    if allowed_data_filter is not None and denied_data_filter is not None:
        raise ValueError('allowed_data_filter and denied_data_filter can not both be set, only one or none!')

    if not os.path.isdir(directory):
        raise FileNotFoundError('Directory {!r} does not exist!'.format(directory))

    data = AttrDict()

    for file in glob(os.path.join(directory, '*.npy')):
        stat_name = os.path.splitext(os.path.basename(file))[0]

        if not eu.misc.is_allowed(stat_name, allowed_list=allowed_data_filter, denied_list=denied_data_filter):
            continue

        try:
            stat_val = np.load(file, allow_pickle=allow_pickle)
        except FileNotFoundError:
            raise
        except Exception as e:
            raise Exception('Exception during loading of file {!r}!'.format(file)) from e

        data[stat_name] = _recover_numpy_scalar(stat_val)

    for file in glob(os.path.join(directory, '*.npz')):
        stat_name = os.path.splitext(os.path.basename(file))[0]

        if not eu.misc.is_allowed(stat_name, allowed_list=allowed_data_filter, denied_list=denied_data_filter):
            continue

        try:
            # The archive keeps a file handle open until it is closed, so all
            # entries are copied into the AttrDict inside the context manager.
            with np.load(file, allow_pickle=allow_pickle) as archive:
                stat_vals = AttrDict(archive)
        except FileNotFoundError:
            raise
        except Exception as e:
            raise Exception('Exception during loading of file {!r}!'.format(file)) from e

        # remove data that should not be loaded
        # NOTE(review): the same filter lists are applied to the file name and to
        # the entry names inside the archive - confirm that this is intended.
        denied_keys = [k for k in stat_vals
                       if not eu.misc.is_allowed(k, allowed_list=allowed_data_filter, denied_list=denied_data_filter)]
        for denied_key in denied_keys:
            del stat_vals[denied_key]

        # recover the original type of scalars
        for substat_name in list(stat_vals):
            stat_vals[substat_name] = _recover_numpy_scalar(stat_vals[substat_name])

        data[stat_name] = stat_vals

    return data

save_dict_to_numpy_files

Saves a dictionary with numpy arrays to numpy files (either .npy, .npz, or .npz compressed formats).

Parameters:

Name Type Description Default
data dict

Dictionary containing the data to be saved, with keys as filenames and values as data to be saved.

required
path str

Directory or file path where the numpy files will be saved. Default is the current directory.

'.'
mode str

Mode in which to save the data. Can be 'npy', 'npz', or 'cnpz'. Default is 'npy'.

'npy'

Raises:

Type Description
ValueError

If an invalid mode is provided.

Source code in exputils/io/numpy.py
def save_dict_to_numpy_files(data: dict,
                             path: Optional[str] = '.',
                             mode: Optional[str] = 'npy'):
    """Writes the entries of a dictionary to numpy files (.npy, .npz, or compressed .npz).

    In 'npy' mode each entry is written to its own file inside the directory given by path.
    In 'npz' and 'cnpz' mode all entries are written to a single archive at path.

    Parameters:
        data (dict):
            Dictionary containing the data to be saved, with keys as filenames and values as data to be saved.
        path (str):
            Directory or file path where the numpy files will be saved.
            Default is the current directory.
        mode (str):
            Mode in which to save the data (case-insensitive).
            Can be 'npy', 'npz', or 'cnpz'.
            Default is 'npy'.

    Raises:
        ValueError: If an invalid mode is provided.
    """

    normalized_mode = mode.lower()

    if normalized_mode == 'npy':
        # path is a directory that receives one file per dictionary entry
        eu.io.makedirs(path)
        for entry_name, entry_values in data.items():
            target_file = os.path.join(path, entry_name)
            np.save(target_file, entry_values)
        return

    if normalized_mode in ('npz', 'cnpz'):
        # path is a single archive file that holds all dictionary entries
        eu.io.makedirs_for_file(path)
        if normalized_mode == 'npz':
            np.savez(path, **data)
        else:
            np.savez_compressed(path, **data)
        return

    raise ValueError('Unknown numpy logging mode {!r}! Only \'npy\', \'npz\' and \'cnpz\' are allowed.'.format(mode))

load_dill

Loads a serialized object from a file using the dill library.

Parameters:

Name Type Description Default
file_path str

The path to the file from which to load the object. The file extension is optionally added if not already present.

required

Returns:

Name Type Description
obj Any

The object that was deserialized from the file.

Notes:

  • If the specified file does not exist, the function attempts to append the expected file extension (.dill) before throwing an error.
  • ⚠ This could allow arbitrary code execution. Only load files you trust!
Source code in exputils/io/dill.py
def load_dill(file_path: str) -> object:
    """
    Deserializes an object from a file with the [dill](https://pypi.org/project/dill/) library.

    Parameters:
        file_path (str):
            The path to the file from which to load the object.
            The file extension is optionally added if not already present.

    Returns:
        obj (Any): The object that was deserialized from the file.

    <h4>Notes:</h4>

    - If the given path does not exist and does not already end with the dill
      file extension, the extension is appended before the file is opened.
    - :warning: This could allow arbitrary code execution. Only load files you trust!
    """
    extension_suffix = '.' + DILL_FILE_EXTENSION

    # only fall back to the path with extension if the given path is missing
    if not os.path.exists(file_path) and not file_path.endswith(extension_suffix):
        file_path = file_path + extension_suffix

    with open(file_path, 'rb') as dill_file:
        return dill.load(dill_file)

load_dill_files

Loads all serialized objects from a directory using the dill library and returns them in a dictionary.

Parameters:

Name Type Description Default
directory str

The path to the directory containing dill-serialized files.

required

Raises:

Type Description
FileNotFoundError

If the specified directory does not exist.

Returns:

Name Type Description
data AttrDict

An attribute dictionary where keys are the file names (without extensions) and values are the deserialized objects.

Notes:

  • Only files in the directory that end with the dill file extension (.dill) are loaded.
  • ⚠ This could allow arbitrary code execution. Only load files you trust!
Source code in exputils/io/dill.py
def load_dill_files(directory: str):
    """
    Deserializes every dill file of a directory with the [dill](https://pypi.org/project/dill/)
    library and collects the objects in a dictionary.

    Parameters:
        directory (str):
            The path to the directory containing dill-serialized files.

    Raises:
        FileNotFoundError: If the specified directory does not exist.

    Returns:
        data (AttrDict):
            An attribute dictionary where keys are the file names (without extensions) and
            values are the deserialized objects.

    <h4>Notes:</h4>

    - Only files directly inside the directory whose names end with the dill
      file extension are loaded.
    - :warning: This could allow arbitrary code execution. Only load files you trust!
    """

    if not os.path.isdir(directory):
        raise FileNotFoundError('Directory {!r} does not exist!'.format(directory))

    loaded_objects = eu.AttrDict()

    search_pattern = os.path.join(directory, '*.' + DILL_FILE_EXTENSION)
    for dill_path in glob(search_pattern):
        # the key is the file name stripped of its directory and extension
        object_name, _ = os.path.splitext(os.path.basename(dill_path))
        loaded_objects[object_name] = load_dill(dill_path)

    return loaded_objects

save_dill

Serializes a Python object and saves it to a file using the dill serialization library.

Parameters:

Name Type Description Default
obj Any

The Python object to be serialized.

required
file_path str

The file path where the serialized object will be saved.

required

Notes:

  • If the provided file path does not have the correct file extension for Dill files, the extension will be added automatically.
  • The necessary directories for the file path will be created if they do not exist.
Source code in exputils/io/dill.py
def save_dill(obj,
              file_path: str):
    """
    Writes a Python object to a file, serialized with the [dill](https://pypi.org/project/dill/)
    library.

    Parameters:
        obj (Any):
            The Python object to be serialized.
        file_path (str):
            The file path where the serialized object will be saved.

    <h4>Notes:</h4>

    - If the provided file path does not end with the file extension for Dill files,
      the extension is appended automatically.
    - The necessary directories for the file path will be created if they do not exist.
    """
    extension_suffix = '.' + DILL_FILE_EXTENSION
    target_path = file_path if file_path.endswith(extension_suffix) else file_path + extension_suffix

    eu.io.makedirs_for_file(target_path)

    with open(target_path, 'wb') as dill_file:
        dill.dump(obj, dill_file)