Source code for FabNESO.tasks

"""
Task definitions for FabNESO plug-in to FabSIM software toolkit.

Defines tasks for running simulations using Neptune Exploratory Software (NESO).
"""

import json
import pickle
import re
import shutil
import subprocess
import time
from contextlib import nullcontext
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any, Literal, TypeAlias

import chaospy as cp
import numpy as np
import pandas as pd
import pyvbmc
from easyvvuq.analysis import PCEAnalysis
from easyvvuq.sampling import PCESampler

try:
    from fabsim.base import fab
    from fabsim.deploy.templates import template

    fab.add_local_paths("FabNESO")
    FAB_IMPORTED = True
except ImportError:
    # If FabSim is not installed we create a dummy fab namespace defining placeholder
    # decorators to keep the module importable, for example when building docs
    from types import SimpleNamespace

    fab = SimpleNamespace(task=lambda f: f, load_plugin_env_vars=lambda _: lambda f: f)
    FAB_IMPORTED = False


from .ensemble_tools import (
    SamplingRule,
    create_grid_ensemble,
    create_qmc_ensemble,
    edit_parameters,
    list_parameter_values,
)
from .read_outputs import read_hdf5_datasets


def _check_fab_module_imported() -> None:
    if not FAB_IMPORTED:
        msg = "fabsim.base.fab could not be imported - check FabSim3 is installed"
        raise ImportError(msg)


def _try_convert_to_int_and_check_positive(value: str | int, name: str) -> int:
    try:
        value = int(value)
    except ValueError as e:
        msg = f"{name} is not a valid integer literal: {value}"
        raise ValueError(msg) from e
    if value <= 0:
        msg = f"{name} must be a positive integer: {value}"
        raise ValueError(msg)
    return value


def _check_and_process_resource_args(
    processes: str | int,
    nodes: str | int,
    cpus_per_process: str | int,
    wall_time: str,
) -> tuple[int, int, int, str]:
    processes = _try_convert_to_int_and_check_positive(processes, "processes")
    nodes = _try_convert_to_int_and_check_positive(nodes, "nodes")
    cpus_per_process = _try_convert_to_int_and_check_positive(
        cpus_per_process, "cpus_per_process"
    )
    if processes % nodes != 0:
        msg = "processes must be a multiple of nodes"
        raise ValueError(msg)
    wall_time = wall_time.strip()
    match = re.match(r"^\d{1,2}:(?P<minutes>\d{1,2}):(?P<seconds>\d{1,2})$", wall_time)
    if match is None:
        msg = "wall_time should be of format hh:mm:ss"
        raise ValueError(msg)
    minutes_in_hour = seconds_in_minute = 60
    if (
        int(match.group("minutes")) >= minutes_in_hour
        or int(match.group("seconds")) >= seconds_in_minute
    ):
        msg = "wall_time minute and second components should be in range 0 to 59"
        raise ValueError(msg)
    return processes, nodes, cpus_per_process, wall_time
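
# As an illustrative sketch of the validation above (values are hypothetical):
# _check_and_process_resource_args("8", "2", "1", "01:30:00") returns
# (8, 2, 1, "01:30:00"), while processes=7 with nodes=2, or a wall_time of
# "00:75:00", would raise a ValueError.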


def _create_job_args_dict(
    solver: str,
    conditions_file_name: str,
    mesh_file_name: str,
    processes: int,
    nodes: int,
    cpus_per_process: int,
    wall_time: str,
) -> dict[str, int | str]:
    return {
        "script": "neso",
        "neso_solver": solver,
        "neso_conditions_file": conditions_file_name,
        "neso_mesh_file": mesh_file_name,
        # FabSim convention is to use 'cores' to set number of MPI processes
        "cores": processes,
        "nodes": nodes,
        "corespernode": processes // nodes,
        "cpuspertask": cpus_per_process,
        "job_wall_time": wall_time,
    }
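
# For example (hypothetical values),
# _create_job_args_dict("Electrostatic2D3V", "conditions.xml", "mesh.xml",
# 8, 2, 1, "00:15:00") maps 8 processes over 2 nodes to "corespernode": 4,
# alongside the fixed "script": "neso" entry.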


@fab.task
@fab.load_plugin_env_vars("FabNESO")
def neso(
    config: str,
    solver: str = "Electrostatic2D3V",
    conditions_file_name: str = "conditions.xml",
    mesh_file_name: str = "mesh.xml",
    processes: str | int = 4,
    nodes: str | int = 1,
    cpus_per_process: str | int = 1,
    wall_time: str = "00:15:00",
    *,
    create_missing_parameters: bool = False,
    **parameter_overrides: str,
) -> None:
    """
    Run a single NESO solver instance.

    Args:
        config: Directory with single run configuration information.

    Keyword Args:
        solver: Which NESO solver to use.
        conditions_file_name: Name of conditions XML file in configuration
            directory.
        mesh_file_name: Name of mesh XML file in configuration directory.
        processes: Number of processes to run.
        nodes: Number of nodes to run on. Only applicable when running on a
            multi-node system.
        cpus_per_process: Number of processing units to use per process. Only
            applicable when running on a multi-node system.
        wall_time: Maximum time to allow job to run for. Only applicable when
            submitting to a job scheduler.
        create_missing_parameters: Force parameters in ``parameter_overrides``
            missing from conditions file to be added.
        **parameter_overrides: Additional keyword arguments will be passed to
            ``FabNESO.ensemble_tools.edit_parameters`` to create a temporary
            conditions file with these parameter values overridden.
    """
    _check_fab_module_imported()
    processes, nodes, cpus_per_process, wall_time = _check_and_process_resource_args(
        processes, nodes, cpus_per_process, wall_time
    )
    # Use a temporary directory context so that we can handle parameter inputs
    # from the command line
    original_config_path = Path(fab.find_config_file_path(config))
    temporary_context: TemporaryDirectory | nullcontext = (
        TemporaryDirectory(prefix=f"{config}_", dir=original_config_path.parent)
        if parameter_overrides != {}
        else nullcontext()
    )
    with temporary_context as temporary_config_directory:
        # If additional parameters have been provided, create a copy of the
        # conditions file and edit the input parameters
        if parameter_overrides != {}:
            temporary_config_path = Path(temporary_config_directory)
            shutil.copytree(
                original_config_path, temporary_config_path, dirs_exist_ok=True
            )
            # Switch the config to the new temporary one
            config = temporary_config_path.name
            edit_parameters(
                temporary_config_path / conditions_file_name,
                parameter_overrides,
                create_missing=create_missing_parameters,
            )
        fab.with_config(config)
        fab.execute(fab.put_configs, config)
        fab.job(
            _create_job_args_dict(
                solver,
                conditions_file_name,
                mesh_file_name,
                processes,
                nodes,
                cpus_per_process,
                wall_time,
            )
        )
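
# A sketch of how this task might be launched through the FabSim3 command
# line, assuming the standard ``fabsim <machine> <task>:<arguments>`` call
# syntax (the machine name ``localhost``, config name ``two_stream`` and
# override ``particle_initial_velocity`` are illustrative, not prescribed by
# this module):
#
#     fabsim localhost neso:two_stream,processes=4,particle_initial_velocity=2.0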


def _parse_parameter_range_string(
    parameter_range_string: str,
    delimiter: str,
) -> tuple[float, float, int]:
    lower, upper, n_sample = parameter_range_string.split(delimiter)
    return float(lower), float(upper), int(n_sample)
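
# For example, the helper above parses a grid specification string such as
# "0.5:1.5:10" (hypothetical values) as
# _parse_parameter_range_string("0.5:1.5:10", ":") == (0.5, 1.5, 10).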


@fab.task
@fab.load_plugin_env_vars("FabNESO")
def neso_grid_ensemble(
    config: str,
    solver: str = "Electrostatic2D3V",
    conditions_file_name: str = "conditions.xml",
    mesh_file_name: str = "mesh.xml",
    processes: int = 4,
    nodes: int = 1,
    cpus_per_process: int = 1,
    wall_time: str = "00:15:00",
    **parameter_ranges: str,
) -> None:
    """
    Run an ensemble of NESO solver instances on an evenly spaced parameter grid.

    Args:
        config: Directory with ensemble configuration information.

    Keyword Args:
        solver: Which NESO solver to use.
        conditions_file_name: Name of conditions XML file in configuration
            directory.
        mesh_file_name: Name of mesh XML file in configuration directory.
        processes: Number of processes to run in each job in the ensemble.
        nodes: Number of nodes to run each job in ensemble on. Only applicable
            when running on a multi-node system.
        cpus_per_process: Number of processing units to use per process for
            each job in ensemble. Only applicable when running on a multi-node
            system.
        wall_time: Maximum time to allow each job in ensemble to run for. Only
            applicable when submitting to a job scheduler.
        **parameter_ranges: The parameter ranges to construct the grid over.
            For each parameter name, a string of the format
            ``lower:upper:n_sample`` should be specified, resulting in
            ``n_sample`` evenly spaced values over the interval
            ``[lower, upper]``. The overall grid is constructed as the tensor
            product of the samples for each parameter.
    """
    _check_fab_module_imported()
    processes, nodes, cpus_per_process, wall_time = _check_and_process_resource_args(
        processes, nodes, cpus_per_process, wall_time
    )
    source_config_path = Path(fab.find_config_file_path(config))
    temporary_context = TemporaryDirectory(
        prefix=f"{config}_", dir=source_config_path.parent
    )
    with temporary_context as temporary_config_directory:
        temporary_config_path = Path(temporary_config_directory)
        output_path = temporary_config_path / "SWEEP"
        parsed_parameter_ranges = {
            parameter: _parse_parameter_range_string(values, ":")
            for parameter, values in parameter_ranges.items()
        }
        create_grid_ensemble(
            output_path=output_path,
            source_path=source_config_path,
            conditions_file=conditions_file_name,
            parameter_ranges=parsed_parameter_ranges,
        )
        config = temporary_config_path.name
        fab.update_environment(
            _create_job_args_dict(
                solver,
                conditions_file_name,
                mesh_file_name,
                processes,
                nodes,
                cpus_per_process,
                wall_time,
            )
        )
        fab.with_config(config)
        fab.run_ensemble(config, str(output_path))
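
# A hypothetical grid ensemble launch via the FabSim3 command line (machine,
# config and parameter names are illustrative): two parameters sampled at 5
# and 3 points respectively would produce a 15-run tensor-product sweep:
#
#     fabsim localhost neso_grid_ensemble:two_stream,particle_initial_velocity=0.1:2.0:5,particle_charge_density=50:150:3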


def _parse_parameter_interval_string(
    parameter_interval_string: str,
    delimiter: str,
) -> tuple[float, float]:
    lower, upper = parameter_interval_string.split(delimiter)
    return float(lower), float(upper)


@fab.task
@fab.load_plugin_env_vars("FabNESO")
def neso_qmc_ensemble(  # noqa: PLR0913
    config: str,
    solver: str = "Electrostatic2D3V",
    conditions_file_name: str = "conditions.xml",
    mesh_file_name: str = "mesh.xml",
    processes: int = 4,
    nodes: int = 1,
    cpus_per_process: int = 1,
    wall_time: str = "00:15:00",
    n_sample: int = 100,
    seed: int = 1234,
    rule: SamplingRule = "latin_hypercube",
    **parameter_intervals: str,
) -> None:
    """
    Run an ensemble of NESO solver instances on quasi-Monte Carlo parameter samples.

    Args:
        config: Directory with conditions and mesh files to create ensemble
            from.

    Keyword Args:
        solver: Which NESO solver to use.
        conditions_file_name: Name of conditions XML file in configuration
            directory.
        mesh_file_name: Name of mesh XML file in configuration directory.
        processes: Number of processes to run in each job in the ensemble.
        nodes: Number of nodes to run each job in ensemble on. Only applicable
            when running on a multi-node system.
        cpus_per_process: Number of processing units to use per process for
            each job in ensemble. Only applicable when running on a multi-node
            system.
        wall_time: Maximum time to allow each job in ensemble to run for. Only
            applicable when submitting to a job scheduler.
        n_sample: Number of quasi-Monte Carlo samples in ensemble.
        seed: Seed for pseudo-random number generator.
        rule: String specifying sampling scheme to use.
        **parameter_intervals: The parameter intervals to generate samples
            over. For each parameter name, a string of the format
            ``lower:upper`` should be specified, with the overall joint
            distribution on the parameter space being the product of the
            uniform distributions on these intervals.
    """
    _check_fab_module_imported()
    n_sample = _try_convert_to_int_and_check_positive(n_sample, "n_sample")
    seed = _try_convert_to_int_and_check_positive(seed, "seed")
    processes, nodes, cpus_per_process, wall_time = _check_and_process_resource_args(
        processes, nodes, cpus_per_process, wall_time
    )
    source_config_path = Path(fab.find_config_file_path(config))
    temporary_context = TemporaryDirectory(
        prefix=f"{config}_", dir=source_config_path.parent
    )
    with temporary_context as temporary_config_directory:
        temporary_config_path = Path(temporary_config_directory)
        parsed_parameter_intervals = {
            parameter: _parse_parameter_interval_string(interval_string, ":")
            for parameter, interval_string in parameter_intervals.items()
        }
        output_path = temporary_config_path / "SWEEP"
        create_qmc_ensemble(
            output_path=output_path,
            source_path=source_config_path,
            conditions_file=conditions_file_name,
            parameter_intervals=parsed_parameter_intervals,
            rule=rule,
            n_sample=n_sample,
            seed=seed,
        )
        config = temporary_config_path.name
        fab.update_environment(
            _create_job_args_dict(
                solver,
                conditions_file_name,
                mesh_file_name,
                processes,
                nodes,
                cpus_per_process,
                wall_time,
            )
        )
        fab.with_config(config)
        fab.run_ensemble(config, str(output_path))
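
# A hypothetical quasi-Monte Carlo ensemble launch (names illustrative); each
# parameter is given a ``lower:upper`` interval and 20 Latin hypercube samples
# are drawn from the product of the uniform distributions on those intervals:
#
#     fabsim localhost neso_qmc_ensemble:two_stream,n_sample=20,rule=latin_hypercube,particle_initial_velocity=0.1:2.0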


def _parse_vbmc_bounds_string(
    vbmc_bounds_string: str,
    delimiter: str,
) -> tuple[float, float, float, float]:
    (
        lower_bound,
        upper_bound,
        plausible_lower_bound,
        plausible_upper_bound,
    ) = vbmc_bounds_string.split(delimiter)
    return (
        float(lower_bound),
        float(upper_bound),
        float(plausible_lower_bound),
        float(plausible_upper_bound),
    )


@fab.task
@fab.load_plugin_env_vars("FabNESO")
def neso_vbmc(  # noqa: PLR0913
    config: str,
    reference_field_file: str,
    solver: str = "Electrostatic2D3V",
    conditions_file_name: str = "conditions.xml",
    mesh_file_name: str = "mesh.xml",
    observation_noise_std: float = 0.1,
    processes: int = 4,
    nodes: int = 1,
    cpus_per_process: int = 1,
    wall_time: str = "00:15:00",
    output_directory_name: str = "",
    **vbmc_parameters: str,
) -> None:
    """
    Run variational Bayesian Monte Carlo (VBMC) to calibrate NESO solver parameters.

    The VBMC algorithm (Acerbi, 2018) is an approximate Bayesian inference
    method for efficient parameter calibration in expensive-to-simulate models.
    Here we use the Python implementation of the algorithm in the package
    PyVBMC (Huggins et al., 2023).

    Args:
        config: Directory with ensemble configuration information.
        reference_field_file: Name of a NumPy .txt file that holds a reference
            field measurement for the calibration run.

    Keyword Args:
        solver: Which NESO solver to use.
        conditions_file_name: Name of conditions XML file in configuration
            directory.
        mesh_file_name: Name of mesh XML file in configuration directory.
        observation_noise_std: Standard deviation of the observation noise,
            used in the log likelihood calculation.
        processes: Number of processes to run in each job in the ensemble.
        nodes: Number of nodes to run on. Only applicable when running on a
            multi-node system.
        cpus_per_process: Number of processing units to use per process. Only
            applicable when running on a multi-node system.
        wall_time: Maximum time to allow job to run for. Only applicable when
            submitting to a job scheduler.
        output_directory_name: Directory to write the VBMC log file and final
            posterior to.
        **vbmc_parameters: The parameters to be scanned in the VBMC instance.
            Each value is a colon-separated string of the format
            ``lower:upper:plausible_lower:plausible_upper``.

    References:
        1. Acerbi, L. (2018). Variational Bayesian Monte Carlo. Advances in
           Neural Information Processing Systems, 31.
        2. Huggins et al., (2023). PyVBMC: Efficient Bayesian inference in
           Python. Journal of Open Source Software, 8(86), 5428,
           https://doi.org/10.21105/joss.05428
    """
    # Create the output directory
    output_directory = Path(output_directory_name)
    if not output_directory.is_dir():
        output_directory.mkdir(parents=True)
    if not Path(reference_field_file).is_file():
        msg = (
            f"reference_field_file {reference_field_file} not found. "
            "One must be specified for the calibration to run. If in doubt, "
            "please run the neso_write_field task to write a measured field "
            "to file first."
        )
        raise ValueError(msg)
    # Overrides that enable running NESO under VBMC. These ensure that the
    # field information is written out, which is not done by default in the
    # conditions files. This would need to be changed per solver.
    para_overrides = {
        "particle_num_write_field_steps": 100,
        "line_field_deriv_evaluations_step": 20,
        "line_field_deriv_evaluations_numx": 100,
        "line_field_deriv_evaluations_numy": 1,
    }
    # Put all of the NESO arguments in one dict
    neso_args = _create_job_args_dict(
        solver,
        conditions_file_name,
        mesh_file_name,
        processes,
        nodes,
        cpus_per_process,
        wall_time,
    )
    # Make the config_dict for the calibration run
    config_dict = {
        "config": config,
        "para_overrides": para_overrides,
        "neso_args": neso_args,
        "observation_noise_std": observation_noise_std,
    }
    # Hard coded for the two_stream config. Ideally this would be factored out
    parameters_to_scan = {
        vbmc_parameter: _parse_vbmc_bounds_string(parameter_bounds, ":")
        for vbmc_parameter, parameter_bounds in vbmc_parameters.items()
    }
    config_dict["parameters_to_scan"] = parameters_to_scan
    # Set up the bounds information required by PyVBMC
    bounds = list(zip(*parameters_to_scan.values(), strict=True))
    lower_bounds = np.array(bounds[0])
    upper_bounds = np.array(bounds[1])
    plausible_lower_bounds = np.array(bounds[2])
    plausible_upper_bounds = np.array(bounds[3])
    # Choose a random starting position
    rng = np.random.default_rng()
    theta_0 = rng.uniform(plausible_lower_bounds, plausible_upper_bounds)
    # Read in a reference field from an external file for the calibration
    config_dict["initial_run"] = np.loadtxt(reference_field_file)
    # Get a timestamp to label the VBMC logfile
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    logfile_name = f"vbmc_log_{timestamp}.log"
    logfile_path = output_directory / logfile_name
    # Additional options that we will pass to VBMC
    options = {
        # 50 * (D + 2) is the default, but leave this here to allow
        # smaller runs for debugging
        "max_fun_evals": 50 * (len(theta_0) + 2),
        # Can't see a way of saving the plots without this creating a new
        # X window, so disable plotting
        "plot": False,
        # Run with a log file
        "log_file_name": logfile_path,
    }
    # Make an instance of VBMC
    vbmc = pyvbmc.VBMC(
        lambda theta: _log_density(theta, config_dict),
        theta_0,
        lower_bounds,
        upper_bounds,
        plausible_lower_bounds,
        plausible_upper_bounds,
        options=options,
    )
    # Run the VBMC instance
    vp, results = vbmc.optimize()
    # Print the final output of VBMC to the logfile
    with logfile_path.open("a+") as logfile:
        logfile.write(pyvbmc.formatting.format_dict(results))
    # Save the final output posterior
    vbmc.vp.save(output_directory / f"final_posterior_{timestamp}.pkl")
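
# A hypothetical calibration launch (names illustrative); each calibrated
# parameter takes a ``lower:upper:plausible_lower:plausible_upper`` string:
#
#     fabsim localhost neso_vbmc:two_stream,reference_field_file=field_write_out.txt,particle_initial_velocity=0.0:4.0:1.0:3.0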


@fab.task
@fab.load_plugin_env_vars("FabNESO")
def neso_write_field(
    config: str,
    solver: str = "Electrostatic2D3V",
    conditions_file_name: str = "conditions.xml",
    mesh_file_name: str = "mesh.xml",
    processes: int = 4,
    nodes: int = 1,
    cpus_per_process: int = 1,
    wall_time: str = "00:15:00",
    out_file_name: str = "field_write_out.txt",
    **parameter_overrides: str,
) -> None:
    """
    Run a single NESO solver instance and save the observed field to a file.

    Args:
        config: Directory with single run configuration information.

    Keyword Args:
        solver: Which NESO solver to use.
        conditions_file_name: Name of conditions XML file in configuration
            directory.
        mesh_file_name: Name of mesh XML file in configuration directory.
        processes: Number of processes to run.
        nodes: Number of nodes to run on. Only applicable when running on a
            multi-node system.
        cpus_per_process: Number of processing units to use per process. Only
            applicable when running on a multi-node system.
        wall_time: Maximum time to allow job to run for. Only applicable when
            submitting to a job scheduler.
        out_file_name: Name of the file to be created containing the measured
            field.
        **parameter_overrides: Additional keyword arguments will be passed to
            ``FabNESO.ensemble_tools.edit_parameters`` to create a temporary
            conditions file with these parameter values overridden.
    """
    # Assemble the NESO arguments
    neso_args = _create_job_args_dict(
        solver,
        conditions_file_name,
        mesh_file_name,
        processes,
        nodes,
        cpus_per_process,
        wall_time,
    )
    para_overrides = dict(
        {
            # The necessary overrides for writing out the field.
            # Factor this out of this and the VBMC task
            "particle_num_write_field_steps": 100,
            "line_field_deriv_evaluations_step": 20,
            "line_field_deriv_evaluations_numx": 100,
            "line_field_deriv_evaluations_numy": 1,
        },
        **parameter_overrides,
    )
    config_dict = {"config": config, "neso_args": neso_args}
    # Write out the returned field to file
    np.savetxt(
        out_file_name,
        _run_instance_return_field(config_dict, para_overrides)["field_value"],
    )
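
# A hypothetical invocation writing a reference field for later use by the
# neso_vbmc task (names illustrative):
#
#     fabsim localhost neso_write_field:two_stream,out_file_name=field_write_out.txt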


def _run_instance_return_field(
    config_dict: dict[str, Any], para_overrides: dict[str, Any]
) -> dict[str, np.ndarray]:
    """Run a single instance of the NESO solver and return the observed field."""
    # If we're running remotely, tell the submission to wait until done
    if "archer2" in fab.env.remote:
        fab.update_environment(
            {
                "job_dispatch": "cd /work/$project/$project/$username ; sbatch --wait",
            }
        )
    neso(
        config=config_dict["config"],
        solver=config_dict["neso_args"]["neso_solver"],
        conditions_file_name=config_dict["neso_args"]["neso_conditions_file"],
        mesh_file_name=config_dict["neso_args"]["neso_mesh_file"],
        processes=config_dict["neso_args"]["cores"],
        nodes=config_dict["neso_args"]["nodes"],
        cpus_per_process=config_dict["neso_args"]["cpuspertask"],
        wall_time=config_dict["neso_args"]["job_wall_time"],
        create_missing_parameters=True,
        **para_overrides,
    )
    fab.fetch_results()
    local_results_dir = Path(fab.env.job_results_local) / template(
        fab.env.job_name_template
    )
    final_line_field_step = (
        int(
            list_parameter_values(
                Path(fab.find_config_file_path(config_dict["config"]))
                / str(config_dict["neso_args"]["neso_conditions_file"]),
                "particle_num_time_steps",
            )[0]
        )
        - para_overrides["line_field_deriv_evaluations_step"]
    )
    return read_hdf5_datasets(
        local_results_dir / "Electrostatic2D3V_line_field_deriv_evaluations.h5part",
        {
            "x": f"Step#{final_line_field_step}/x",
            "field_value": f"Step#{final_line_field_step}/FIELD_EVALUATION_0",
        },
    )


def _log_density(
    theta: list[float],
    config_dict: dict[str, Any],
) -> float:
    """Run an instance of the neso task and return the log joint density."""
    parameters = dict(
        zip(config_dict["parameters_to_scan"].keys(), theta, strict=True),
        **config_dict["para_overrides"],
    )
    # Run an instance of NESO and calculate the measured field strength.
    observed_results = _run_instance_return_field(config_dict, parameters)
    # Calculate the joint log likelihood using the reference field in the
    # config_dict
    return -(
        (config_dict["initial_run"] - observed_results["field_value"]) ** 2
        / (2 * config_dict["observation_noise_std"] ** 2)
    ).sum()


def _parse_pce_bounds_string(
    parameter_scan_string: str,
    delimiter: str,
) -> tuple[float, float]:
    lower, upper = parameter_scan_string.split(delimiter)
    return float(lower), float(upper)


def _parse_float_or_int_string_literal(literal: str) -> float | int:
    try:
        return int(literal)
    except ValueError:
        return float(literal)


PCEVariant: TypeAlias = Literal[
    "pseudo-spectral", "pseudo-spectral-sparse", "point-collocation"
]
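
# Illustrative classification of the **parameter_bounds_or_overrides strings
# accepted by the PCE ensemble task below (values hypothetical): "0.5:1.5"
# contains a colon and so becomes a cp.Uniform(0.5, 1.5) distribution, while
# "2" and "0.25" are parsed by _parse_float_or_int_string_literal as the fixed
# overrides int(2) and float(0.25).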


@fab.task
@fab.load_plugin_env_vars("FabNESO")
def neso_pce_ensemble(
    config: str,
    solver: str = "Electrostatic2D3V",
    conditions_file_name: str = "conditions.xml",
    mesh_file_name: str = "mesh.xml",
    polynomial_order: int = 4,
    variant: PCEVariant = "pseudo-spectral",
    processes: str | int = 4,
    nodes: str | int = 1,
    cpus_per_process: str | int = 1,
    wall_time: str = "00:15:00",
    **parameter_bounds_or_overrides: str,
) -> None:
    """
    Run ensemble of NESO simulations to perform a polynomial chaos expansion of outputs.

    Generates a set of parameter values (and associated weights) to evaluate
    the model at using a quadrature rule, and evaluates the model outputs at
    each of these parameter values. The model outputs can then be approximated
    by an expansion in a set of polynomials orthogonal with respect to the
    assumed distribution over the parameter space, with the coefficients of
    the expansion estimated from the sampled model outputs. This task just
    computes the model outputs for the sampled parameter values; the separate
    `neso_pce_analysis` task uses the fetched model outputs from this task to
    estimate the coefficients and so form the polynomial expansion
    approximation to the model.

    Args:
        config: Directory with ensemble configuration information.

    Keyword Args:
        solver: Which NESO solver to use.
        conditions_file_name: Name of conditions XML file in configuration
            directory.
        mesh_file_name: Name of mesh XML file in configuration directory.
        polynomial_order: Polynomial order to use in polynomial chaos
            expansion.
        variant: Polynomial chaos expansion variant to use - one of
            `point-collocation` (point collocation method), `pseudo-spectral`
            (pseudo-spectral projection method) or `pseudo-spectral-sparse`
            (pseudo-spectral projection method with Smolyak sparse grid).
        processes: Number of processes to run in each job in the ensemble.
        nodes: Number of nodes to run on. Only applicable when running on a
            multi-node system.
        cpus_per_process: Number of processing units to use per process. Only
            applicable when running on a multi-node system.
        wall_time: Maximum time to allow job to run for. Only applicable when
            submitting to a job scheduler.
        **parameter_bounds_or_overrides: Bounds of parameters to vary in
            polynomial chaos expansion, or fixed overrides for parameters from
            values in the conditions file in the configuration directory. Each
            value is either a colon-separated string `lower_bound:upper_bound`
            specifying lower and upper bounds for independent uniform
            distributions over parameter values, or a string specifying a
            single int or float, in which case the corresponding parameter is
            considered fixed but the value given is used to override its
            default value in the conditions file.
    """
    _check_fab_module_imported()
    parameter_distributions = {
        parameter_name: cp.Uniform(*_parse_pce_bounds_string(string_value, ":"))
        for parameter_name, string_value in parameter_bounds_or_overrides.items()
        if ":" in string_value
    }
    parameter_overrides = {
        parameter_name: _parse_float_or_int_string_literal(string_value)
        for parameter_name, string_value in parameter_bounds_or_overrides.items()
        if ":" not in string_value
    }
    # Map variant specifier to EasyVVUQ PCESampler keyword arguments -
    # regression is used to switch between point-collocation (True) and
    # pseudo-spectral (False) methods, while sparse (True) enables Smolyak
    # sparse grid with pseudo-spectral method.
    match variant:
        case "point-collocation":
            regression = True
            sparse = False
        case "pseudo-spectral":
            regression = False
            sparse = False
        case "pseudo-spectral-sparse":
            regression = False
            sparse = True
        case _:
            msg = f"variant must be one of the PCEVariant values, got: {variant}"
            raise ValueError(msg)
    pce_sampler = PCESampler(
        parameter_distributions,
        polynomial_order=int(polynomial_order),
        regression=regression,
        sparse=sparse,
    )
    parameter_samples = list(pce_sampler)
    processes, nodes, cpus_per_process, wall_time = _check_and_process_resource_args(
        processes, nodes, cpus_per_process, wall_time
    )
    path_to_config = Path(fab.find_config_file_path(config))
    with TemporaryDirectory(
        prefix=f"{config}_", dir=path_to_config.parent
    ) as temporary_config_directory:
        temporary_config_path = Path(temporary_config_directory)
        for sample_index, parameter_dict in enumerate(parameter_samples):
            directory_path = temporary_config_path / "SWEEP" / f"sample_{sample_index}"
            shutil.copytree(path_to_config, directory_path)
            edit_parameters(
                directory_path / conditions_file_name,
                parameter_dict | parameter_overrides,
            )
        config = temporary_config_path.name
        path_to_config = temporary_config_path
        sweep_dir = str(path_to_config / "SWEEP")
        fab.update_environment(
            _create_job_args_dict(
                solver,
                conditions_file_name,
                mesh_file_name,
                processes,
                nodes,
                cpus_per_process,
                wall_time,
            )
        )
        fab.with_config(config)
        fab.run_ensemble(config, sweep_dir)
        job_name = template(fab.env.job_name_template)
        local_results_directory = Path(fab.env.local_results) / job_name
        local_results_directory.mkdir(parents=True)
        with (local_results_directory / "pce_sampler.pickle").open("wb") as f:
            pickle.dump(pce_sampler, f)
        with (local_results_directory / "parameter_samples.json").open("w") as f:
            json.dump(parameter_samples, f)
        print(  # noqa: T201
            f"Sampler pickle and parameters JSON saved to {local_results_directory}.\n"
            "Run fetch_results task once runs are completed to also save run "
            "outputs to the same directory, and then run the neso_pce_analysis "
            "task to compute the PCE expansion from the run outputs."
        )
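
# A hypothetical PCE ensemble launch (names illustrative): one varied
# parameter with uniform bounds and one fixed override:
#
#     fabsim localhost neso_pce_ensemble:two_stream,polynomial_order=4,particle_initial_velocity=0.1:2.0,particle_charge_density=105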


@fab.task
@fab.load_plugin_env_vars("FabNESO")
def neso_pce_analysis(
    config: Path | str,
    results_dir: Path | str,
    extract_outputs_script: str = "extract_outputs.py",
) -> None:
    """
    Analyse outputs from a previous polynomial chaos expansion (PCE) ensemble run.

    Uses the run outputs for the sampled parameter values to compute a PCE
    approximation to the model, which is used to compute various statistics of
    the outputs and allows construction of a surrogate model. The analysis
    results are saved to a pickle file `pce_analysis_results.pickle` in the
    results directory.

    Args:
        config: Name of configuration directory with script to use to extract
            relevant solver outputs from results files and output to a JSON
            file.
        results_dir: Directory containing PCE ensemble outputs from a run of
            the `neso_pce_ensemble` task. The analysis results pickle file
            will be written to this directory.

    Keyword Args:
        extract_outputs_script: Name of script in configuration directory for
            extracting outputs from results files.
    """
    _check_fab_module_imported()
    path_to_config = Path(fab.find_config_file_path(config))
    extract_outputs_script_path = path_to_config / extract_outputs_script
    results_dir = Path(results_dir)
    with (results_dir / "pce_sampler.pickle").open("rb") as f:
        pce_sampler = pickle.load(f)  # noqa: S301
    with (results_dir / "parameter_samples.json").open("r") as f:
        parameter_samples = json.load(f)
    results_data = []
    for sample_index, parameter_dict in enumerate(parameter_samples):
        sample_results_dir = results_dir / "RUNS" / f"sample_{sample_index}"
        outputs_file = sample_results_dir / "outputs.json"
        subprocess.call(
            [  # noqa: S603, S607
                "python",
                str(extract_outputs_script_path),
                sample_results_dir,
                outputs_file,
            ]
        )
        with outputs_file.open("r") as f:
            outputs = json.load(f)
        results_data.append(
            {
                (key, i): v
                for key, value in outputs.items()
                for i, v in enumerate(value)
            }
            | {(key, 0): value for key, value in parameter_dict.items()}
        )
    results_dataframe = pd.DataFrame(
        results_data, columns=pd.MultiIndex.from_tuples(results_data[0].keys())
    )
    pce_analysis = PCEAnalysis(sampler=pce_sampler, qoi_cols=list(outputs.keys()))
    analysis_results = pce_analysis.analyse(results_dataframe)
    with (results_dir / "pce_analysis_results.pickle").open("wb") as f:
        pickle.dump(analysis_results, f)
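
# Once the analysis has run, the saved results can be inspected offline. A
# minimal sketch, assuming the results directory path and the availability of
# a summary method such as describe() on the EasyVVUQ analysis results object:
#
#     import pickle
#     from pathlib import Path
#
#     with (Path("results_dir") / "pce_analysis_results.pickle").open("rb") as f:
#         analysis_results = pickle.load(f)
#     print(analysis_results.describe())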