Source code for banffprocessor.processor_input

import json
from enum import Enum
from pathlib import Path

from banff._log import log_levels

import banffprocessor.processor_logger as plg
from banffprocessor.exceptions import ProcessorInputParameterError
from banffprocessor.nls import _
from banffprocessor.util.case_insensitive_enum_meta import CaseInsensitiveEnumMeta

# Logger dedicated to this module, obtained from the processor's logging helper.
log_lcl = plg.get_processor_child_logger("processor_input")

## Specified in and loaded from main program input json file ##
class ProcessorInput:
    """Holds input parameters from a user's JSON input file used to configure a Banff Processor job."""

    def __init__(self, job_id: str,
                 unit_id: str | None = None,
                 input_folder: str | Path | None = None,
                 indata_filename: str | Path | None = None,
                 auxdata_filename: str | Path | None = None,
                 indata_aux_filename: str | Path | None = None,
                 histdata_filename: str | Path | None = None,
                 indata_hist_filename: str | Path | None = None,
                 histstatus_filename: str | Path | None = None,
                 instatus_hist_filename: str | Path | None = None,
                 instatus_filename: str | Path | None = None,
                 user_plugins_folder: str | Path | None = None,
                 metadata_folder: str | Path | None = None,
                 process_output_type: str | int | None = None,
                 seed: int | None = None,
                 no_by_stats: str | bool | None = None,
                 randnumvar: str | None = None,
                 save_format: list[str] | None = None,
                 output_folder: str | Path | None = None,
                 log_level: int | None = None,
                 ) -> None:
        """Construct a ProcessorInput object.

        :param job_id: The job_id of the job to run.
        :type job_id: str
        :param unit_id: The unit_id for the job to run. Not required for UDPs,
            VerifyEdits, EditStats, or 'Job' procedures but required for all
            other procs. Defaults to None
        :type unit_id: str, optional
        :param input_folder: The directory containing the JSON file used to create
            this object, and/or the directory containing the input data and status
            files as well as sub-directories for /metadata and /plugins. Also used
            to create the subdirectory /out for output files, if no
            `output_folder` is provided as a part of the provided arguments.
        :type input_folder: str | Path
        :param indata_filename: The filename of the indata file which is required
            in almost all cases, defaults to None
        :type indata_filename: str | Path | None, optional
        :param auxdata_filename: The filename of the auxiliary data file, defaults
            to None. This field is an alias for `indata_aux_filename`; they
            represent the same value (if both are given, `indata_aux_filename`
            takes precedence).
        :type auxdata_filename: str | Path | None, optional
        :param indata_aux_filename: The filename of the auxiliary data file,
            defaults to None
        :type indata_aux_filename: str | Path | None, optional
        :param histdata_filename: The filename of the historical data file,
            defaults to None. This field is an alias for `indata_hist_filename`;
            they represent the same value (if both are given,
            `indata_hist_filename` takes precedence).
        :type histdata_filename: str | Path | None, optional
        :param indata_hist_filename: The filename of the historical data file,
            defaults to None
        :type indata_hist_filename: str | Path | None, optional
        :param histstatus_filename: The filename of the historical status file,
            defaults to None. This field is an alias for `instatus_hist_filename`;
            they represent the same value (if both are given,
            `instatus_hist_filename` takes precedence).
        :type histstatus_filename: str | Path | None, optional
        :param instatus_hist_filename: The filename of the historical status file,
            defaults to None
        :type instatus_hist_filename: str | Path | None, optional
        :param instatus_filename: The filename of the instatus file, defaults to None
        :type instatus_filename: str | Path | None, optional
        :param user_plugins_folder: The directory where the user defined plugin
            modules for this job are located, defaults to None
        :type user_plugins_folder: str | Path | None, optional
        :param metadata_folder: The directory where the metadata files for this
            job are located, defaults to None
        :type metadata_folder: str | Path | None, optional
        :param process_output_type: Specifies a level which determines the types
            of output files that will be saved/produced. Accepts a
            ProcessOutputType member name (str) or value (int), defaults to None
        :type process_output_type: str | int | None, optional
        :param seed: Seed value used in Banff procs that allow the seed option;
            useful for comparing results between multiple runs, defaults to None
        :type seed: int | None, optional
        :param no_by_stats: Whether the no_by_stats option is enabled for standard
            Banff procedures; accepts a bool or the strings "true"/"false" in any
            case, defaults to None
        :type no_by_stats: str | bool | None, optional
        :param randnumvar: Name of a random-number variable (presumably passed
            through to procedures -- verify against consumers), defaults to None
        :type randnumvar: str | None, optional
        :param save_format: Non-empty list of file extensions used to override
            the output file format (which by default matches the input file),
            defaults to None
        :type save_format: list[str] | None, optional
        :param output_folder: Alternate directory in which to save output files,
            defaults to None
        :type output_folder: str | Path | None, optional
        :param log_level: Controls log file creation: None or 1 -> INFO (default),
            < 1 -> no log file handlers, > 1 -> DEBUG. Defaults to None
        :type log_level: int | None, optional
        :raises ValueError: If `input_folder` is not provided and `metadata_folder`
            and `output_folder` are not both provided
        :raises ProcessorInputParameterError: If no valid `job_id` is provided
        :raises ProcessorInputParameterError: If a provided filepath does not
            exist, or a relative path is given without an `input_folder`
        :raises ProcessorInputParameterError: If `process_output_type` is not a
            recognized ProcessOutputType name or value
        :raises ProcessorInputParameterError: If `no_by_stats` is not a
            recognized true/false value
        :raises ProcessorInputParameterError: If the `save_format` parameter is
            provided in an improper format
        """
        # input_folder is technically not required if the user provides explicit locations
        # for their metadata files and their output folder
        if(not input_folder and not (metadata_folder and output_folder)):
            msg = _("Input parameter input_folder must be provided unless both "
                    "metadata_folder and output_folder are provided.")
            log_lcl.exception(msg)
            raise ValueError(msg)

        # NOT FOUND IN ACTUAL INPUT JSON FILE, VALUE SHOULD BE THE FOLDER THE FILE IS FOUND IN
        # this folder is used as a default file location if the path to a required file was
        # not explicitly defined in any of the other input parameters such as indata_filename
        self.input_folder = get_path_val(input_folder)

        def abs_path(file_or_dir: Path | str | None, param_name: str) -> Path | None:
            """Convert file_or_dir to an absolute path.

            If file_or_dir is a non-absolute path it is treated as relative to the
            input_folder and the final path will begin with input_folder.

            :param file_or_dir: The filepath to convert.
            :type file_or_dir: str | Path | None
            :param param_name: The name of the parameter being processed (for error reporting)
            :type param_name: str
            :raises ProcessorInputParameterError: If the path is relative but no
                input_folder was given, if a non-output path does not exist, or if
                the output_folder's parent folders cannot be created
            :return: file_or_dir as represented by a Path object.
            :rtype: Path | None
            """
            file_or_dir = get_path_val(file_or_dir)
            if(not file_or_dir):
                return None

            output_parents = False
            if(not file_or_dir.is_absolute()):
                if(self.input_folder):
                    file_or_dir = (self.input_folder / file_or_dir).resolve()
                    # Specifically for output_folder, if given as relative path,
                    # we want to create any parent folders of the final output directory
                    # if it doesn't yet exist. If output_folder is given as absolute
                    # we DON'T want to create the parent folders, only the final output directory.
                    output_parents = True
                else:
                    msg = _("Input folder was not given but {} is a relative path.").format(param_name)
                    log_lcl.exception(msg)
                    raise ProcessorInputParameterError(msg)

            if(not file_or_dir.exists()):
                # output_folder is the only parameter whose path is created on demand;
                # every other parameter must point at something that already exists.
                if(param_name == "output_folder"):
                    try:
                        file_or_dir.mkdir(parents=output_parents)
                    except FileNotFoundError as e:
                        msg = _("Parent folders in input parameter filepath output_folder: {} could not be "
                                "found so output folder could not be created.").format(str(file_or_dir))
                        log_lcl.exception(msg)
                        raise ProcessorInputParameterError(msg) from e
                else:
                    msg = _("Filepath in input parameter {}: {} is not accesible or does "
                            "not exist.").format(param_name, str(file_or_dir))
                    log_lcl.exception(msg)
                    raise ProcessorInputParameterError(msg)

            return file_or_dir

        # Optional alternate save location for output files
        self.output_folder = abs_path(output_folder, "output_folder")
        if(not self.output_folder):
            # Create output folder if one wasn't provided
            self.output_folder = self.input_folder / "out"
            if not self.output_folder.exists():
                self.output_folder.mkdir()

        # Optional alternate folder location of metadata files
        self.metadata_folder = abs_path(metadata_folder, "metadata_folder")
        # Load all of our metadata
        if(not self.metadata_folder):
            # The user may or may not have opted for a sub-folder
            meta_path = self.input_folder / "metadata"
            # If no specific metadata folder is given, check for a "metadata" subfolder in the
            # input_folder. If this doesn't exist just use the input_folder
            self.metadata_folder = meta_path if meta_path.exists() else self.input_folder

        # Optional alternate folder location of user program files
        self.user_plugins_folder = abs_path(user_plugins_folder, "user_plugins_folder")
        # Check if the user provided a custom folder path
        if(not self.user_plugins_folder and self.input_folder):
            # If not see if their plugins are in a "plugins" subfolder in the input folder
            plugins_path = self.input_folder / "plugins"
            if(plugins_path.exists()):
                self.user_plugins_folder = plugins_path

        self.job_id = get_string_param_value(job_id)
        if(self.job_id is None):
            msg = _("No valid job_id found in processor_input file.")
            log_lcl.exception(msg)
            raise ProcessorInputParameterError(msg)

        self.unit_id = get_string_param_value(unit_id)
        # Allowed to be none in the case of a job only performing "Verifyedits"

        # Also should allow either just filename or a full filepath
        # If just a filename is provided, file will be searched for in the input_folder
        # otherwise the full filepath is used for loading the file into a dataframe
        self.indata_filename = abs_path(indata_filename, "indata_filename")

        # statusAll may be provided for input to any procs that require an instatus
        # parameter, if there is no statusAll file created by any prior procs in the job
        self.instatus_filename = abs_path(instatus_filename, "instatus_filename")

        # The *_filename parameters below each have an alias; the non-alias
        # (indata_*/instatus_*) spelling wins when both are supplied.
        if(indata_aux_filename):
            self.indata_aux_filename = abs_path(indata_aux_filename, "indata_aux_filename")
        else:
            self.indata_aux_filename = abs_path(auxdata_filename, "auxdata_filename")

        if(indata_hist_filename):
            self.indata_hist_filename = abs_path(indata_hist_filename, "indata_hist_filename")
        else:
            self.indata_hist_filename = abs_path(histdata_filename, "histdata_filename")

        if(instatus_hist_filename):
            self.instatus_hist_filename = abs_path(instatus_hist_filename, "instatus_hist_filename")
        else:
            self.instatus_hist_filename = abs_path(histstatus_filename, "histstatus_filename")

        # Output type option to allow users to specify the level of information
        # recorded and output from their respective procs
        try:
            self.process_output_type = None
            if(isinstance(process_output_type, str)):
                # Lookup by enum member name (case-insensitive via the enum's metaclass)
                self.process_output_type = ProcessOutputType[process_output_type]
            elif(isinstance(process_output_type, int)):
                # Lookup by enum member value
                self.process_output_type = ProcessOutputType(process_output_type)
            elif(process_output_type is not None):
                raise ValueError
        except (KeyError, ValueError) as e:
            msg = _("ProcessorInput process_output_type field contains an unrecognized value.")
            raise ProcessorInputParameterError(msg) from e

        # Boolean properties so we don't have to import ProcessOutputType
        # every place we want to check these
        self.output_custom = (self.process_output_type == ProcessOutputType.CUSTOM)
        self.output_all = (self.process_output_type == ProcessOutputType.ALL)

        # Seed value to be used in Banff procs that allow the seed option.
        # Useful for comparing results between multiple runs.
        self.seed = int(seed) if seed is not None else None

        # TIME - display the time for each seqno
        self.time = True
        # TIMESTORE - Creates the jobInfoRecords dataset which contains the start,
        # end, and elapsed amount of time of each sequence run by the Banff Processor.
        self.time_store = True

        # no_by_stats - Determines if the no_by_stats parameter is set to True for each standard Banff Procedure
        # The input may be passed in as a boolean or a string, for example true, 'true' or 'True'
        if no_by_stats is None or str(no_by_stats).casefold() == "false":
            # The no_by_stats parameter cannot be set to false in the procedures, it is either True or None
            self.no_by_stats = None
        elif str(no_by_stats).casefold() == "true":
            self.no_by_stats = True
        else:
            msg = _("Invalid value in processor_input file: no_by_stats must be true, false or not specified.")
            log_lcl.exception(msg)
            raise ProcessorInputParameterError(msg)

        self.randnumvar = get_string_param_value(randnumvar)

        # Optional param used to override the output file format which by default
        # uses the same type as the input file
        if(save_format is not None):
            if(isinstance(save_format, list)):
                # Only save a value if it's a list with items in it
                self.save_format = save_format if save_format else None
            else:
                msg = _("save_format parameter of the input JSON file must be a list "
                        "of supported file extensions or be excluded or empty.")
                log_lcl.exception(msg)
                raise ProcessorInputParameterError(msg)
        else:
            self.save_format = None

        # Determines if we should create the INFO and/or DEBUG log files
        if(log_level is None or log_level == 1):
            # Default option
            self.log_level = log_levels.INFO
        elif(log_level < 1):
            # does not create any file handlers
            self.log_level = None
        elif(log_level > 1):
            self.log_level = log_levels.DEBUG

    @classmethod
    def from_file(cls, filepath: str | Path) -> "ProcessorInput":
        """Initialize a :class:`src.banffprocessor.processor_input.ProcessorInput` object from a JSON file.

        The JSON object's keys are passed directly as keyword arguments to the
        constructor; the file's parent directory is used as `input_folder`.

        :param filepath: the full path to the JSON file containing the input
            parameters required to run the processor.
        :type filepath: str | Path
        :raises ProcessorInputParameterError: If the `filepath` does not contain
            a valid directory or JSON filename
        :raises FileNotFoundError: If the file at `filepath` is not able to be found
        :return: The object loaded from the parameters in the JSON file
        :rtype: :class:`src.banffprocessor.processor_input.ProcessorInput`
        """
        filepath = Path(filepath)
        if(not filepath.is_absolute() or not filepath.is_file()):
            msg = _("Filepath is not properly configured. "
                    "Make sure that you use the full filepath of the input JSON file.")
            log_lcl.exception(msg)
            raise ProcessorInputParameterError(msg)

        if(filepath.suffix.casefold() != ".json"):
            msg = _("input_filepath is not a JSON file. The input file parameters "
                    "must be passed as the full filepath of the input JSON file.")
            log_lcl.exception(msg)
            raise ProcessorInputParameterError(msg)

        # Load input variables from json file found in processing folder
        try:
            with filepath.open() as json_input_params:
                json_input = json.load(json_input_params)
            # Build the object and return
            return cls(input_folder=filepath.parent, **json_input)
        except FileNotFoundError as e:
            msg = _("Unable to find JSON input file under input filepath {}").format(filepath)
            log_lcl.exception(msg)
            raise FileNotFoundError(msg) from e
[docs] def get_string_param_value(parameter_to_check: str | None) -> str | None: """Process string values from the parameter file. :param parameter_to_check: The string value to process :type parameter_to_check: str | None :return: `None` if `parameter_to_check` is `None`, empty or only whitespace and the original string with whitespace trimmed from beginning and end if not :rtype: str | None """ if parameter_to_check is None or parameter_to_check == "" or str(parameter_to_check).isspace(): return None # Otherwise apply the strip function just in case return str(parameter_to_check).strip()
[docs] def get_path_val(fpath: str | Path | None) -> Path | None: """Return the Path representation of `fpath` or None if `fpath` is None or empty. :param fpath: The filepath to convert :type fpath: str | Path | None :return: The Path representation of `fpath` :rtype: Path | None """ if(isinstance(fpath, str)): fpath = get_string_param_value(fpath) return Path(fpath) if fpath else None
class ProcessOutputType(Enum, metaclass=CaseInsensitiveEnumMeta):
    """Represents the different sets of outputs the processor should be creating for a job."""

    # Presumably the smallest set of output files -- TODO confirm against consumers
    MINIMAL = 1
    # All outputs are produced (ProcessorInput exposes this as `output_all`)
    ALL = 2
    # User-customized output selection (ProcessorInput exposes this as `output_custom`)
    CUSTOM = 3