Source code for banffprocessor.processor_input
import json
from enum import Enum
from pathlib import Path
from banff._log import log_levels
import banffprocessor.processor_logger as plg
from banffprocessor.exceptions import ProcessorInputParameterError
from banffprocessor.nls import _
from banffprocessor.util.case_insensitive_enum_meta import CaseInsensitiveEnumMeta
# Module-level logger, obtained from the processor logger helper under the
# name "processor_input". Every log call in this module goes through it.
log_lcl = plg.get_processor_child_logger("processor_input")
## Specified in and loaded from main program input json file ##
[docs]
class ProcessorInput:
    """Holds input parameters from a user's JSON input file used to configure a Banff Processor job."""

    def __init__(self, job_id: str,
                 unit_id: str | None = None, input_folder: str | Path | None = None,
                 indata_filename: str | Path | None = None,
                 auxdata_filename: str | Path | None = None, indata_aux_filename: str | Path | None = None,
                 histdata_filename: str | Path | None = None, indata_hist_filename: str | Path | None = None,
                 histstatus_filename: str | Path | None = None, instatus_hist_filename: str | Path | None = None,
                 instatus_filename: str | Path | None = None, user_plugins_folder: str | Path | None = None,
                 metadata_folder: str | Path | None = None, process_output_type: str | int | None = None,
                 seed: int | None = None, no_by_stats: str | bool | None = None,
                 randnumvar: str | None = None, save_format: list[str] | None = None,
                 output_folder: str | Path | None = None, log_level: int | None = None,
                 ) -> None:
        """Construct a ProcessorInput object.

        Every file or folder parameter may be given as an absolute path or as a path
        relative to `input_folder`. Blank or whitespace-only strings are treated as
        "not provided".

        :param job_id: The job_id of the job to run.
        :type job_id: str
        :param unit_id: The unit_id for the job to run. Not required for UDPs, VerifyEdits,
            EditStats, or 'Job' procedures but required for all other procs. Defaults to None
        :type unit_id: str, optional
        :param input_folder: The directory containing the JSON file used to create this object, and/or
            the directory containing the input data and status files as well as sub-directories for
            /metadata and /plugins. Also used to create the subdirectory /out for output files, if
            no `output_folder` is provided as a part of the provided arguments. May only be omitted
            when both `metadata_folder` and `output_folder` are provided.
        :type input_folder: str | Path | None, optional
        :param indata_filename: The filename of the indata file which is required in almost
            all cases, defaults to None
        :type indata_filename: str | Path | None, optional
        :param auxdata_filename: The filename of the auxiliary data file, defaults to None. This field
            is an alias for `indata_aux_filename`; it is only used when `indata_aux_filename`
            is not provided.
        :type auxdata_filename: str | Path | None, optional
        :param indata_aux_filename: The filename of the auxiliary data file, defaults to None
        :type indata_aux_filename: str | Path | None, optional
        :param histdata_filename: The filename of the historical data file, defaults to None. This field
            is an alias for `indata_hist_filename`; it is only used when `indata_hist_filename`
            is not provided.
        :type histdata_filename: str | Path | None, optional
        :param indata_hist_filename: The filename of the historical data file, defaults to None
        :type indata_hist_filename: str | Path | None, optional
        :param histstatus_filename: The filename of the historical status file, defaults to None. This field
            is an alias for `instatus_hist_filename`; it is only used when `instatus_hist_filename`
            is not provided.
        :type histstatus_filename: str | Path | None, optional
        :param instatus_hist_filename: The filename of the historical status file, defaults to None
        :type instatus_hist_filename: str | Path | None, optional
        :param instatus_filename: The filename of the instatus file, defaults to None
        :type instatus_filename: str | Path | None, optional
        :param user_plugins_folder: The directory where the user defined plugin modules for this job
            are located, defaults to None. When omitted, a "plugins" sub-folder of `input_folder`
            is used if it exists.
        :type user_plugins_folder: str | Path | None, optional
        :param metadata_folder: The directory where the metadata files for this job are located,
            defaults to None. When omitted, a "metadata" sub-folder of `input_folder` is used
            if it exists, otherwise `input_folder` itself.
        :type metadata_folder: str | Path | None, optional
        :param process_output_type: Specifies a level which determines the types of output files
            that will be saved/produced, defaults to None. A string is looked up as a
            `ProcessOutputType` member name, an integer as a member value.
        :type process_output_type: str | int | None, optional
        :param seed: Seed value to be used in Banff procs that allow the seed option. Useful for
            comparing results between multiple runs. Defaults to None
        :type seed: int | None, optional
        :param no_by_stats: Determines if the no_by_stats parameter is set to True for each standard
            Banff procedure. Accepts a boolean or the strings "true"/"false" in any letter case.
            A false value is stored as None. Defaults to None
        :type no_by_stats: str | bool | None, optional
        :param randnumvar: Stored with surrounding whitespace trimmed, defaults to None.
            NOTE(review): presumably the name of a random number variable handed to the
            procedures - confirm against the callers.
        :type randnumvar: str | None, optional
        :param save_format: A list of supported file extensions used to override the output file
            format, which by default uses the same type as the input file. An empty list is
            treated as not provided. Defaults to None
        :type save_format: list[str] | None, optional
        :param output_folder: Optional alternate save location for output files. It is created when
            it does not exist yet. Defaults to the "out" sub-folder of `input_folder`.
        :type output_folder: str | Path | None, optional
        :param log_level: Determines which log files are created: None or 1 selects INFO, a value
            below 1 creates no file handlers and a value above 1 selects DEBUG. Defaults to None
        :type log_level: int | None, optional
        :raises ValueError: If `input_folder` is not provided and `metadata_folder` and
            `output_folder` are not both provided
        :raises ProcessorInputParameterError: If no valid `job_id` is provided
        :raises ProcessorInputParameterError: If a file or folder parameter is a relative path while
            no `input_folder` was given, points to a location that does not exist, or if the
            output folder cannot be created
        :raises ProcessorInputParameterError: If `process_output_type` or `no_by_stats` contains an
            unrecognized value
        :raises ProcessorInputParameterError: If the `save_format` parameter is provided in
            an improper format
        """
        # NOT FOUND IN ACTUAL INPUT JSON FILE, VALUE SHOULD BE THE FOLDER THE FILE IS FOUND IN
        # this folder is used as a default file location if the path to a required file was
        # not explicitly defined in any of the other input parameters such as indata_filename
        self.input_folder = get_path_val(input_folder)

        # input_folder is technically not required if the user provides explicit locations
        # for their metadata files and their output folder.
        # The check is made on the normalised values: a blank or whitespace-only string must
        # count as "not provided", otherwise the default-folder logic below would fail with
        # a TypeError when it tries to build a path from a missing input_folder.
        if(not self.input_folder
           and not (get_path_val(metadata_folder) and get_path_val(output_folder))):
            msg = _("Input parameter input_folder must be provided unless both "
                    "metadata_folder and output_folder are provided.")
            # No exception is being handled here, so log with error() rather than exception()
            log_lcl.error(msg)
            raise ValueError(msg)

        # Optional alternate save location for output files
        self.output_folder = self._abs_path(output_folder, "output_folder")
        if(not self.output_folder):
            # Create output folder if one wasn't provided
            self.output_folder = self.input_folder / "out"
            if not self.output_folder.exists():
                self.output_folder.mkdir()

        # Optional alternate folder location of metadata files
        self.metadata_folder = self._abs_path(metadata_folder, "metadata_folder")
        if(not self.metadata_folder):
            # If no specific metadata folder is given, check for a "metadata" subfolder in the
            # input_folder. If this doesn't exist just use the input_folder
            meta_path = self.input_folder / "metadata"
            self.metadata_folder = meta_path if meta_path.exists() else self.input_folder

        # Optional alternate folder location of user program files
        self.user_plugins_folder = self._abs_path(user_plugins_folder, "user_plugins_folder")
        if(not self.user_plugins_folder and self.input_folder):
            # No custom folder given: see if the plugins are in a "plugins" subfolder
            # of the input folder
            plugins_path = self.input_folder / "plugins"
            if(plugins_path.exists()):
                self.user_plugins_folder = plugins_path

        self.job_id = get_string_param_value(job_id)
        if(self.job_id is None):
            msg = _("No valid job_id found in processor_input file.")
            log_lcl.error(msg)
            raise ProcessorInputParameterError(msg)

        # Allowed to be none in the case of a job only performing "Verifyedits"
        self.unit_id = get_string_param_value(unit_id)

        # Either just a filename or a full filepath is allowed.
        # If just a filename is provided, file will be searched for in the input_folder
        # otherwise the full filepath is used for loading the file into a dataframe
        self.indata_filename = self._abs_path(indata_filename, "indata_filename")

        # statusAll may be provided for input to any procs that require an instatus
        # parameter, if there is no statusAll file created by any prior procs in the job
        self.instatus_filename = self._abs_path(instatus_filename, "instatus_filename")

        # For each aliased pair the "indata_*/instatus_*" name wins when it is provided
        if(indata_aux_filename):
            self.indata_aux_filename = self._abs_path(indata_aux_filename, "indata_aux_filename")
        else:
            self.indata_aux_filename = self._abs_path(auxdata_filename, "auxdata_filename")

        if(indata_hist_filename):
            self.indata_hist_filename = self._abs_path(indata_hist_filename, "indata_hist_filename")
        else:
            self.indata_hist_filename = self._abs_path(histdata_filename, "histdata_filename")

        if(instatus_hist_filename):
            self.instatus_hist_filename = self._abs_path(instatus_hist_filename, "instatus_hist_filename")
        else:
            self.instatus_hist_filename = self._abs_path(histstatus_filename, "histstatus_filename")

        # Output type option to allow users to specify the level of information
        # recorded and output from their respective procs
        try:
            self.process_output_type = None
            if(isinstance(process_output_type, str)):
                # Strings are looked up by member name
                self.process_output_type = ProcessOutputType[process_output_type]
            elif(isinstance(process_output_type, int)):
                # Integers are looked up by member value
                self.process_output_type = ProcessOutputType(process_output_type)
            elif(process_output_type is not None):
                raise ValueError
        except (KeyError, ValueError) as e:
            msg = _("ProcessorInput process_output_type field contains an unrecognized value.")
            raise ProcessorInputParameterError(msg) from e

        # Boolean properties so we don't have to import ProcessOutputType
        # every place we want to check these
        self.output_custom = (self.process_output_type == ProcessOutputType.CUSTOM)
        self.output_all = (self.process_output_type == ProcessOutputType.ALL)

        # Seed value to be used in Banff procs that allow the seed option.
        # Useful for comparing results between multiple runs.
        self.seed = int(seed) if seed is not None else None

        # TIME - display the time for each seqno
        self.time = True

        # TIMESTORE - Creates the jobInfoRecords dataset which contains the start,
        # end, and elapsed amount of time of each sequence run by the Banff Processor.
        self.time_store = True

        # no_by_stats - Determines if the no_by_stats parameter is set to True for each standard Banff Procedure
        # The input may be passed in as a boolean or a string, for example true, 'true' or 'True'
        if no_by_stats is None or str(no_by_stats).casefold() == "false":
            # The no_by_stats parameter cannot be set to false in the procedures, it is either True or None
            self.no_by_stats = None
        elif str(no_by_stats).casefold() == "true":
            self.no_by_stats = True
        else:
            msg = _("Invalid value in processor_input file: no_by_stats must be true, false or not specified.")
            log_lcl.error(msg)
            raise ProcessorInputParameterError(msg)

        self.randnumvar = get_string_param_value(randnumvar)

        # Optional param used to override the output file format which by default
        # uses the same type as the input file
        if(save_format is not None):
            if(isinstance(save_format, list)):
                # Only save a value if it's a list with items in it
                self.save_format = save_format if save_format else None
            else:
                msg = _("save_format parameter of the input JSON file must be a list "
                        "of supported file extensions or be excluded or empty.")
                log_lcl.error(msg)
                raise ProcessorInputParameterError(msg)
        else:
            self.save_format = None

        # Determines if we should create the INFO and/or DEBUG log files
        if(log_level is None or log_level == 1):
            # Default option
            self.log_level = log_levels.INFO
        elif(log_level < 1):
            # does not create any file handlers
            self.log_level = None
        else:
            # Anything above 1. Using a plain else guarantees the attribute is always set.
            self.log_level = log_levels.DEBUG

    def _abs_path(self, file_or_dir: Path | str | None, param_name: str) -> Path | None:
        """Convert file_or_dir to an absolute path.

        If file_or_dir is a non-absolute path it is treated as relative to the input_folder and
        the final path will begin with input_folder. `self.input_folder` must already be set
        when this helper is called.

        A missing location is an error for every parameter except "output_folder", for which
        the directory is created instead.

        :param file_or_dir: The filepath to convert.
        :type file_or_dir: str | Path | None
        :param param_name: The name of the parameter being processed (for error reporting)
        :type param_name: str
        :raises ProcessorInputParameterError: If the path is relative while no input folder is
            set, if it does not exist, or if the output folder cannot be created
        :return: file_or_dir as represented by a Path object, or None when nothing was provided.
        :rtype: Path | None
        """
        file_or_dir = get_path_val(file_or_dir)
        if(not file_or_dir):
            return None

        output_parents = False
        if(not file_or_dir.is_absolute()):
            if(not self.input_folder):
                msg = _("Input folder was not given but {} is a relative path.").format(param_name)
                log_lcl.error(msg)
                raise ProcessorInputParameterError(msg)

            file_or_dir = (self.input_folder / file_or_dir).resolve()
            # Specifically for output_folder, if given as relative path,
            # we want to create any parent folders of the final output directory
            # if it doesn't yet exist. If output_folder is given as absolute
            # we DON'T want to create the parent folders, only the final output directory.
            output_parents = True

        if(file_or_dir.exists()):
            return file_or_dir

        if(param_name != "output_folder"):
            msg = _("Filepath in input parameter {}: {} is not accesible or does "
                    "not exist.").format(param_name, str(file_or_dir))
            log_lcl.error(msg)
            raise ProcessorInputParameterError(msg)

        try:
            file_or_dir.mkdir(parents=output_parents)
        except FileNotFoundError as e:
            msg = _("Parent folders in input parameter filepath output_folder: {} could not be "
                    "found so output folder could not be created.").format(str(file_or_dir))
            # Inside a handler, so exception() correctly records the traceback
            log_lcl.exception(msg)
            raise ProcessorInputParameterError(msg) from e

        return file_or_dir

    @classmethod
    def from_file(cls, filepath: str | Path) -> "ProcessorInput":
        """Initialize a :class:`src.banffprocessor.processor_input.ProcessorInput` object from a JSON file.

        The folder containing the JSON file is passed to the constructor as `input_folder`
        and the top-level entries of the JSON document are passed as keyword arguments.

        :param filepath: the full path to the JSON file containing the input parameters
            required to run the processor.
        :type filepath: str | Path
        :raises ProcessorInputParameterError: If the `filepath` does not contain a valid
            directory or JSON filename
        :raises FileNotFoundError: If the file at `filepath` is not able to be found
        :return: The object loaded from the parameters in the JSON file
        :rtype: :class:`src.banffprocessor.processor_input.ProcessorInput`
        """
        filepath = Path(filepath)

        if(not filepath.is_absolute() or not filepath.is_file()):
            msg = _("Filepath is not properly configured. "
                    "Make sure that you use the full filepath of the input JSON file.")
            log_lcl.error(msg)
            raise ProcessorInputParameterError(msg)

        if(filepath.suffix.casefold() != ".json"):
            msg = _("input_filepath is not a JSON file. The input file parameters "
                    "must be passed as the full filepath of the input JSON file.")
            log_lcl.error(msg)
            raise ProcessorInputParameterError(msg)

        # Load input variables from json file found in processing folder.
        # The encoding is explicit so the result does not depend on the platform default;
        # "utf-8-sig" reads plain UTF-8 and also tolerates a leading byte order mark.
        # Only the file read is guarded, so a failure while building the object is never
        # reported as a missing JSON file.
        try:
            with filepath.open(encoding="utf-8-sig") as json_input_params:
                json_input = json.load(json_input_params)
        except FileNotFoundError as e:
            msg = _("Unable to find JSON input file under input filepath {}").format(filepath)
            log_lcl.exception(msg)
            raise FileNotFoundError(msg) from e

        # Build the object and return
        return cls(input_folder=filepath.parent, **json_input)
[docs]
def get_string_param_value(parameter_to_check: str | None) -> str | None:
"""Process string values from the parameter file.
:param parameter_to_check: The string value to process
:type parameter_to_check: str | None
:return: `None` if `parameter_to_check` is `None`, empty or only whitespace
and the original string with whitespace trimmed from beginning and end if not
:rtype: str | None
"""
if parameter_to_check is None or parameter_to_check == "" or str(parameter_to_check).isspace():
return None
# Otherwise apply the strip function just in case
return str(parameter_to_check).strip()
[docs]
def get_path_val(fpath: str | Path | None) -> Path | None:
"""Return the Path representation of `fpath` or None if `fpath` is None or empty.
:param fpath: The filepath to convert
:type fpath: str | Path | None
:return: The Path representation of `fpath`
:rtype: Path | None
"""
if(isinstance(fpath, str)):
fpath = get_string_param_value(fpath)
return Path(fpath) if fpath else None
[docs]
class ProcessOutputType(Enum, metaclass=CaseInsensitiveEnumMeta):
    """Enumerates the sets of outputs the processor can be asked to create for a job.

    `ProcessorInput` accepts either a member name or a member value for its
    `process_output_type` parameter.
    NOTE(review): the metaclass presumably makes the name lookup case-insensitive -
    confirm against `CaseInsensitiveEnumMeta`.
    """

    MINIMAL = 1
    # Exposed on ProcessorInput as the `output_all` flag
    ALL = 2
    # Exposed on ProcessorInput as the `output_custom` flag
    CUSTOM = 3