Source code for banffprocessor.procedures.banff_procedures.outlier

import banff.exceptions
from banff import outlier
from banff._log import log_levels

# Import must be absolute in order to ensure all modules reference the same global _c_handlers
import banffprocessor.processor_logger as plg
from banffprocessor.exceptions import ProcessorInputParameterError
from banffprocessor.metadata.models.outlierspecs import Outlierspecs
from banffprocessor.nls import _
from banffprocessor.procedures import factory
from banffprocessor.processor_data import ProcessorData
from banffprocessor.util.dataset import table_empty

# Setup local log for processor module specifically
log_lcl = plg.get_processor_child_logger("outlier")

# Required Metadata files = "outlierspecs"
# Optional Metadata files = "varlists"

class Outlier:
    """Implements the Outlier Banff procedure as a `:class:banffprocessor.procedures.procedure_interface`.

    The class only exposes the `execute` classmethod, which reads the outlier
    specifications for the current job step, calls `banff.outlier` and stores
    the resulting tables back on the processor data object.
    """

    # Names of the datasets that `execute` may store through `set_dataset`.
    output_tables: tuple[str, ...] = ("outlier_status", "outsummary")

    @classmethod
    def execute(cls, processor_data: ProcessorData) -> int:
        """Execute the banff.outlier call, and returns the results.

        :param processor_data: Processor state holding the current job step,
            the metadata objects, the input parameters and the datasets.
        :return: The return code of the Banff call (`rc`), or the
            `return_code` carried by the `ProcedureCError` if the procedure
            failed during execution.
        :raises ProcessorInputParameterError: If the outlier specs indicate two
            data sets for a HISTORIC, RATIO or SIGMAP method but neither the
            historic nor the auxiliary dataset could be retrieved.
        """
        # alias the param name to shorten references
        bp = processor_data

        job_step = bp.current_job_step
        outlier_spec = bp.metaobjects.get_specs_obj(Outlierspecs, job_step.specid)

        varlist = []
        with_varlist = []
        if outlier_spec.varid is not None:
            varlist = bp.metaobjects.get_varlist_fieldids(outlier_spec.varid)
        if outlier_spec.withid is not None:
            with_varlist = bp.metaobjects.get_varlist_fieldids(outlier_spec.withid)

        # Choose the auxiliary or historic file to use, depending on method.
        # Currently aux and historic files use the same argument in the banff proc call.
        indata_hist_param = None

        # We only want to check for an extra data file if it was indicated in the metadata
        if outlier_spec.numDataSets == 2:
            indata_aux = bp.get_dataset("indata_aux")
            indata_hist = bp.get_dataset("indata_hist")

            if outlier_spec.method == "HISTORIC":
                # Prefer the historic file, fall back on the auxiliary file
                indata_hist_param = indata_hist if indata_hist is not None else indata_aux
                if indata_hist_param is None:
                    msg = _("Outlierspecs metadata table indicated two data sets for this step "
                            "so HISTORIC outlier method expects one of historic data file "
                            "or auxillary data file but neither were able to be loaded.")
                    # No exception is being handled here, so log with error() rather than
                    # exception(), which would append an empty "NoneType: None" traceback.
                    log_lcl.error(msg)
                    raise ProcessorInputParameterError(msg)
            elif outlier_spec.method in {"RATIO", "SIGMAP"}:
                # Prefer the auxiliary file, fall back on the historic file
                indata_hist_param = indata_aux if indata_aux is not None else indata_hist
                if indata_hist_param is None:
                    msg = _("Outlierspecs metadata table indicated two data sets for this step "
                            "so RATIO or SIGMAP outlier method expects one of auxillary data file "
                            "or historic data file but neither were able to be loaded.")
                    # Same reasoning as above: error() because no exception is active.
                    log_lcl.error(msg)
                    raise ProcessorInputParameterError(msg)

        # Imputed_File should always have data by this point, but we'll make sure to pass None
        # instead of an empty table to the banff call just to make sure we don't pass an empty table
        imputed_file = bp.get_dataset("imputed_file")

        # Form our Banff call
        try:
            banff_call = outlier(
                unit_id=bp.input_params.unit_id,
                weight=outlier_spec.weight,
                by=" ".join(bp.by_varlist) if bp.by_varlist else None,
                var=" ".join(varlist) if varlist else None,
                with_var=" ".join(with_varlist) if with_varlist else None,
                no_by_stats=bp.input_params.no_by_stats,
                presort=True,

                # Not supposed to provide these as False, only True or None
                accept_negative=job_step.acceptnegative,
                accept_zero=outlier_spec.acceptzero,
                beta_e=outlier_spec.betae,
                beta_i=outlier_spec.betai,
                exponent=outlier_spec.exponent,
                mdm=outlier_spec.mdm,
                mei=outlier_spec.mei,
                mii=outlier_spec.mii,
                start_centile=outlier_spec.startcentile,
                min_obs=outlier_spec.minobs,
                method=outlier_spec.method,
                side=outlier_spec.side,
                sigma=outlier_spec.sigma,
                exclude_where_indata=bp.metaobjects.get_expression(outlier_spec.dataexclvar),

                indata=imputed_file if imputed_file is not None and not table_empty(imputed_file) else None,
                indata_hist=indata_hist_param,

                # Specify to get extra data in outstatus, used for validation in tests
                outlier_stats=True,

                outstatus="pyarrow",
                outstatus_detailed="pyarrow" if bp.output_required("outlier_status") else False,
                outsummary="pyarrow" if bp.output_required("outsummary") else False,

                # We want everything captured while an input param configures the handlers
                # which indirectly filter.
                trace=log_levels.NOTSET,
                # Note that capture=None will suppress console output in new version so use False or omit
                logger=log_lcl,
                _BP_c_log_handlers=plg.get_c_handlers(),
            )
        except banff.exceptions.ProcedureCError as e:
            msg = _("An error occured during execution of this procedure.")
            # Inside a handler, so exception() correctly records the traceback
            log_lcl.exception(msg)
            # Get the return code from the exception
            return e.return_code

        bp.outstatus = banff_call.outstatus

        # Will only have values if they were requested
        if banff_call.outstatus_detailed:
            bp.set_dataset("outlier_status", banff_call.outstatus_detailed)
        if banff_call.outsummary:
            bp.set_dataset("outsummary", banff_call.outsummary)

        return banff_call.rc
def register(factory: factory) -> None:
    """Add the `Outlier` class to the given procedure factory under the name "outlier"."""
    procedure_name = "outlier"
    factory.register(procedure_name, Outlier)