banff.testing package#

Submodules#

banff.testing.banff_testing module#

class banff.testing.banff_testing.PytestDetermin(pytest_capture=None, sas_log_name=None, msg_list_sas=None, msg_list_sas_exact=None, msg_list_contains=None, msg_list_contains_exact=None, expected_error_count=0, expected_warning_count=0, rc_should_be_zero=True, round_data=None, drop_columns=True, expected_outdata=None, expected_outstatus=None, accept_negative=None, no_by_stats=None, edits=None, unit_id=None, by=None, indata=None, instatus=None, outdata=None, outstatus=None, presort=None, prefill_by_vars=None, trace=None, capture=False, **kwargs)[source]#

Bases: PytestProcedure

Pytest helper function for Deterministic procedure.

class banff.testing.banff_testing.PytestDonorimp(pytest_capture=None, sas_log_name=None, msg_list_sas=None, msg_list_sas_exact=None, msg_list_contains=None, msg_list_contains_exact=None, expected_error_count=0, expected_warning_count=0, rc_should_be_zero=True, round_data=None, drop_columns=True, expected_outdata=None, expected_outstatus=None, expected_outdonormap=None, expected_outmatching_fields=None, unit_id=None, by=None, must_match=None, data_excl_var=None, rand_num_var=None, random=None, seed=None, edits=None, post_edits=None, display_level=None, accept_negative=None, no_by_stats=None, min_donors=None, percent_donors=None, n=None, eligdon=None, n_limit=None, mrl=None, indata=None, instatus=None, outdata=None, outstatus=None, outdonormap=None, outmatching_fields=None, presort=None, prefill_by_vars=None, exclude_where_indata=None, trace=None, capture=False, **kwargs)[source]#

Bases: PytestProcedure

Pytest helper function for Donor Imputation procedure.

class banff.testing.banff_testing.PytestEditstat(pytest_capture=None, sas_log_name=None, msg_list_sas=None, msg_list_sas_exact=None, msg_list_contains=None, msg_list_contains_exact=None, expected_error_count=0, expected_warning_count=0, rc_should_be_zero=True, round_data=None, drop_columns=True, expected_outedit_applic=None, expected_outedit_status=None, expected_outglobal_status=None, expected_outk_edits_status=None, expected_outedits_reduced=None, expected_outvars_role=None, accept_negative=None, edits=None, by=None, indata=None, outedit_applic=None, outedit_status=None, outglobal_status=None, outk_edits_status=None, outedits_reduced=None, outvars_role=None, presort=None, trace=None, capture=False, **kwargs)[source]#

Bases: PytestProcedure

Pytest helper function for Edit Statistics procedure.

class banff.testing.banff_testing.PytestErrorloc(pytest_capture=None, sas_log_name=None, msg_list_sas=None, msg_list_sas_exact=None, msg_list_contains=None, msg_list_contains_exact=None, expected_error_count=0, expected_warning_count=0, rc_should_be_zero=True, round_data=None, drop_columns=True, expected_outstatus=None, expected_outreject=None, unit_id=None, by=None, rand_num_var=None, edits=None, weights=None, cardinality=None, time_per_obs=None, seed=None, display_level=None, accept_negative=None, no_by_stats=None, indata=None, instatus=None, outstatus=None, outreject=None, presort=None, prefill_by_vars=None, trace=None, capture=False, **kwargs)[source]#

Bases: PytestProcedure

Pytest helper function for Error Localization procedure.

class banff.testing.banff_testing.PytestEstimato(pytest_capture=None, sas_log_name=None, msg_list_sas=None, msg_list_sas_exact=None, msg_list_contains=None, msg_list_contains_exact=None, expected_error_count=0, expected_warning_count=0, rc_should_be_zero=True, round_data=None, drop_columns=True, expected_outstatus=None, expected_outdata=None, expected_outacceptable=None, expected_outest_ef=None, expected_outest_lr=None, expected_outest_parm=None, expected_outrand_err=None, unit_id=None, by=None, data_excl_var=None, hist_excl_var=None, seed=None, verify_specs=None, accept_negative=None, no_by_stats=None, indata=None, instatus=None, indata_hist=None, inalgorithm=None, inestimator=None, instatus_hist=None, outstatus=None, outdata=None, outacceptable=None, outest_ef=None, outest_lr=None, outest_parm=None, outrand_err=None, presort=None, prefill_by_vars=None, exclude_where_indata=None, exclude_where_indata_hist=None, trace=None, capture=False, **kwargs)[source]#

Bases: PytestProcedure

Pytest helper function for Estimator procedure.

class banff.testing.banff_testing.PytestMassimpu(pytest_capture=None, sas_log_name=None, msg_list_sas=None, msg_list_sas_exact=None, msg_list_contains=None, msg_list_contains_exact=None, expected_error_count=0, expected_warning_count=0, rc_should_be_zero=True, round_data=None, drop_columns=True, expected_outdata=None, expected_outstatus=None, expected_outdonormap=None, accept_negative=None, no_by_stats=None, random=None, mrl=None, percent_donors=None, min_donors=None, n_limit=None, seed=None, unit_id=None, by=None, must_impute=None, must_match=None, indata=None, outdata=None, outdonormap=None, presort=None, trace=None, capture=False, **kwargs)[source]#

Bases: PytestProcedure

Pytest helper function for Mass Imputation procedure.

class banff.testing.banff_testing.PytestOutlier(pytest_capture=None, sas_log_name=None, msg_list_sas=None, msg_list_sas_exact=None, msg_list_contains=None, msg_list_contains_exact=None, expected_error_count=0, expected_warning_count=0, rc_should_be_zero=True, round_data=None, drop_columns=True, expected_outstatus=None, expected_outstatus_detailed=None, expected_outsummary=None, unit_id=None, weight=None, by=None, var=None, with_var=None, accept_negative=None, no_by_stats=None, accept_zero=None, outlier_stats=None, beta_e=None, beta_i=None, exponent=None, mdm=None, mei=None, mii=None, start_centile=None, min_obs=None, method=None, side=None, sigma=None, indata=None, indata_hist=None, outstatus=None, outstatus_detailed=None, outsummary=None, presort=None, exclude_where_indata=None, trace=None, capture=False, **kwargs)[source]#

Bases: PytestProcedure

Pytest helper function for Outlier procedure.

class banff.testing.banff_testing.PytestProrate(pytest_capture=None, sas_log_name=None, msg_list_sas=None, msg_list_sas_exact=None, msg_list_contains=None, msg_list_contains_exact=None, expected_error_count=0, expected_warning_count=0, rc_should_be_zero=True, round_data=None, drop_columns=True, expected_outdata=None, expected_outstatus=None, expected_outreject=None, accept_negative=None, no_by_stats=None, verify_edits=None, lower_bound=None, upper_bound=None, decimal=None, edits=None, method=None, modifier=None, unit_id=None, by=None, indata=None, instatus=None, outstatus=None, outdata=None, outreject=None, presort=None, prefill_by_vars=None, trace=None, capture=False, **kwargs)[source]#

Bases: PytestProcedure

Pytest helper function for Prorate procedure.

class banff.testing.banff_testing.PytestVerifyed(pytest_capture=None, sas_log_name=None, msg_list_sas=None, msg_list_sas_exact=None, msg_list_contains=None, msg_list_contains_exact=None, expected_error_count=0, expected_warning_count=0, rc_should_be_zero=True, accept_negative=None, extremal=None, imply=None, edits=None, trace=None, capture=False, **kwargs)[source]#

Bases: PytestProcedure

Pytest helper function for Verify Edits procedure.

Module contents#

banff.testing.PAT_from_string(foo, sep=None)[source]#

Generate a pyarrow table from a text string.

This function aids in the creation of control datasets for test cases. It generates a pyarrow table from a specially crafted multi-line text string which provides the table’s column names, types, and values.

For example, a table having 1 string and 5 numeric columns with 2 rows of data, including an empty string in the ident column and missing value in Q4:

'''
s n n n n n
ident total Q1 Q2 Q3 Q4
REC04 1000 150 250 130 250
'' 1001 151 251 131 NaN
'''

Empty lines are ignored. The 1st line describes the column types:

s - string; n - numeric. NOTE: types are always separated by whitespace, regardless of sep=

The 2nd line names the columns; subsequent lines (if any) provide values for each row.

Whitespace is used to delimit values in each row by default.

Specify sep to override, for example sep=',' to use commas.

When fewer types (1st line) are provided than columns, the final type is applied to all remaining columns

i.e. the type line above with “s n” would produce the same result

Empty Strings:

When using the default sep=None, empty string values should be specified as empty single quotes (''), which are converted to an empty string. Alternatively (and if the value SHOULD be two single quotes), specify sep= to use a non-whitespace character (like a comma).

banff.testing.assert_dataset_equal(test_dataset, control_dataset, dataset_name, upcase_columns=False, sort_columns=True, sort_values=True, round_data=None, convert_columns=True, drop_columns=None, compare_with=None)[source]#

Check that test and control datasets are sufficiently equal.

Handles common issues such as
  • empty datasets

  • some type mismatches

  • different column sort order

  • float precision issues

dataset_name : str

used in print statements

sort_columns : bool

sort both datasets' columns before comparison

sort_values : bool

sort values of all columns in both datasets before comparison

round_data : int | None

If integer, round floating point values to round_data decimal places

convert_columns : bool

convert integer columns to floating point columns

drop_columns : str | list of str

drop these columns, if found, on control datasets prior to comparison

compare_with : None | pandas.DataFrame | pyarrow.Table

Convert test and control datasets to the specified format for comparison. When unspecified (or None), use the type that test_dataset uses.

banff.testing.assert_dataset_value(dataset=None, dataset_name='', row_num=None, col_name=None, expected_values=None)[source]#

Validate specific dataset value against set of expected values.

Specify the row number (int, 0-index), the column name (str), and a list of expected values, e.g. ['of', 'expected', 'values'].

banff.testing.assert_log_consistent(msg, test_log, sas_log_path, must_exist=False, clean_whitespace=False)[source]#

Check for msg in test_log and in log file found at sas_log_path.

Assert that it is either present in both or absent from both, i.e. the assertion fails if there is an inconsistency w.r.t. presence.

Tighten the check using must_exist=True - assert also fails if msg not found in the SAS log

banff.testing.assert_log_contains(msg, test_log, clean_whitespace=False)[source]#

Check for msg in test log.

Assert that it is found. See assert_log_consistent for more details.

banff.testing.assert_substr_count(substr_to_count='ERROR:', test_log=None, expected_count=0)[source]#

Calculate count of substr_to_count in test_log.

Asserts that it is equal to expected_count

banff.testing.pytest_determin#

alias of PytestDetermin

banff.testing.pytest_donorimp#

alias of PytestDonorimp

banff.testing.pytest_editstat#

alias of PytestEditstat

banff.testing.pytest_errorloc#

alias of PytestErrorloc

banff.testing.pytest_estimato#

alias of PytestEstimato

banff.testing.pytest_massimpu#

alias of PytestMassimpu

banff.testing.pytest_outlier#

alias of PytestOutlier

banff.testing.pytest_prorate#

alias of PytestProrate

banff.testing.pytest_verifyed#

alias of PytestVerifyed

banff.testing.run_pytest()[source]#

Invoke pytest from within Python code.

In files which implement pytest tests, add the following code:

```python
import sys

if __name__ == "__main__":
    run_pytest()
```

Execute the file using python <filename> and this function will launch pytest with preset options.

banff.testing.run_standard_assertions(expect_zero_rc=True, rc=None, python_log=None, sas_log_path=None, msg_list_sas=None, msg_list_sas_exact=None, msg_list_contains=None, msg_list_contains_exact=None, expect_error_count=None, expect_warning_count=None, ds_compare_list=None, round_data=None, drop_columns=None, upcase_columns=False, pytest_capture=None)[source]#