# Source code for banffprocessor.metadata.models.donorspecs

"""Metadata model for Donor Imputation specifications."""

import duckdb

from banffprocessor.exceptions import MetadataConstraintError
from banffprocessor.metadata.models.metadataclass import MetadataClass
from banffprocessor.nls import _


class Donorspecs(MetadataClass):
    """Metadata class for donor imputation specifications.

    Creating an instance validates one set of donor-imputation
    specifications and, when every constraint passes, inserts it as a row in
    the ``banff`` table named after the class.
    """

    def __init__(
        self,
        specid: str,
        n: int,
        dataexclvar: str | None = None,
        posteditgroupid: str | None = None,
        mustmatchid: str | None = None,
        mindonors: int | None = None,
        pcentdonors: float | None = None,
        eligdon: str | None = None,
        random: str | bool | None = None,
        nlimit: int | None = None,
        mrl: float | None = None,
        dbconn: duckdb.DuckDBPyConnection = duckdb,
    ) -> None:
        """Validate and create metadata entry, if validation passes.

        Args:
            specid: Identifier of the specification (primary key of the table).
            n: Required; converted with ``int`` and must be >= 1.
            dataexclvar: Optional reference checked against
                ``Expressions.expressionid`` by :meth:`check_constraints`.
            posteditgroupid: Optional reference checked against
                ``Editgroups.editgroupid`` by :meth:`check_constraints`.
            mustmatchid: Optional reference checked against
                ``VarLists.varlistid`` by :meth:`check_constraints`.
            mindonors: Optional; converted with ``int`` and must be >= 1.
            pcentdonors: Optional; converted with ``float`` and must satisfy
                ``1 <= pcentdonors < 100``.
            eligdon: Optional; case-insensitively one of ``' '``, ``'A'``,
                ``'ANY'``, ``'O'``, ``'ORIGINAL'``. Stored as given.
            random: Optional flag. ``'Y'``/``'N'`` (any case) map to
                ``True``/``False``; a ``bool`` is stored unchanged; ``None``
                or a blank string is stored as ``None``.
            nlimit: Optional; converted with ``int`` and must be >= 1.
            mrl: Optional; converted with ``float`` and must be > 0.
            dbconn: Object whose ``execute`` runs the INSERT; defaults to the
                ``duckdb`` module itself.

        Raises:
            MetadataConstraintError: If any of the value constraints above
                is violated. Nothing is inserted in that case.
        """
        self.specid = specid

        self.mindonors = None if mindonors is None else int(mindonors)
        # constraint invalidMinDonors
        if self.mindonors is not None and self.mindonors < 1:
            raise self._constraint_violation(
                _("{} value must be greater than or equal to {}.").format("MinDonors", "1"),
            )

        self.pcentdonors = None if pcentdonors is None else float(pcentdonors)
        # constraint invalidPcentdonors
        if self.pcentdonors is not None and (self.pcentdonors < 1 or self.pcentdonors >= 100):
            raise self._constraint_violation(
                _("PcentDonors value must be between 1 inclusive and 100 exclusive."),
            )

        self.n = int(n)
        # constraint invalidN
        if self.n < 1:
            raise self._constraint_violation(
                _("{} value must be greater than or equal to {}.").format("N", "1"),
            )

        self.eligdon = eligdon
        # constraint invalidEligdon
        if self.eligdon is not None and self.eligdon.upper() not in {" ", "A", "ANY", "O", "ORIGINAL"}:
            raise self._constraint_violation(
                _("{} value must be one of {}.").format("Eligdon", " ' ', 'A', 'ANY', 'O', 'Original' "),
            )

        # constraint invalidRandom
        if isinstance(random, bool):
            # The parameter may legitimately arrive as a real boolean; calling
            # .upper() on it would raise AttributeError, so store it directly.
            self.random = random
        elif random is None or not random.strip():
            # Gives value None if the field is missing, empty or whitespace.
            self.random = None
        else:
            flag = random.upper()
            if flag == "Y":
                self.random = True
            elif flag == "N":
                self.random = False
            else:
                raise self._constraint_violation(
                    _("{} value must be one of {}.").format("Random", " ' ', 'Y' or 'N' "),
                )

        self.nlimit = None if nlimit is None else int(nlimit)
        # constraint invalidNlimit
        if self.nlimit is not None and self.nlimit < 1:
            raise self._constraint_violation(
                _("{} value must be greater than or equal to {}.").format("Nlimit", "1"),
            )

        self.mrl = None if mrl is None else float(mrl)
        # constraint invalidMRL
        if self.mrl is not None and self.mrl <= 0:
            raise self._constraint_violation(
                _("{} value must be greater than {}.").format("MRL", "0"),
            )

        self.mustmatchid = mustmatchid
        self.posteditgroupid = posteditgroupid
        self.dataexclvar = dataexclvar

        # Note that the order of attributes must match the order in the create statement
        statement = f"INSERT INTO banff.{self.__class__.__name__} VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" # noqa: S608
        dbconn.execute(
            statement,
            [
                self.specid,
                self.mindonors,
                self.pcentdonors,
                self.n,
                self.eligdon,
                self.random,
                self.nlimit,
                self.mrl,
                self.dataexclvar,
                self.mustmatchid,
                self.posteditgroupid,
            ],
        )

    def _constraint_violation(self, detail: str) -> MetadataConstraintError:
        """Return the constraint error for this table with ``detail`` appended."""
        msg = _("Constraint violated in {} table: ").format(self.__class__.__name__)
        msg += detail
        return MetadataConstraintError(msg)

    @classmethod
    def initialize(cls, dbconn: duckdb.DuckDBPyConnection = duckdb) -> None:
        """Create duckdb table to store the metadata.

        Calls ``cls.setup`` first, then creates ``banff.<class name>`` with
        the columns in the same order ``__init__`` inserts them.
        """
        cls.setup(dbconn=dbconn)

        create_statement = (
            f"CREATE TABLE banff.{cls.__name__} ( "
            "specid VARCHAR PRIMARY KEY, "
            "mindonors INT, "
            "pcentdonors REAL, "
            "n INT NOT NULL, "
            "eligdon VARCHAR, "
            "random BOOLEAN, "
            "nlimit INT, "
            "mrl REAL, "
            "dataexclvar VARCHAR, "
            "mustmatchid VARCHAR, "
            "posteditgroupid VARCHAR "
            ") "
        )
        dbconn.execute(create_statement)

    @staticmethod
    def get_schema(root_element_name: str = "banffProcessor") -> str:
        """Return schema (XSD) contents as a string.

        Args:
            root_element_name: Name given to the root ``xs:element`` that
                wraps the ``donorspecs`` entries.
        """
        # The XSD is assembled from adjacent literals; the whitespace between
        # elements is reproduced exactly as in the original literal.
        optional_id_type = (
            "<xs:simpleType> "
            '<xs:restriction base="xs:string"> '
            '<xs:minLength value="0"/> '
            '<xs:maxLength value="100"/> '
            "</xs:restriction> "
            "</xs:simpleType> "
            "</xs:element> "
        )
        positive_int_type = (
            "<xs:simpleType> "
            '<xs:restriction base="xs:positiveInteger"> '
            '<xs:minInclusive value="1"/> '
            "</xs:restriction> "
            "</xs:simpleType> "
            "</xs:element> "
        )
        return (
            '<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> '
            f'<xs:element name="{root_element_name}"> '
            "<xs:complexType> "
            "<xs:sequence> "
            '<xs:element name="donorspecs" maxOccurs="1000" minOccurs="0"> '
            "<xs:complexType> "
            "<xs:all> "
            '<xs:element name="specid"> '
            "<xs:simpleType> "
            '<xs:restriction base="xs:string"> '
            '<xs:minLength value="1"/> '
            '<xs:maxLength value="100"/> '
            "</xs:restriction> "
            "</xs:simpleType> "
            "</xs:element> "
            '<xs:element minOccurs="0" name="mindonors" nillable="true"> '
            + positive_int_type
            + '<xs:element minOccurs="0" name="pcentdonors" nillable="true"> '
            "<xs:simpleType> "
            '<xs:restriction base="xs:float"> '
            '<xs:maxExclusive value="100"/> '
            '<xs:minInclusive value="1"/> '
            "</xs:restriction> "
            "</xs:simpleType> "
            "</xs:element> "
            '<xs:element name="n"> '
            + positive_int_type
            + '<xs:element minOccurs="0" name="eligdon" nillable="true"> '
            "<xs:simpleType> "
            '<xs:restriction base="xs:string"> '
            '<xs:enumeration value="ANY"/> '
            '<xs:enumeration value="any"/> '
            '<xs:enumeration value="ORIGINAL"/> '
            '<xs:enumeration value="original"/> '
            "</xs:restriction> "
            "</xs:simpleType> "
            "</xs:element> "
            '<xs:element minOccurs="0" name="random" nillable="true"> '
            "<xs:simpleType> "
            '<xs:restriction base="xs:string"> '
            '<xs:enumeration value="Y"/> '
            '<xs:enumeration value="N"/> '
            '<xs:enumeration value="y"/> '
            '<xs:enumeration value="n"/> '
            "</xs:restriction> "
            "</xs:simpleType> "
            "</xs:element> "
            '<xs:element minOccurs="0" name="nlimit" nillable="true"> '
            + positive_int_type
            + '<xs:element minOccurs="0" name="mrl" nillable="true"> '
            "<xs:simpleType> "
            '<xs:restriction base="xs:float"> '
            '<xs:minExclusive value="0"/> \n'
            "</xs:restriction> "
            "</xs:simpleType> "
            "</xs:element> "
            '<xs:element minOccurs="0" name="dataexclvar" nillable="true"> '
            + optional_id_type
            + '<xs:element minOccurs="0" name="mustmatchid" nillable="true"> '
            + optional_id_type
            + '<xs:element minOccurs="0" name="posteditgroupid" nillable="true"> '
            + optional_id_type
            + "</xs:all> "
            "</xs:complexType> "
            "</xs:element> "
            "</xs:sequence> "
            "</xs:complexType> "
            "</xs:element> "
            "</xs:schema>"
        )

    @classmethod
    def check_constraints(cls, dbconn: duckdb.DuckDBPyConnection = duckdb) -> None:
        """Check constraints after all metadata has been loaded (typically foreign key constraints).

        For each referencing column, looks up at most five distinct non-NULL
        values that are absent from the referenced table and reports them
        through ``cls.handle_foreign_key_violation``.
        """
        table_name = cls.__name__

        # (column in this table, referenced table, referenced column)
        foreign_keys = (
            ("dataexclvar", "Expressions", "expressionid"),
            ("mustmatchid", "VarLists", "varlistid"),
            ("posteditgroupid", "Editgroups", "editgroupid"),
        )

        for var_name, table2_name, table2_var_name in foreign_keys:
            undefined_values = dbconn.sql(
                f"select distinct {var_name} from banff.{table_name} "  # noqa: S608
                f"where {var_name} is not NULL and {var_name} not in "
                f"(select {table2_var_name} from banff.{table2_name}) "
                "limit 5 ",
            )

            if undefined_values.shape[0] > 0:
                values_not_found = ", ".join(undefined_values.to_df()[var_name])
                cls.handle_foreign_key_violation(
                    table_name,
                    var_name,
                    table2_name,
                    table2_var_name,
                    values_not_found,
                )