Source code for banffprocessor.metadata.models.estimatorspecs

"""Metadata model for Estimator specifications."""

import duckdb

from banffprocessor.metadata.models.metadataclass import MetadataClass


class Estimatorspecs(MetadataClass):
    """Metadata class for Estimator specifications.

    Constructing an instance inserts one row into the ``banff.<class name>``
    DuckDB table created by :meth:`initialize`.
    """

    def __init__(
        self,
        specid: str,
        estimatorid: str,
        dataexclvar: str | None = None,
        histexclvar: str | None = None,
        dbconn: duckdb.DuckDBPyConnection = duckdb,
    ) -> None:
        """Store the specification and insert it into the metadata table.

        Validation is delegated to the table definition: ``specid`` is the
        primary key and ``Estimatorid`` is ``NOT NULL``, so the insert itself
        fails when either constraint is violated.

        Args:
            specid: Identifier of the specification (primary key).
            estimatorid: Identifier of the estimator; must not be ``None``.
            dataexclvar: Optional value; when set it is checked against
                ``Expressions.expressionid`` by :meth:`check_constraints`.
            histexclvar: Optional value; when set it is checked against
                ``Expressions.expressionid`` by :meth:`check_constraints`.
            dbconn: DuckDB connection (defaults to the ``duckdb`` module) used
                to execute the insert.
        """
        self.specid = specid
        self.estimatorid = estimatorid
        self.dataexclvar = dataexclvar
        self.histexclvar = histexclvar

        # Name the target columns explicitly so the binding no longer depends
        # on the column order of the CREATE TABLE statement in `initialize`.
        statement = (
            f"INSERT INTO banff.{self.__class__.__name__} "  # noqa: S608
            "(specid, dataexclvar, histexclvar, Estimatorid) "
            "VALUES (?, ?, ?, ?)"
        )
        dbconn.execute(
            statement,
            [self.specid, self.dataexclvar, self.histexclvar, self.estimatorid],
        )

    @classmethod
    def initialize(cls, dbconn: duckdb.DuckDBPyConnection = duckdb) -> None:
        """Create the DuckDB table that stores this metadata.

        Calls ``cls.setup`` first (not defined in this class; presumably
        inherited from ``MetadataClass`` -- confirm there what it prepares).

        Args:
            dbconn: DuckDB connection (defaults to the ``duckdb`` module) used
                to execute the statement.
        """
        cls.setup(dbconn=dbconn)
        create_statement = f"""CREATE TABLE banff.{cls.__name__} (
            specid VARCHAR PRIMARY KEY,
            dataexclvar VARCHAR,
            histexclvar VARCHAR,
            Estimatorid VARCHAR NOT NULL
        )
        """
        dbconn.execute(create_statement)

    @staticmethod
    def get_schema(root_element_name: str = "banffProcessor") -> str:
        """Return schema (XSD) contents as a string.

        Args:
            root_element_name: Name given to the root ``xs:element`` that wraps
                the ``estimatorspecs`` entries.

        Returns:
            The XSD document: up to 5000 ``estimatorspecs`` elements, each with
            required ``specid`` and ``estimatorid`` and optional, nillable
            ``dataexclvar`` and ``histexclvar``; every field is a string of
            1 to 100 characters.
        """
        # Every field shares the same "string of 1..100 characters" restriction.
        bounded_string = (
            "<xs:simpleType> "
            '<xs:restriction base="xs:string"> '
            '<xs:minLength value="1"/> '
            '<xs:maxLength value="100"/> '
            "</xs:restriction> "
            "</xs:simpleType>"
        )
        return (
            '<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> '
            f'<xs:element name="{root_element_name}"> '
            "<xs:complexType> "
            "<xs:sequence> "
            '<xs:element name="estimatorspecs" maxOccurs="5000" minOccurs="0"> '
            "<xs:complexType> "
            "<xs:all> "
            f'<xs:element name="specid"> {bounded_string} </xs:element> '
            '<xs:element minOccurs="0" name="dataexclvar" nillable="true"> '
            f"{bounded_string} </xs:element> "
            '<xs:element minOccurs="0" name="histexclvar" nillable="true"> '
            f"{bounded_string} </xs:element> "
            f'<xs:element name="estimatorid"> {bounded_string} </xs:element> '
            "</xs:all> "
            "</xs:complexType> "
            "</xs:element> "
            "</xs:sequence> "
            "</xs:complexType> "
            "</xs:element> "
            "</xs:schema>"
        )

    @classmethod
    def check_constraints(cls, dbconn: duckdb.DuckDBPyConnection = duckdb) -> None:
        """Check constraints after all metadata has been loaded (typically foreign key constraints).

        For each foreign key, looks for non-NULL values in this table that do
        not appear in the referenced column and, if any are found, reports up
        to five of them through ``cls.handle_foreign_key_violation`` (defined
        outside this class; whether it raises is not visible here).

        Args:
            dbconn: DuckDB connection (defaults to the ``duckdb`` module) used
                to run the lookups.
        """
        table_name = cls.__name__
        # (column in this table, referenced table, referenced column)
        foreign_keys = (
            ("dataexclvar", "Expressions", "expressionid"),
            ("histexclvar", "Expressions", "expressionid"),
        )

        for var_name, table2_name, table2_var_name in foreign_keys:
            query = f"""select distinct {var_name}
                from banff.{table_name}
                where {var_name} is not NULL
                and {var_name} not in (select {table2_var_name} from banff.{table2_name})
                limit 5
                """  # noqa: S608
            # Fetch once: the relation is lazy, so inspecting its shape and then
            # converting it to a DataFrame would run the query twice.
            undefined_rows = dbconn.sql(query).fetchall()
            if undefined_rows:
                values_not_found = ", ".join(row[0] for row in undefined_rows)
                cls.handle_foreign_key_violation(
                    table_name,
                    var_name,
                    table2_name,
                    table2_var_name,
                    values_not_found,
                )