Source code for banffprocessor.metadata.models.metadataclass

"""Abstract Class for Banff Processor Metadata models."""

from io import StringIO

import duckdb
import xmlschema
from defusedxml.ElementTree import parse

from banffprocessor.exceptions import MetadataConstraintError
from banffprocessor.nls import _


class MetadataClass:
    """Banff Processor Metadata Class.

    Abstract base class for all metadata classes to extend. It allows for
    easier type hinting without needing to explicitly write out every class
    name, and it reduces replicated code.

    Every database helper below operates on a table named after the concrete
    class (``banff.<ClassName>``) and accepts either a DuckDB connection or
    the ``duckdb`` module itself (the default) as ``dbconn``.
    """

    # Kept as a string rather than an int; presumably it is interpolated into
    # the XSD text built by ``get_schema`` in sub-classes -- confirm there.
    DATA_FIELD_SCHEMA_MAX_LENGTH: str = "64"

    @staticmethod
    def get_schema(root_element_name: str = "banffProcessor") -> str:
        """Return a string that contains the XML Schema Definition for the class's metadata.

        By default the root element will be banffProcessor, but this may be
        changed as some XML generators use a standard root, like 'root' or
        'data'.

        The base implementation has no body and therefore returns ``None``;
        sub-classes are expected to override it.
        """

    @classmethod
    def get_record_count(cls, dbconn: duckdb.DuckDBPyConnection = duckdb) -> int:
        """Return the number of records in the metadata table.

        The table queried is ``banff.<name of the class this is called on>``.
        """
        # The table name comes from the class name, not from user input.
        select_statement = f"select count(*) from banff.{cls.__name__}"  # noqa: S608
        return dbconn.execute(select_statement).fetchone()[0]

    @classmethod
    def load_xml(cls, xml_file_name: str) -> dict:
        """Attempt to load the given XML file.

        The file is parsed, validated against the schema returned by
        ``cls.get_schema`` (using the file's own root tag as the root element
        name) and then decoded.

        :param xml_file_name: the XML source handed to defusedxml's ``parse``.
        :return: the decoded document as produced by ``XMLSchema.to_dict``.
        :raises MetadataConstraintError: if the document does not validate
            against the schema. Parsing errors are not translated and
            propagate unchanged.
        """
        try:
            xml_tree = parse(xml_file_name)
            # Reuse whatever root tag the file has so that generators emitting
            # a generic root ('root', 'data', ...) still validate.
            root_element_name = xml_tree.getroot().tag
            schema_text = cls.get_schema(root_element_name=root_element_name)
            my_schema = xmlschema.XMLSchema(StringIO(schema_text))
            my_schema.validate(xml_tree)
            return my_schema.to_dict(xml_tree)
        except xmlschema.validators.exceptions.XMLSchemaValidationError as e:
            msg = _("XML Issue detected related to {}: {}").format(e.path, e.reason)
            # The xmlschema traceback is deliberately suppressed; the message
            # already carries the failing path and the reason.
            raise MetadataConstraintError(msg) from None

    @classmethod
    def initialize(cls, dbconn: duckdb.DuckDBPyConnection = duckdb) -> None:
        """Perform any initialization before loading metadata.

        This is typically creating the database table to store the metadata
        in. The base implementation does nothing.
        """

    @classmethod
    def setup(cls, dbconn: duckdb.DuckDBPyConnection = duckdb) -> None:
        """Perform setup.

        This is called by sub-classes to ensure the standard setup is
        performed during the initialization process.
        """
        # Calling cleanup just in case the table still exists from a previous
        # run. It must target the same connection the schema is created on;
        # this previously always used the module-level default connection,
        # ignoring the ``dbconn`` argument.
        cls.cleanup(dbconn=dbconn)

        # Creating the Banff schema, if it doesn't already exist
        dbconn.execute("CREATE SCHEMA IF NOT EXISTS banff;")

    @classmethod
    def cleanup(cls, dbconn: duckdb.DuckDBPyConnection = duckdb) -> None:
        """Cleanup metadata in the given database.

        The metadata table will be deleted if the connection is still open and
        the table exists. If the database is not open, the connection object
        will have no default_connection attribute.
        """
        # NOTE(review): this attribute check is known to hold for the
        # ``duckdb`` module default; confirm it also holds for explicit
        # connection objects, otherwise cleanup is a no-op for them.
        if hasattr(dbconn, "default_connection"):
            dbconn.execute(f"DROP TABLE IF EXISTS banff.{cls.__name__}")

    @classmethod
    def check_constraints(cls, dbconn: duckdb.DuckDBPyConnection = duckdb) -> None:
        """Constraints to check after all metadata has been loaded.

        Subclasses may implement a set of checks, if necessary. If a
        constraint fails, an exception is raised. The base implementation
        performs no checks.
        """

    @staticmethod
    def handle_foreign_key_violation(
        table1_name: str,
        table1_column: str,
        table2_name: str,
        table2_column: str,
        values_not_found: str,
    ) -> None:
        """Handle foreign key violations.

        When a relationship error is detected, a MetadataConstraintError
        exception is raised. This function never returns normally.

        :param table1_name: table in which the violation was found.
        :param table1_column: referencing column of ``table1_name``.
        :param table2_name: table that should define the referenced values.
        :param table2_column: referenced column of ``table2_name``.
        :param values_not_found: the values missing from ``table2_name``,
            already formatted for display.
        :raises MetadataConstraintError: always.
        """
        # Each fragment is translated separately and then concatenated.
        msg = _("Constraint violated in {} table: ").format(table1_name)
        msg += _("Foreign key constraint violation between {}.{} and {}.{}. ").format(
            table1_name, table1_column, table2_name, table2_column,
        )
        msg += _("The following are not defined: {}.").format(values_not_found)
        raise MetadataConstraintError(msg)