Source code for py_hla_match.matching

import logging
from typing import List, Tuple, Optional
from dataclasses import dataclass

from py_hla_match.models import HLAPair, Individual
from py_hla_match.hla import HLA
from py_hla_match.policy import (
    AlleleMatchLevel,
    ExpressionSuffixMatchLevel,
    ARDMatchLevel,
    ARDMatchLevelCertainty,
    MolecularMatchLevel,
    MolecularMatchLevelCertainty
)
from py_hla_match.config import (
    get_config,
    HLAMatchConfig,
)
from py_hla_match.exceptions import (
    InvalidLocusComparisonError,
    ARDMatchRefinementError
)
from py_hla_match.external import DPB1Result, query_dpb1_tce
from py_hla_match.singleton import get_ard_instance


# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


class MatchResult:
    """
    Result object for comparing two HLA genotype pairs at a single locus.

    This class is designed for research use to describe HLA match or
    mismatch categories between two individuals.

    :ivar patient: HLA allele pair in the 'patient' role.
    :ivar donor: HLA allele pair in the 'donor' role.
    :ivar pairing_score: Internal ordinal score summarising the two
        ``AlleleMatchLevel`` values.
    :ivar allele_match_levels: Tuple of ``AlleleMatchLevel`` values for the
        two allele-level comparisons (patient allele 1 vs donor allele X,
        patient allele 2 vs donor allele Y).
    :ivar ard_match_levels: Tuple of ``ARDMatchLevel`` values refining
        ARD-equivalent allele pairs (``NOT_APPLICABLE`` if not ARD-matched).
    :ivar ard_match_certainties: Tuple of ``ARDMatchLevelCertainty`` values
        indicating how certain the ARD refinement is given typing
        resolution.
    :ivar molecular_match_levels: Tuple of ``MolecularMatchLevel`` values
        refining ARD-equivalent allele pairs at 1–4-field level
        (``NOT_APPLICABLE`` if not ARD-matched).
    :ivar molecular_match_certainties: Tuple of
        ``MolecularMatchLevelCertainty`` values indicating how certain the
        molecular refinement is given typing resolution.
    :ivar dpb1_tce_result: Optional ``DPB1Result`` holding the DPB1
        permissive/non-permissive classification from the EBI TCE API.
        ``None`` until :meth:`get_dpb1_tce_status` stores a result.
    :ivar is_homozygous_patient: ``True`` if the patient is homozygous at
        this locus at ARD-reduced level, ``False`` if heterozygous, or
        ``None`` if ARD-reduced alleles are not available.
    """

    def __init__(
        self,
        patient: HLAPair,
        donor: HLAPair,
        pairing_score: int,
        allele_match_levels: Tuple[AlleleMatchLevel, AlleleMatchLevel],
        ard_match_levels: Optional[
            Tuple[ARDMatchLevel, ARDMatchLevel]
        ] = None,
        ard_match_level_certainty: Optional[
            Tuple[ARDMatchLevelCertainty, ARDMatchLevelCertainty]
        ] = None,
        molecular_match_levels: Optional[
            Tuple[MolecularMatchLevel, MolecularMatchLevel]
        ] = None,
        molecular_match_level_certainty: Optional[
            Tuple[
                MolecularMatchLevelCertainty,
                MolecularMatchLevelCertainty
            ]
        ] = None,
    ) -> None:
        """
        Store the comparison outcome for one locus.

        :param patient: Patient allele pair.
        :param donor: Donor allele pair.
        :param pairing_score: Score of the selected allele pairing.
        :param allele_match_levels: Allele-level result per paired allele.
        :param ard_match_levels: Optional ARD refinement per paired allele;
            defaults to ``NOT_APPLICABLE`` for both.
        :param ard_match_level_certainty: Optional certainty of the ARD
            refinement (stored as ``ard_match_certainties``); defaults to
            ``NOT_APPLICABLE`` for both.
        :param molecular_match_levels: Optional molecular refinement per
            paired allele; defaults to ``NOT_APPLICABLE`` for both.
        :param molecular_match_level_certainty: Optional certainty of the
            molecular refinement (stored as ``molecular_match_certainties``);
            defaults to ``NOT_APPLICABLE`` for both.
        """
        self.patient = patient
        self.donor = donor
        self.pairing_score = pairing_score
        self.allele_match_levels = allele_match_levels

        # Every optional refinement falls back to a NOT_APPLICABLE pair.
        if ard_match_levels is None:
            ard_match_levels = (
                ARDMatchLevel.NOT_APPLICABLE,
                ARDMatchLevel.NOT_APPLICABLE,
            )
        self.ard_match_levels = ard_match_levels

        if ard_match_level_certainty is None:
            ard_match_level_certainty = (
                ARDMatchLevelCertainty.NOT_APPLICABLE,
                ARDMatchLevelCertainty.NOT_APPLICABLE,
            )
        self.ard_match_certainties = ard_match_level_certainty

        if molecular_match_levels is None:
            molecular_match_levels = (
                MolecularMatchLevel.NOT_APPLICABLE,
                MolecularMatchLevel.NOT_APPLICABLE,
            )
        self.molecular_match_levels = molecular_match_levels

        if molecular_match_level_certainty is None:
            molecular_match_level_certainty = (
                MolecularMatchLevelCertainty.NOT_APPLICABLE,
                MolecularMatchLevelCertainty.NOT_APPLICABLE,
            )
        self.molecular_match_certainties = molecular_match_level_certainty

        # optional external matching information
        self.dpb1_tce_result: Optional[DPB1Result] = None

        # check homozygous patient
        # TODO: homozygosity check currently is capped at ARD which may not
        #       be considered *true*
        patient_ard_1 = self.patient.hla1.ard_redux_allele_string
        patient_ard_2 = self.patient.hla2.ard_redux_allele_string
        # None (not False) when either ARD-reduced string is unavailable.
        self.is_homozygous_patient: Optional[bool] = (
            (patient_ard_1 == patient_ard_2)
            if (patient_ard_1 and patient_ard_2)
            else None
        )

    def get_match_level_for_resolution(self, resolution: str) -> str:
        """
        Get locus-level match category for a given resolution.

        :param resolution: Resolution level (``"basic"`` or ``"high"``).
        :return: Match level for the given resolution as a string.
        :raises ValueError: If an unknown resolution level is requested.
        """
        if resolution == "basic":
            return self.loci_match_basic_resolution
        elif resolution == "high":
            return self.loci_match_high_resolution
        else:
            raise ValueError(
                f"Unknown resolution level: {resolution}\n"
                f"Expected 'basic', 'high'."
            )

    @property
    def loci_match_basic_resolution(self):
        """Basic-resolution locus category, computed once and cached."""
        if not hasattr(self, '_locus_match_basic_resolution'):
            self._locus_match_basic_resolution = \
                self._loci_level_match('basic_resolution')
        return self._locus_match_basic_resolution

    @property
    def loci_match_high_resolution(self):
        """High-resolution locus category, computed once and cached."""
        if not hasattr(self, '_locus_match_high_resolution'):
            self._locus_match_high_resolution = \
                self._loci_level_match('high_resolution')
        return self._locus_match_high_resolution

    def _loci_level_match(self, resolution):
        """
        Locus-level match category based on AlleleMatchLevels.

        :param resolution: ``'basic_resolution'`` or ``'high_resolution'``.
        :return: Category name; ``NOT_ASSESSABLE`` as soon as either allele
            comparison is ``NOT_ASSESSABLE``.
        :raises ValueError: If an unknown resolution level is requested.
        """
        match_level_1, match_level_2 = self.allele_match_levels

        if (
            match_level_1 is AlleleMatchLevel.NOT_ASSESSABLE
            or match_level_2 is AlleleMatchLevel.NOT_ASSESSABLE
        ):
            return AlleleMatchLevel.NOT_ASSESSABLE.name

        if resolution == 'basic_resolution':
            return self._calculate_loci_match_basic_resolution(
                match_level_1, match_level_2
            )
        elif resolution == 'high_resolution':
            return self._calculate_loci_match_high_resolution(
                match_level_1, match_level_2
            )
        else:
            raise ValueError(
                f"Unknown resolution level: {resolution}\n"
                f"Expected 'basic_resolution', 'high_resolution'."
            )

    @staticmethod
    def _require_allele_match_levels(match_level_1, match_level_2):
        """Raise ``TypeError`` unless both values are ``AlleleMatchLevel``."""
        if not all(
            isinstance(level, AlleleMatchLevel)
            for level in (match_level_1, match_level_2)
        ):
            raise TypeError(
                f"match_level_1 and match_level_2 must be instances of "
                f"{AlleleMatchLevel}, not {type(match_level_1)} and "
                f"{type(match_level_2)}."
            )

    @staticmethod
    def _level_groups():
        """Return the (match, mismatch) ``AlleleMatchLevel`` groups."""
        match_levels = frozenset({
            AlleleMatchLevel.ARD_MATCH,
        })
        mismatch_levels = frozenset({
            AlleleMatchLevel.DRB345_SUBLOCUS_MISMATCH,
            AlleleMatchLevel.ANTIGEN_MISMATCH,
            AlleleMatchLevel.ALLELE_MISMATCH,
        })
        return match_levels, mismatch_levels

    def _calculate_loci_match_basic_resolution(
        self, match_level_1, match_level_2
    ):
        """
        TODO: base on domain expert
        Determines the basic resolution match status based on the allele
        match levels.

        :param match_level_1: ``AlleleMatchLevel`` of the first comparison.
        :param match_level_2: ``AlleleMatchLevel`` of the second comparison.
        :return: "ARD_MATCH", "PARTIAL_ARD_MISMATCH", or "ARD_MISMATCH"
        :raises TypeError: If either argument is not an
            ``AlleleMatchLevel``.
        """
        self._require_allele_match_levels(match_level_1, match_level_2)
        match_levels, mismatch_levels = self._level_groups()

        first_matches = match_level_1 in match_levels
        second_matches = match_level_2 in match_levels

        if first_matches and second_matches:
            return "ARD_MATCH"

        # Exactly one side matches while the other is a known mismatch.
        if (
            first_matches and match_level_2 in mismatch_levels
        ) or (
            match_level_1 in mismatch_levels and second_matches
        ):
            return "PARTIAL_ARD_MISMATCH"

        # Catch-all: every remaining combination is reported as mismatch.
        return "ARD_MISMATCH"

    def _calculate_loci_match_high_resolution(
        self, match_level_1, match_level_2
    ):
        """
        TODO: base on domain expert
        Determines the high resolution match status with detailed mismatch
        types.

        :param match_level_1: ``AlleleMatchLevel`` of the first comparison.
        :param match_level_2: ``AlleleMatchLevel`` of the second comparison.
        :return: ``"ARD_MATCH"``, ``"PARTIAL_<mismatch>"``,
            ``"<lower>_AND_<higher>"`` (ordered by enum value) or
            ``"DOUBLE_<mismatch>"``.
        :raises TypeError: If either argument is not an
            ``AlleleMatchLevel``.
        :raises ValueError: If a level belongs to neither the match nor the
            mismatch group.
        """
        self._require_allele_match_levels(match_level_1, match_level_2)
        match_levels, mismatch_levels = self._level_groups()

        first_matches = match_level_1 in match_levels
        second_matches = match_level_2 in match_levels
        if first_matches and second_matches:
            return "ARD_MATCH"

        first_mismatches = match_level_1 in mismatch_levels
        second_mismatches = match_level_2 in mismatch_levels

        # One side matches: name the mismatch found on the other side.
        if first_matches and second_mismatches:
            return f"PARTIAL_{match_level_2.name}"
        if first_mismatches and second_matches:
            return f"PARTIAL_{match_level_1.name}"

        if first_mismatches and second_mismatches:
            if match_level_1 is match_level_2:
                # TODO: discuss terminology
                return f"DOUBLE_{match_level_1.name}"
            # Report the two mismatches in ascending enum order (the
            # original "severity" ordering), independent of argument order.
            lower, higher = sorted((match_level_1, match_level_2))
            return f"{lower.name}_AND_{higher.name}"

        raise ValueError(
            f"Unexpected match levels {match_level_1.name} "
            f"and {match_level_2.name}"
        )

    @staticmethod
    def _api_allele(hla: HLA) -> Optional[str]:
        """
        Get highest resolution string to query EBI TCE API.

        :param hla: Allele to convert.
        :return: The ARD-reduced allele string if set, else
            ``"<locus>*<allele_group>"`` if an allele group is set, else
            ``None``.
        """
        if hla.ard_redux_allele_string:
            return hla.ard_redux_allele_string
        if hla.allele_group:
            return f"{hla.locus}*{hla.allele_group}"
        return None

    def get_dpb1_tce_status(
        self, api_version: str = "3.0", timeout: int = 10
    ) -> Optional[DPB1Result]:
        """
        Calculate DPB1 permissive/non-permissive classification via EBI API.
        Intended for research workflows.

        WARNING: may slow things down significantly.

        On a successful query the result is stored in
        ``self.dpb1_tce_result`` and returned.

        :param api_version: The version of the EBI API to query
            (default "3.0")
        :param timeout: Time in seconds to wait for the API response
        :return: The ``DPB1Result`` object, or ``None`` if the locus is not
            DPB1 or if any of the four query alleles is unavailable (in
            both cases ``self.dpb1_tce_result`` is left unchanged).
        """
        if self.patient.locus != "DPB1":
            logger.debug(
                f"Not applicable to {self.patient.locus}."
            )
            return None

        patient_dpb1 = self._api_allele(self.patient.hla1)
        patient_dpb2 = self._api_allele(self.patient.hla2)
        donor_dpb1 = self._api_allele(self.donor.hla1)
        donor_dpb2 = self._api_allele(self.donor.hla2)

        if not all(
            [patient_dpb1, patient_dpb2, donor_dpb1, donor_dpb2]
        ):
            logger.warning(
                f"One or more required alleles are missing for DPB1 to call "
                f"EBI API, got P1:'{patient_dpb1}', P2:'{patient_dpb2}', "
                f"D1:'{donor_dpb1}', D2:'{donor_dpb2}'. dpb1_tce_result "
                f"remains unchanged ({self.dpb1_tce_result})"
            )
            return None

        # if we are here, the query should be valid
        dpb1_tce_result = query_dpb1_tce(
            patient_dpb1=patient_dpb1,
            patient_dpb2=patient_dpb2,
            donor_dpb1=donor_dpb1,
            donor_dpb2=donor_dpb2,
            version=api_version,
            timeout=timeout
        )

        # update match result and return DPB1Result
        self.dpb1_tce_result = dpb1_tce_result
        return self.dpb1_tce_result
@dataclass(frozen=True)
class _PairingResult:
    """
    Immutable record describing one evaluated patient/donor allele pairing.
    Intended for research workflows.

    Produced internally by `_get_correct_allele_pairing`; every tuple holds
    one entry per paired allele comparison.

    Attributes:
        score (int):
            Sum of the two AlleleMatchLevel values; the primary criterion
            when choosing between pairings.
        allele_match_levels (Tuple[AlleleMatchLevel, AlleleMatchLevel]):
            ARD-based match level of each paired comparison.
        ard_match_levels (Tuple[ARDMatchLevel, ARDMatchLevel]):
            G-group vs P-group refinement of ARD-matched alleles;
            NOT_APPLICABLE when the allele level is not ARD_MATCH.
        ard_match_certainties (
            Tuple[ARDMatchLevelCertainty, ARDMatchLevelCertainty]
        ):
            Certainty of the ARD refinement given typing resolution;
            UNCERTAIN means a higher ARDMatchLevel may still be possible.
        molecular_match_levels (
            Tuple[MolecularMatchLevel, MolecularMatchLevel]
        ):
            Field-by-field identity refinement of ARD-matched alleles;
            NOT_APPLICABLE when the allele level is not ARD_MATCH.
        molecular_match_certainties (
            Tuple[MolecularMatchLevelCertainty, MolecularMatchLevelCertainty]
        ):
            Certainty of the molecular refinement given typing resolution;
            UNCERTAIN means a higher MolecularMatchLevel may still be
            possible.
    """

    score: int
    allele_match_levels: Tuple[AlleleMatchLevel, AlleleMatchLevel]
    ard_match_levels: Tuple[ARDMatchLevel, ARDMatchLevel]
    ard_match_certainties: Tuple[
        ARDMatchLevelCertainty, ARDMatchLevelCertainty
    ]
    molecular_match_levels: Tuple[MolecularMatchLevel, MolecularMatchLevel]
    molecular_match_certainties: Tuple[
        MolecularMatchLevelCertainty, MolecularMatchLevelCertainty
    ]


def _map_expression_decision(
    decision: ExpressionSuffixMatchLevel,
) -> Optional[AlleleMatchLevel]:
    """
    Translate a suffix-policy decision into an ``AlleleMatchLevel``.

    ``IGNORE`` yields ``None`` (no override); any decision missing from the
    translation table raises ``KeyError``.
    """
    translation = {
        ExpressionSuffixMatchLevel.NOT_ASSESSABLE:
            AlleleMatchLevel.NOT_ASSESSABLE,
        ExpressionSuffixMatchLevel.ALLELE_MISMATCH:
            AlleleMatchLevel.ALLELE_MISMATCH,
        ExpressionSuffixMatchLevel.ANTIGEN_MISMATCH:
            AlleleMatchLevel.ANTIGEN_MISMATCH,
        ExpressionSuffixMatchLevel.ARD_MATCH:
            AlleleMatchLevel.ARD_MATCH,
    }
    if decision is ExpressionSuffixMatchLevel.IGNORE:
        return None
    return translation[decision]


def _apply_expression_suffix_policy(
    hla1: HLA,
    hla2: HLA,
    cfg: HLAMatchConfig
) -> Optional[AlleleMatchLevel]:
    """
    Apply configurable expression-suffix policy once ARD is equivalent.

    Returns the overriding ``AlleleMatchLevel`` chosen by the policy, or
    ``None`` when no rule applies (or the applicable rule is ``IGNORE``).
    """
    first_suffix = hla1.suffix
    second_suffix = hla2.suffix

    # Nothing to decide when neither allele carries a suffix.
    if first_suffix is None and second_suffix is None:
        return None

    policy = cfg.expression_suffix_policy

    # An ambiguous suffix on either allele takes precedence (the original
    # note describes this as the 'Q' case, defaulting to NOT_ASSESSABLE).
    ambiguous = policy.ambiguous_suffixes
    if first_suffix in ambiguous or second_suffix in ambiguous:
        return _map_expression_decision(policy.q_present)

    # Risk suffixes: both sides, or one side against an unsuffixed allele.
    risk_suffixes = policy.risk_suffixes
    first_is_risk = first_suffix is not None and first_suffix in risk_suffixes
    second_is_risk = (
        second_suffix is not None and second_suffix in risk_suffixes
    )

    if first_is_risk and second_is_risk:
        decision = (
            policy.equal_risk
            if first_suffix == second_suffix
            else policy.risk_vs_different_risk
        )
        return _map_expression_decision(decision)

    risk_against_unsuffixed = (
        (first_is_risk and second_suffix is None)
        or (second_is_risk and first_suffix is None)
    )
    if risk_against_unsuffixed:
        return _map_expression_decision(policy.risk_vs_none)

    return None
def allele_match(hla1: HLA, hla2: HLA) -> AlleleMatchLevel:
    """
    Compare two HLA alleles and classify them as an ``AlleleMatchLevel``.

    :param hla1: First HLA allele object
    :param hla2: Second HLA allele object
    :return: ``AlleleMatchLevel`` enum value indicating position of matches
        and mismatch (cf. HLA nomenclature)
    :raises TypeError: If hla1 or hla2 is not an instance of HLA
    :raises InvalidLocusComparisonError: If hla1 and hla2 have incompatible
        loci
    :raises RuntimeError: If both alleles have at least two fields and the
        same allele group, but an ARD-reduced allele string is missing
    """
    if not isinstance(hla1, HLA):
        raise TypeError(
            f"hla1 must be an instance of HLA, not {type(hla1).__name__}."
        )
    if not isinstance(hla2, HLA):
        raise TypeError(
            f"hla2 must be an instance of HLA, not {type(hla2).__name__}."
        )

    # (1) LOCUS and LOW-RES comparison
    # loci must agree (NOTE: DRB3/4/5 hard coded to locus DRB345)
    if hla1.locus != hla2.locus:
        raise InvalidLocusComparisonError(hla1.locus, hla2.locus)

    # within DRB345 the original DRB3/4/5 sub-locus is still available
    if hla1.locus == 'DRB345' and hla1.drb_sub_locus != hla2.drb_sub_locus:
        return AlleleMatchLevel.DRB345_SUBLOCUS_MISMATCH

    lowest_resolution = min(
        hla1.has_resolution_level(),
        hla2.has_resolution_level()
    )

    # no allele fields at all
    if lowest_resolution < 1:
        return AlleleMatchLevel.NOT_ASSESSABLE

    # Differing allele groups are an antigen mismatch at any resolution.
    if hla1.allele_group != hla2.allele_group:
        return AlleleMatchLevel.ANTIGEN_MISMATCH

    # Same allele group but fewer than two fields: missing data.
    if lowest_resolution < 2:
        return AlleleMatchLevel.NOT_ASSESSABLE

    # --- from here on we have at least two-field resolution ---
    # (2) TWO-FIELD COMPARISON
    ard_string_missing = (
        hla1.ard_redux_allele_string is None
        or hla2.ard_redux_allele_string is None
    )
    if ard_string_missing:
        # NOTE: this should never happen (!)
        raise RuntimeError(
            f"HLA parsing failed for '{hla1.allele_string}' or "
            f"'{hla2.allele_string}'. Please report this issue."
        )

    if hla1.ard_redux_allele != hla2.ard_redux_allele:
        return AlleleMatchLevel.ALLELE_MISMATCH

    # (3) EXPRESSION COMPARISON (suffixes)
    # NOTE: we may need to move expression comparison to be evaluated
    #       directly after locus comparison
    if not (hla1.suffix is None and hla2.suffix is None):
        policy_level = _apply_expression_suffix_policy(
            hla1, hla2, get_config()
        )
        if policy_level is not None:
            return policy_level

    # (4) ARD MATCH
    # ARD-level match that is NOT affected by expression differences
    # (suffixes)
    return AlleleMatchLevel.ARD_MATCH
def _refine_ard_match_level_by_group_association(
    hla1: HLA,
    hla2: HLA,
    allele_match_level: AlleleMatchLevel
) -> tuple[ARDMatchLevel, ARDMatchLevelCertainty]:
    """
    Refine an ARD-matched allele pair into a P-group or G-group match.

    Args:
        hla1: First HLA allele object
        hla2: Second HLA allele object
        allele_match_level: AlleleMatchLevel of hla1 and hla2

    Returns:
        Tuple[ARDMatchLevel, ARDMatchLevelCertainty]
            ARDMatchLevel value indicating level of ARD matching
            ARDMatchLevelCertainty value indicating certainty of that level

    Raises:
        TypeError: If hla1 or hla2 is not an instance of HLA
        InvalidLocusComparisonError: If hla1 and hla2 have incompatible loci
        ARDMatchRefinementError: If ``allele_match_level`` contradicts the
            ARD-reduced allele strings (ARD_MATCH claimed but data missing
            or different, or not claimed although the strings are equal)

    Only applicable to AlleleMatchLevel == ARD_MATCH
    Otherwise, returns NOT_APPLICABLE for both level and certainty
    """
    # sanity checks
    if not isinstance(hla1, HLA):
        raise TypeError(
            f"hla1 must be an instance of HLA, not {type(hla1).__name__}."
        )
    if not isinstance(hla2, HLA):
        raise TypeError(
            f"hla2 must be an instance of HLA, not {type(hla2).__name__}."
        )
    if hla1.locus != hla2.locus:
        raise InvalidLocusComparisonError(hla1.locus, hla2.locus)

    # additional safeguards against misuse
    is_claimed_ard_match = (allele_match_level is AlleleMatchLevel.ARD_MATCH)

    # HLA class guarantees valid hla two-field allele if redux worked
    has_ard_data = (
        hla1.ard_redux_allele_string is not None
        and hla2.ard_redux_allele_string is not None
    )
    # still redux string must be equal to confirm ARD_MATCH
    is_actual_ard_match = (
        has_ard_data
        and hla1.ard_redux_allele_string == hla2.ard_redux_allele_string
    )

    if is_claimed_ard_match and not has_ard_data:
        raise ARDMatchRefinementError(
            f"ARD_MATCH but ARD reduction data missing. "
            f"hla1.ard_redux_allele_string={hla1.ard_redux_allele_string}, "
            f"hla2.ard_redux_allele_string={hla2.ard_redux_allele_string}"
        )

    if is_claimed_ard_match and not is_actual_ard_match:
        raise ARDMatchRefinementError(
            f"ARD_MATCH but alleles differ at ARD level. "
            f"hla1.ard_redux_allele_string={hla1.ard_redux_allele_string}, "
            f"hla2.ard_redux_allele_string={hla2.ard_redux_allele_string}"
        )

    if not is_claimed_ard_match and is_actual_ard_match:
        raise ARDMatchRefinementError(
            f"{allele_match_level.name} but alleles ARE "
            f"ARD-equivalent. This indicates a bug in the caller. "
            f"hla1.ard_redux_allele_string={hla1.ard_redux_allele_string}, "
            f"hla2.ard_redux_allele_string={hla2.ard_redux_allele_string}"
        )

    # (1) Valid non-ARD_MATCH: return NOT_APPLICABLE
    if allele_match_level is not AlleleMatchLevel.ARD_MATCH:
        return (
            ARDMatchLevel.NOT_APPLICABLE,
            ARDMatchLevelCertainty.NOT_APPLICABLE
        )

    # NOTE: specific group_code ('01P', instead of 'P') encoded in allele field
    #       of HLA object (or synonymous_variant field for 'G' group)
    # TODO: imo this needs an update in the HLA parsing logic
    #       not a bug per se, just counterintuitive and welcomes errors

    # (2) P-group is first exit if we lack information
    # NOTE: HLA parsing **guarantees** that a given "P" is highest resolution
    if hla1.group_code == "P" or hla2.group_code == "P":
        return (
            ARDMatchLevel.P_GROUP_MATCH,  # could still be G-group match
            ARDMatchLevelCertainty.UNCERTAIN
        )

    # (3) G-group next
    min_resolution = min(
        hla1.has_resolution_level(),
        hla2.has_resolution_level()
    )

    # G group is more complex, we have G-group match if:
    # NOTE(review): cases a)–d) are placed inside the ``>= 3`` guard here;
    #               the nesting was reconstructed from a listing without
    #               indentation and is consistent with note (4) below, but
    #               should be confirmed against the upstream source.
    # a) hla1.synonymous_variant == hla2.synonymous_variant without G-group
    if min_resolution >= 3:
        if (
            hla1.group_code != "G"
            and hla2.group_code != "G"
            and hla1.synonymous_variant == hla2.synonymous_variant
        ):
            return (
                ARDMatchLevel.G_GROUP_MATCH,
                ARDMatchLevelCertainty.CERTAIN
            )

        # b) hla1.group_code == "G" and hla2.group_code == "G"
        if (
            hla1.group_code == "G"
            and hla2.group_code == "G"
            and hla1.synonymous_variant == hla2.synonymous_variant
        ):
            return (
                ARDMatchLevel.G_GROUP_MATCH,
                ARDMatchLevelCertainty.CERTAIN
            )

        # c) one allele has G-group, the other not, but both are in the same
        #    G-group (both 'G' reductions are equal and end with 'G')
        if hla1.group_code == "G" or hla2.group_code == "G":
            pyard = get_ard_instance()
            pyard_g1_string = pyard.redux(hla1.allele_string, 'G')
            pyard_g2_string = pyard.redux(hla2.allele_string, 'G')
            if (
                pyard_g1_string == pyard_g2_string
                and pyard_g1_string.endswith('G')
                and pyard_g2_string.endswith('G')
            ):
                return (
                    ARDMatchLevel.G_GROUP_MATCH,
                    ARDMatchLevelCertainty.CERTAIN
                )

        # d) if both are not G-group coded: same 'G' reduction test as in c)
        if hla1.group_code != "G" and hla2.group_code != "G":
            pyard = get_ard_instance()
            pyard_g1_string = pyard.redux(hla1.allele_string, 'G')
            pyard_g2_string = pyard.redux(hla2.allele_string, 'G')
            if (
                pyard_g1_string == pyard_g2_string
                and pyard_g1_string.endswith('G')
                and pyard_g2_string.endswith('G')
            ):
                return (
                    ARDMatchLevel.G_GROUP_MATCH,
                    ARDMatchLevelCertainty.CERTAIN
                )

    # (4) quo vadis?
    # due to overlap of P- and G-groups we could actually get more info
    # e.g. A*01:468 and A*01:471 are part of A*01:01P and A*01:01:01G
    # however, py-ard's 'G' reduction is currently not robust
    # e.g., print(pyard.redux("A*01:01", 'G')) returns 'A*01:01:01G',
    #       but A*01:01:162 (valid allele) is not part of A*01:01:01G
    # NOTE: so until this is resolved for now
    return (
        ARDMatchLevel.P_GROUP_MATCH,  # could still be G-group match
        ARDMatchLevelCertainty.UNCERTAIN
    )


def _refine_ard_match_level_at_molecular_level(
    hla1: HLA,
    hla2: HLA,
    allele_match_level: AlleleMatchLevel
) -> tuple[MolecularMatchLevel, MolecularMatchLevelCertainty]:
    """
    Refine an ARD-matched allele pair by field-by-field identity.

    Args:
        hla1: First HLA allele object
        hla2: Second HLA allele object
        allele_match_level: AlleleMatchLevel of hla1 and hla2

    Returns:
        Tuple[MolecularMatchLevel, MolecularMatchLevelCertainty]
            MolecularMatchLevel value indicating degree of 1–4 field
            identity
            MolecularMatchLevelCertainty value indicating whether a higher
            level is still possible given typing resolution

    Raises:
        TypeError: If hla1 or hla2 is not an instance of HLA
        InvalidLocusComparisonError: If hla1 and hla2 have incompatible loci
        ARDMatchRefinementError: If ``allele_match_level`` contradicts the
            ARD-reduced allele strings (ARD_MATCH claimed but data missing
            or different, or not claimed although the strings are equal)

    Only applicable to AlleleMatchLevel == ARD_MATCH
    Otherwise, returns NOT_APPLICABLE for both level and certainty
    """
    # sanity checks
    if not isinstance(hla1, HLA):
        raise TypeError(
            f"hla1 must be an instance of HLA, not {type(hla1).__name__}."
        )
    if not isinstance(hla2, HLA):
        raise TypeError(
            f"hla2 must be an instance of HLA, not {type(hla2).__name__}."
        )
    if hla1.locus != hla2.locus:
        raise InvalidLocusComparisonError(hla1.locus, hla2.locus)

    # additional safeguards against misuse
    is_claimed_ard_match = (allele_match_level is AlleleMatchLevel.ARD_MATCH)

    # HLA class guarantees valid hla two-field allele if redux worked
    has_ard_data = (
        hla1.ard_redux_allele_string is not None
        and hla2.ard_redux_allele_string is not None
    )
    # still redux string must be equal to confirm ARD_MATCH
    is_actual_ard_match = (
        has_ard_data
        and hla1.ard_redux_allele_string == hla2.ard_redux_allele_string
    )

    if is_claimed_ard_match and not has_ard_data:
        raise ARDMatchRefinementError(
            f"ARD_MATCH but ARD reduction data missing. "
            f"hla1.ard_redux_allele_string={hla1.ard_redux_allele_string}, "
            f"hla2.ard_redux_allele_string={hla2.ard_redux_allele_string}"
        )

    if is_claimed_ard_match and not is_actual_ard_match:
        raise ARDMatchRefinementError(
            f"ARD_MATCH but alleles differ at ARD level. "
            f"hla1.ard_redux_allele_string={hla1.ard_redux_allele_string}, "
            f"hla2.ard_redux_allele_string={hla2.ard_redux_allele_string}"
        )

    if not is_claimed_ard_match and is_actual_ard_match:
        raise ARDMatchRefinementError(
            f"{allele_match_level.name} but alleles ARE "
            f"ARD-equivalent. This indicates a bug in the caller. "
            f"hla1.ard_redux_allele_string={hla1.ard_redux_allele_string}, "
            f"hla2.ard_redux_allele_string={hla2.ard_redux_allele_string}"
        )

    # (1) Valid non-ARD_MATCH: return NOT_APPLICABLE
    # e.g. A*01:01 vs A*02:01
    if allele_match_level is not AlleleMatchLevel.ARD_MATCH:
        return (
            MolecularMatchLevel.NOT_APPLICABLE,
            MolecularMatchLevelCertainty.NOT_APPLICABLE
        )

    # (2) Group code: molecular level cannot be assessed
    # e.g. A*01:01P vs A*01:01:01:01
    if hla1.group_code == "P" or hla2.group_code == "P":
        return (
            MolecularMatchLevel.NOT_ASSESSABLE,  # Could be protein/coding/exact
            MolecularMatchLevelCertainty.UNCERTAIN
        )
    if hla1.group_code == "G" or hla2.group_code == "G":
        return (
            MolecularMatchLevel.NOT_ASSESSABLE,  # Could be protein/coding/exact
            MolecularMatchLevelCertainty.UNCERTAIN
        )

    # From here: ARD_MATCH, no P-/G-group code - let's try to refine
    # ARD_MATCH; we need the resolution multiple times
    min_resolution = min(
        hla1.has_resolution_level(),
        hla2.has_resolution_level()
    )

    # (3) cases with res == 2
    # e.g. A*01:01 vs A*01:01
    if min_resolution == 2:
        # we either have full protein match:
        if (
            hla1.allele == hla2.allele
        ):
            return (
                MolecularMatchLevel.FULL_PROTEIN_MATCH,  # Could be coding/exact
                MolecularMatchLevelCertainty.UNCERTAIN
            )
        # or a mismatch:
        # e.g. A*01:01 vs A*01:15 (same P-group)
        else:
            return (
                MolecularMatchLevel.ARD_MATCH_ONLY,
                MolecularMatchLevelCertainty.CERTAIN
            )

    # (4) min_resolution == 3: we know the 3rd field (synonymous variant)
    if min_resolution == 3:
        if (
            hla1.allele != hla2.allele
        ):  # 2nd fields differ
            return (
                MolecularMatchLevel.ARD_MATCH_ONLY,
                # final: differing 2nd fields rule out any higher level
                MolecularMatchLevelCertainty.CERTAIN
            )
        elif (
            # hla1.allele == hla2.allele
            hla1.synonymous_variant != hla2.synonymous_variant
        ):  # 3rd fields differ
            return (
                MolecularMatchLevel.FULL_PROTEIN_MATCH,
                # final: differing 3rd fields rule out coding/exact match
                MolecularMatchLevelCertainty.CERTAIN
            )
        else:
            # 1–3 fields identical, unknown 4th field
            return (
                MolecularMatchLevel.CODING_SEQUENCE_MATCH,
                MolecularMatchLevelCertainty.UNCERTAIN
            )

    # (5) min_resolution == 4: both alleles have 4-field resolution
    if min_resolution == 4:
        if (
            hla1.allele != hla2.allele
        ):  # second fields differ
            return (
                MolecularMatchLevel.ARD_MATCH_ONLY,
                MolecularMatchLevelCertainty.CERTAIN
            )
        elif (
            hla1.synonymous_variant != hla2.synonymous_variant
        ):  # third fields differ
            return (
                MolecularMatchLevel.FULL_PROTEIN_MATCH,
                MolecularMatchLevelCertainty.CERTAIN
            )
        # Third field identical -> check non-coding (4th) field
        elif (
            hla1.non_coding_variant != hla2.non_coding_variant
        ):  # 1–3 fields identical, 4th differs
            return (
                MolecularMatchLevel.CODING_SEQUENCE_MATCH,
                MolecularMatchLevelCertainty.CERTAIN
            )
        else:
            # All 1–4 fields identical
            return (
                MolecularMatchLevel.EXACT_ALLELE_MATCH,
                MolecularMatchLevelCertainty.CERTAIN
            )
    # NOTE(review): a min_resolution outside 2–4 falls through and returns
    #               None implicitly; presumably unreachable for ARD-matched
    #               alleles - confirm against HLA.has_resolution_level().


def _get_correct_allele_pairing(
    patient_alleles: HLAPair,
    donor_alleles: HLAPair
) -> _PairingResult:
    """
    Determines the correct pairing of patient and donor HLA alleles by
    evaluating all possible combinations.
    Intended for research workflows.

    :param patient_alleles: ``HLAPair`` containing two patient HLA alleles.
    :param donor_alleles: ``HLAPair`` containing two donor HLA alleles.
    :return: A ``_PairingResult`` instance containing allele-, ARD- and
        molecular-level match classifications and certainties for the
        optimal pairing.

    Notes:
        - Considers two possible pairings:
            1. (patient_hla1, donor_hla1) and (patient_hla2, donor_hla2)
            2. (patient_hla1, donor_hla2) and (patient_hla2, donor_hla1)
        - Selection uses three-level lexicographic scoring:
            1. Primary: AlleleMatchLevel sum (match vs mismatch)
            2. Secondary: MolecularMatchLevel sum (field identity)
            3. Tertiary: ARDMatchLevel sum (G-group vs P-group in ARD)
        - If all scores are equal, the first pairing is returned
          (a later pairing only wins with a strictly greater score)
        - Exceptions raised by ``allele_match`` or the refinement helpers
          are not caught here
    """
    # Each entry: (patient allele, donor allele, patient allele, donor allele)
    pairings = [
        (
            patient_alleles.hla1, donor_alleles.hla1,
            patient_alleles.hla2, donor_alleles.hla2
        ),
        (
            patient_alleles.hla1, donor_alleles.hla2,
            patient_alleles.hla2, donor_alleles.hla1
        ),
    ]

    # Lexicographic comparison: (allele, molecular, ard)
    # -inf sentinels guarantee the first pairing is always accepted.
    best_score: Tuple[float, float, float] = (
        float('-inf'), float('-inf'), float('-inf')
    )
    best_result: Optional[_PairingResult] = None

    for pairing in pairings:
        patient_hla1, donor_hla1, patient_hla2, donor_hla2 = pairing

        # (1) Primary: AlleleMatchLevel
        allele_match1 = allele_match(patient_hla1, donor_hla1)
        allele_match2 = allele_match(patient_hla2, donor_hla2)
        allele_score = int(allele_match1) + int(allele_match2)

        # (2) Refinements if ARD_MATCH
        if allele_match1 is AlleleMatchLevel.ARD_MATCH:
            ard_match1, ard_certainty1 = \
                _refine_ard_match_level_by_group_association(
                    patient_hla1, donor_hla1, allele_match1
                )
            molecular_match1, molecular_certainty1 = \
                _refine_ard_match_level_at_molecular_level(
                    patient_hla1, donor_hla1, allele_match1
                )
        else:
            ard_match1 = ARDMatchLevel.NOT_APPLICABLE
            ard_certainty1 = ARDMatchLevelCertainty.NOT_APPLICABLE
            molecular_match1 = MolecularMatchLevel.NOT_APPLICABLE
            molecular_certainty1 = MolecularMatchLevelCertainty.NOT_APPLICABLE

        # Same for allele_match2
        if allele_match2 is AlleleMatchLevel.ARD_MATCH:
            ard_match2, ard_certainty2 = \
                _refine_ard_match_level_by_group_association(
                    patient_hla2, donor_hla2, allele_match2
                )
            molecular_match2, molecular_certainty2 = \
                _refine_ard_match_level_at_molecular_level(
                    patient_hla2, donor_hla2, allele_match2
                )
        else:
            ard_match2 = ARDMatchLevel.NOT_APPLICABLE
            ard_certainty2 = ARDMatchLevelCertainty.NOT_APPLICABLE
            molecular_match2 = MolecularMatchLevel.NOT_APPLICABLE
            molecular_certainty2 = MolecularMatchLevelCertainty.NOT_APPLICABLE

        # (3) Tie-breaker scores
        molecular_score = int(molecular_match1) + int(molecular_match2)
        ard_score = int(ard_match1) + int(ard_match2)

        # (4) Lexicographic comparison: (allele, molecular, ard)
        current_score = (allele_score, molecular_score, ard_score)

        # Strict ">" keeps the earlier pairing on a full tie.
        if current_score > best_score:
            best_score = current_score
            best_result = _PairingResult(
                score=allele_score,
                allele_match_levels=(allele_match1, allele_match2),
                ard_match_levels=(ard_match1, ard_match2),
                ard_match_certainties=(ard_certainty1, ard_certainty2),
                molecular_match_levels=(molecular_match1, molecular_match2),
                molecular_match_certainties=(
                    molecular_certainty1, molecular_certainty2
                ),
            )

    # Cannot be None: we always have exactly 2 pairings
    return best_result  # type: ignore[return-value]
def allele_pair_match(patient: HLAPair, donor: HLAPair) -> MatchResult:
    """
    Compute research match/mismatch levels for two HLA allele pairs, one in
    the 'patient' role and one in the 'donor' role.
    Intended for research workflows.

    :param patient: Patient ``HLAPair`` containing two HLA alleles.
    :param donor: Donor ``HLAPair`` containing two HLA alleles.
    :return: ``MatchResult`` object storing allele-level match categories
        and all ARD and molecular refinements for the optimal pairing.

    Notes:
        - Both patient and donor are assumed to carry exactly two HLA
          alleles
        - Pairing selection is delegated to `_get_correct_allele_pairing`,
          whose result is copied field by field into the ``MatchResult``
    """
    pairing = _get_correct_allele_pairing(patient, donor)

    return MatchResult(
        patient,
        donor,
        pairing.score,
        pairing.allele_match_levels,
        ard_match_levels=pairing.ard_match_levels,
        ard_match_level_certainty=pairing.ard_match_certainties,
        molecular_match_levels=pairing.molecular_match_levels,
        molecular_match_level_certainty=pairing.molecular_match_certainties,
    )
def multi_locus_match(
    patient: Individual,
    donor: Individual
) -> List[MatchResult]:
    """
    Compute HLA match/mismatch categories between two Individuals for every
    locus present in the patient's ``hla_data``.
    Intended for research workflows.

    A locus missing from the donor is compared against a placeholder pair
    built from ``"<locus>*NA"`` (``"DRBX*NA"`` for locus ``DRB345``), and a
    warning is logged.

    :param patient: Patient object
    :param donor: Donor object
    :return: List of MatchResult objects, one per patient locus, in the
        order of ``patient.hla_data``
    """
    # index donor pairs by locus for direct lookup
    donor_by_locus = {}
    for donor_entry in donor.hla_data:
        donor_by_locus[donor_entry.locus] = donor_entry

    matches: List[MatchResult] = []

    for patient_pair in patient.hla_data:
        locus = patient_pair.locus

        if locus not in donor_by_locus:
            logger.warning(
                f"Locus {locus} not found in donor data – "
                "matching will be reported as NOT_ASSESSABLE."
            )
            # stand-in pair for the untyped donor locus
            if locus == "DRB345":
                donor_pair = HLAPair(HLA("DRBX*NA"), HLA("DRBX*NA"))
            else:
                donor_pair = HLAPair(HLA(f"{locus}*NA"), HLA(f"{locus}*NA"))
        else:
            donor_pair = donor_by_locus[locus]

        # compute match for this locus
        match_result = allele_pair_match(patient_pair, donor_pair)

        # additional diagnostics: neither allele comparison was assessable
        levels = match_result.allele_match_levels
        if all(level == AlleleMatchLevel.NOT_ASSESSABLE for level in levels):
            logger.warning(
                f"Typing resolution insufficient for locus {locus} "
                f"(patient {patient_pair} / donor {donor_pair})."
            )

        matches.append(match_result)

    return matches