Skip to content

create_status_coder

A factory function which returns a function capable of determining the status code of a read tagged by an AlignmentTagger object.

Parameters:

Name Type Description Default
insert_seqs list

A list of acceptable insert sequences. Defaults to [‘*’], which will skip the insert seq check altogether.

['*']
mapq_threshold int

A mapq_threshold. Reads with map quality less than this value will be marked as failing the mapq threshold test. Default is 10.

10
check_5_prime_clip bool

Whether to check for 5’ end clipping in the read. Defaults to False.

False
check_passing bool

Whether to check the passing key in the barcode_details dict. Defaults to True.

True

Returns:

Type Description
Callable[[AlignedSegment], int]

A function which, given a tagged pysam AlignedSegment (passed inside a dict with the keys "read" and "barcode_details"), will return the status code for the read.

Source code in callingcardstools/QC/create_status_coder.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def create_status_coder(
        insert_seqs: list = ['*'],
        mapq_threshold: int = 10,
        check_5_prime_clip: bool = False,
        check_passing: bool = True) -> Callable[[dict], int]:
    """
    A factory function which returns a function capable of determining the
    status code of a read tagged by an AlignmentTagger object.

    Args:
        insert_seqs (list): A list of acceptable insert sequences. Defaults
            to ['*'], which will skip the insert seq check altogether. The
            list is only read, never modified.
        mapq_threshold (int): A mapq_threshold. Reads with map quality less
            than this value will be marked as failing the mapq threshold test.
            Default is 10.
        check_5_prime_clip (bool): Whether to check for 5' end clipping in
            the read. Defaults to False.
        check_passing (bool, optional): Whether to check the passing key in
            the barcode_details dict. Defaults to True.

    Returns:
        Callable[[dict], int]: A function which, given a dict with the keys
        "read" (a tagged pysam AlignedSegment) and "barcode_details" (a
        dict), will return the status code for the read.
    """

    def coder(read_details: dict,
              status_code: int = 0) -> int:
        """
        Returns the status code for a given read after checking for various
        flags.

        Args:
            read_details (dict): A dict with the keys "read", which is a
                pysam AlignedSegment, and "barcode_details", which is a
                dict.
            status_code (int, optional): Initial status code. Defaults to 0.
                Each failing check adds its StatusFlags value to this.

        Raises:
            ValueError: If read_details is not a dictionary, or if the
                values under "read" / "barcode_details" are not an
                AlignedSegment / dict respectively.
            KeyError: If "read" or "barcode_details" are missing from
                read_details, or (when check_passing is set) if
                barcode_details has no boolean "passing" entry.

        Returns:
            int: The status code for a given read.
        """
        if not isinstance(read_details, dict):
            raise ValueError('read_details must be a dictionary with '
                             'keys "read" which is a pysam.AlignedSegment and '
                             '"barcode_details" which is a dict')
        if not read_details.keys() >= {'read', 'barcode_details'}:
            raise KeyError('"read" and "barcode_details" must be keys in '
                           'read_details')

        # both keys are known to exist from here on
        read = read_details['read']
        barcode_details = read_details['barcode_details']

        if not isinstance(read, AlignedSegment):
            raise ValueError('read_details["read"] must be a '
                             'pysam.AlignedSegment object')
        if not isinstance(barcode_details, dict):
            raise ValueError('read_details["barcode_details"] must be a '
                             'dict')
        if check_passing and \
                not isinstance(barcode_details.get('passing'), bool):
            raise KeyError('passing must be a key in '
                           'read_details["barcode_details"]')

        # if check passing is set to false, then the passing key may not
        # exist. In this event, assume the read is passing
        if not barcode_details.get('passing', True):
            status_code += StatusFlags.BARCODE.flag()

        # if the read is unmapped, add the flag, but don't check
        # other alignment metrics
        if read.is_unmapped:
            status_code += StatusFlags.UNMAPPED.flag()
        else:
            if read.is_qcfail:
                status_code += StatusFlags.ALIGNER_QC_FAIL.flag()
            if read.is_secondary or read.is_supplementary:
                status_code += StatusFlags.NOT_PRIMARY.flag()
            if read.mapping_quality < mapq_threshold:
                status_code += StatusFlags.MAPQ.flag()
            # note: for mammals, this isn't necessary as the insert seq can
            # be checked
            if check_5_prime_clip:
                # if the read is clipped on the 5' end, flag. For a forward
                # read the 5' end is the start of the query; for a reverse
                # read it is the end of the query.
                if (read.is_forward and
                        read.query_alignment_start != 0) or \
                        (read.is_reverse and
                         read.query_alignment_end !=
                         read.infer_query_length()):
                    status_code += StatusFlags.FIVE_PRIME_CLIP.flag()

        # check the insert sequence (skipped entirely for the ['*'] wildcard)
        if insert_seqs != ["*"]:
            try:
                insert_seq = read.get_tag("XZ")
            except (AttributeError, KeyError) as exc:
                # pysam raises KeyError when the tag is absent; without
                # catching it a read lacking the XZ tag would crash the
                # coder instead of being logged and left unflagged.
                logger.debug(
                    "insert sequence not found in Barcode Parser. %s", exc)
            else:
                if insert_seq not in insert_seqs:
                    status_code += StatusFlags.INSERT_SEQ.flag()

        return status_code

    return coder