Skip to content

BarcodeCounterQc

This module contains the BarcodeQcCounter class, which is used for analyzing barcode quality control data. The class can process and update barcode metrics, combine multiple objects, and write the results to output files.

Author: Chase Mateusiak Date: 2023-05-16

BarcodeQcCounter

A class for counting and processing barcode quality control data.

Attributes:

Name Type Description
metrics DefaultDict

A nested defaultdict containing the barcode metrics.

bc_status DefaultDict

A nested defaultdict storing the barcode status (pass/fail flag) recorded for each combination of sequences.

Source code in callingcardstools/BarcodeParser/mammals/BarcodeQcCounter.py
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
class BarcodeQcCounter:
    """A class for counting and processing barcode quality control data.

    Both attributes are four-level nested mappings keyed, in order, by
    ``pb_seq``, ``ltr1_seq``, ``ltr2_seq`` and ``srt_seq``.

    Attributes:
        metrics (DefaultDict): A nested defaultdict holding the number of
            times each sequence combination was passed to ``update``.
        bc_status (DefaultDict): A nested defaultdict holding the most
            recently recorded barcode status for each sequence combination.

    """
    _metrics: DefaultDict
    _bc_status: DefaultDict

    def __init__(self, pickle_path: str = None) -> None:
        """Initializes a BarcodeQcCounter instance.

        Args:
            pickle_path (str, optional): Path to a pickled BarcodeQcCounter
                object. If provided, loads the object from the file.
                Defaults to None, in which case empty counters are created.

        Raises:
            FileNotFoundError: If the provided pickle path does not exist.
        """
        if pickle_path:
            if not os.path.exists(pickle_path):
                msg = f"Path to pickle file {pickle_path} does not exist"
                raise FileNotFoundError(msg)
            self.load(pickle_path)
        else:
            self.metrics = OuterDefaultDict(int)
            self.bc_status = OuterDefaultDict(bool)

    @property
    def metrics(self) -> defaultdict:
        """Returns the _metrics attribute.

        Returns:
            defaultdict: The _metrics attribute.
        """
        return self._metrics

    @metrics.setter
    def metrics(self, value: defaultdict) -> None:
        """Sets the _metrics attribute"""
        self._metrics = value

    @property
    def bc_status(self) -> defaultdict:
        """Returns the _bc_status attribute.

        Returns:
            defaultdict: The _bc_status attribute.
        """
        return self._bc_status

    @bc_status.setter
    def bc_status(self, value: defaultdict) -> None:
        """Sets the _bc_status attribute"""
        self._bc_status = value

    # private methods ---------------------------------------------------------
    def _combine(self, other: "BarcodeQcCounter") -> None:
        """Merge the data of another BarcodeQcCounter into this object.

        Counts in ``metrics`` are summed; entries in ``bc_status`` are
        overwritten by the values found in ``other``. ``other`` is not
        modified.

        Args:
            other (BarcodeQcCounter): Another BarcodeQcCounter object
                whose metrics will be combined with this object.
        """
        def merge_into(target, source, merge_leaf):
            """Recursively merge nested mapping ``source`` into ``target``."""
            for key, value in source.items():
                if isinstance(value, dict):
                    # Index rather than ``.get(key, {})``: when ``target`` is
                    # a defaultdict this creates the missing level through
                    # its own factory, so the merged structure keeps its
                    # auto-nesting behaviour and ``update`` still works on
                    # new keys afterwards.
                    try:
                        branch = target[key]
                    except KeyError:
                        # ``target`` is a plain dict; fall back to a dict
                        branch = target[key] = {}
                    merge_into(branch, value, merge_leaf)
                else:
                    target[key] = merge_leaf(target.get(key, 0), value)
            return target

        # combine the metrics dictionaries (counts are added)
        self._metrics = merge_into(
            self._metrics, other.metrics, lambda old, new: old + new)
        # combine the bc_status dictionaries (the incoming status wins)
        self._bc_status = merge_into(
            self._bc_status, other.bc_status, lambda _old, new: new)

    # public methods ----------------------------------------------------------

    def load(self, file_path: str) -> None:
        """Load a BarcodeQcCounter object from a file using Pickle.

        The ``metrics`` and ``bc_status`` of the unpickled object are
        assigned to this instance.

        Args:
            file_path (str): The file path where the object is stored.

        Raises:
            TypeError: If the unpickled object is not a BarcodeQcCounter.
        """
        logger.info("loading BarcodeQcCounter object from %s", file_path)
        # NOTE: unpickling can execute arbitrary code -- only load files
        # that come from a trusted source.
        with open(file_path, "rb") as file:
            file_data = pickle.load(file)
        if not isinstance(file_data, BarcodeQcCounter):
            raise TypeError(
                f"{file_path} is not a BarcodeQcCounter object")
        # copy the data from the loaded object to the current instance
        self.metrics = file_data.metrics
        self.bc_status = file_data.bc_status

    @classmethod
    def combine(
            cls, counters: Iterable["BarcodeQcCounter"]) -> "BarcodeQcCounter":
        """Combine multiple BarcodeQcCounter objects into a single object.

        Args:
            counters (Iterable[BarcodeQcCounter]): An iterable of
                BarcodeQcCounter objects.

        Returns:
            BarcodeQcCounter: A new BarcodeQcCounter object with the
                combined metrics.
        """
        # instantiate via ``cls`` so that subclasses combine into their
        # own type
        result = cls()

        for counter in counters:
            result._combine(counter)

        return result

    def __add__(self, other: "BarcodeQcCounter") -> "BarcodeQcCounter":
        """Add two BarcodeQcCounter objects together with the + operator.

        Raises:
            TypeError: If ``other`` is not a BarcodeQcCounter.
        """
        if not isinstance(other, BarcodeQcCounter):
            raise TypeError("Both objects must be of type 'BarcodeQcCounter'")

        return self.combine([self, other])

    def update(self,
               pb_seq: str,
               ltr1_seq: str,
               ltr2_seq: str,
               srt_seq: str,
               bc_status: bool) -> None:
        """Count one observation of a sequence combination and record its
        barcode status.

        Args:
            pb_seq (str): The primer binding sequence.
            ltr1_seq (str): The left transposon sequence.
            ltr2_seq (str): The right transposon sequence.
            srt_seq (str): The sample barcode sequence.
            bc_status (bool): The barcode status. Overwrites any status
                previously stored for this combination.
        """

        (self._metrics
         [pb_seq]
         [ltr1_seq]
         [ltr2_seq]
         [srt_seq]) += 1

        (self._bc_status
         [pb_seq]
         [ltr1_seq]
         [ltr2_seq]
         [srt_seq]) = bc_status

    def write(self,
              filename: str,
              suffix: str = "",
              raw: bool = False) -> None:
        """Write either a pickle or a tab-delimited file summarizing the
        barcode QC metrics.

        The output path is ``<filename>[_<suffix>]_barcode_qc.pkl`` when
        ``raw`` is True and ``<filename>[_<suffix>]_barcode_qc.tsv``
        otherwise.

        Args:
            filename (str): The base filename for the output files.
            suffix (str, optional): A suffix to be appended to the base
                filename. Defaults to an empty string.
            raw (bool, optional): If True, pickles the object instead of
                writing the tsv. Defaults to False.
        """
        stem = f"{filename}_{suffix}" if suffix else filename

        # if raw is true, then pickle the object
        if raw:
            pickle_path = stem + '_barcode_qc.pkl'
            logger.info("pickling barcode_qc object to %s", pickle_path)
            with open(pickle_path, "wb") as pickle_file:
                pickle.dump(self, pickle_file)

        else:
            # write the barcode qc metrics to a tab-delimited file
            tsv_path = stem + '_barcode_qc.tsv'
            logger.info("writing barcode qc metrics to %s", tsv_path)
            with open(tsv_path, "w", encoding='utf-8') as tsv_file:
                csv_writer = csv.writer(tsv_file, delimiter='\t')
                csv_writer.writerow([
                    "pb_seq",
                    "ltr1_seq",
                    "ltr2_seq",
                    "srt_seq",
                    "count",
                    "barcode_status"
                ])

                for pb_seq, ltr1_dict in self._metrics.items():
                    for ltr1_seq, ltr2_dict in ltr1_dict.items():
                        for ltr2_seq, srt_dict in ltr2_dict.items():
                            for srt_seq, count in srt_dict.items():
                                # NOTE(review): a falsy status is written
                                # as "false" (not "fail"); left unchanged
                                # in case consumers of the tsv rely on it
                                # -- confirm.
                                bc_status = ("pass" if
                                             (self._bc_status[pb_seq]
                                              [ltr1_seq]
                                              [ltr2_seq]
                                              [srt_seq])
                                             else "false")
                                csv_writer.writerow([
                                    pb_seq,
                                    ltr1_seq,
                                    ltr2_seq,
                                    srt_seq,
                                    count,
                                    bc_status
                                ])

bc_status: defaultdict property writable

Returns the _bc_status attribute.

Returns:

Name Type Description
defaultdict defaultdict

The _bc_status attribute.

metrics: defaultdict property writable

Returns the _metrics attribute.

Returns:

Name Type Description
defaultdict defaultdict

The _metrics attribute.

__add__(other)

Add two BarcodeQcCounter objects together with the + operator.

Source code in callingcardstools/BarcodeParser/mammals/BarcodeQcCounter.py
194
195
196
197
198
199
200
def __add__(self, other: "BarcodeQcCounter") -> "BarcodeQcCounter":
    """Implement ``self + other`` by combining both counters into a new one.

    Raises a TypeError when ``other`` is not a BarcodeQcCounter.
    """
    if isinstance(other, BarcodeQcCounter):
        # combine is a classmethod; it is invoked through a fresh instance
        combined_from = BarcodeQcCounter()
        return combined_from.combine([self, other])

    raise TypeError("Both objects must be of type 'BarcodeQcCounter'")

__init__(pickle_path=None)

Initializes a BarcodeQcCounter instance.

Parameters:

Name Type Description Default
pickle_path str

Path to a pickled BarcodeQcCounter object. If provided, loads the object from the file. Defaults to None.

None

Raises:

Type Description
FileNotFoundError

If the provided pickle path does not exist.

Source code in callingcardstools/BarcodeParser/mammals/BarcodeQcCounter.py
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def __init__(self, pickle_path: str = None) -> None:
    """Create a counter, either empty or restored from a pickle file.

    Args:
        pickle_path (str, optional): Location of a pickled
            BarcodeQcCounter. When given (and non-empty) the stored
            object is loaded into this instance. Defaults to None.

    Raises:
        FileNotFoundError: If ``pickle_path`` is given but does not exist.
    """
    # no path supplied: start with empty nested counters
    if not pickle_path:
        self.metrics = OuterDefaultDict(int)
        self.bc_status = OuterDefaultDict(bool)
        return

    if os.path.exists(pickle_path):
        self.load(pickle_path)
        return

    raise FileNotFoundError(
        f"Path to pickle file {pickle_path} does not exist")

combine(counters) classmethod

Combine multiple BarcodeQcCounter objects into a single object.

Parameters:

Name Type Description Default
counters Iterable[BarcodeQcCounter]

An iterable of BarcodeQcCounter objects.

required

Returns:

Name Type Description
BarcodeQcCounter BarcodeQcCounter

A new BarcodeQcCounter object with the combined metrics.

Source code in callingcardstools/BarcodeParser/mammals/BarcodeQcCounter.py
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
@classmethod
def combine(
        cls, counters: Iterable["BarcodeQcCounter"]) -> "BarcodeQcCounter":
    """Merge any number of BarcodeQcCounter objects into a fresh one.

    Args:
        counters (Iterable[BarcodeQcCounter]): The counters to merge,
            processed in iteration order.

    Returns:
        BarcodeQcCounter: A newly created BarcodeQcCounter into which
            every input counter has been combined.
    """
    merged = BarcodeQcCounter()
    for item in counters:
        merged._combine(item)
    return merged

load(file_path)

Load a BarcodeQcCounter object from a file using Pickle.

Parameters:

Name Type Description Default
file_path str

The file path where the object is stored.

required
Source code in callingcardstools/BarcodeParser/mammals/BarcodeQcCounter.py
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
def load(self, file_path: str) -> None:
    """Restore this instance's data from a pickled BarcodeQcCounter.

    Args:
        file_path (str): Path of the pickle file to read.

    Raises:
        TypeError: If the file does not contain a BarcodeQcCounter.
    """
    logger.info("loading BarcodeQcCounter object from %s", file_path)
    with open(file_path, "rb") as handle:
        loaded = pickle.load(handle)

    if not isinstance(loaded, BarcodeQcCounter):
        raise TypeError(f"{file_path} is not a BarcodeQcCounter object")

    # take over the loaded object's data
    self.metrics = loaded.metrics
    self.bc_status = loaded.bc_status

update(pb_seq, ltr1_seq, ltr2_seq, srt_seq, bc_status)

Increments the count for the given combination of sequences and records its barcode status.

Parameters:

Name Type Description Default
pb_seq str

The primer binding sequence.

required
ltr1_seq str

The left transposon sequence.

required
ltr2_seq str

The right transposon sequence.

required
srt_seq str

The sample barcode sequence.

required
bc_status bool

The barcode status.

required
Source code in callingcardstools/BarcodeParser/mammals/BarcodeQcCounter.py
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
def update(self,
           pb_seq: str,
           ltr1_seq: str,
           ltr2_seq: str,
           srt_seq: str,
           bc_status: bool) -> None:
    """Register one observation of a sequence combination.

    The count stored for the combination is incremented by one and the
    stored barcode status is replaced by ``bc_status``.

    Args:
        pb_seq (str): The primer binding sequence.
        ltr1_seq (str): The left transposon sequence.
        ltr2_seq (str): The right transposon sequence.
        srt_seq (str): The sample barcode sequence.
        bc_status (bool): The barcode status.
    """
    # walk down to the innermost level, then bump the counter
    counts_by_srt = self._metrics[pb_seq][ltr1_seq][ltr2_seq]
    counts_by_srt[srt_seq] += 1

    # same path in the status mapping; the latest status wins
    status_by_srt = self._bc_status[pb_seq][ltr1_seq][ltr2_seq]
    status_by_srt[srt_seq] = bc_status

write(filename, suffix='', raw=False)

Write either a pickle (when raw is True) or a tab-delimited file summarizing the barcode QC metrics.

Parameters:

Name Type Description Default
filename str

The base filename for the output files; "_barcode_qc" and the file extension are appended to it.

required
suffix str

A suffix to be appended to the base filename. Defaults to an empty string.

''
raw bool

If True, pickles the object. Defaults to False.

False
Source code in callingcardstools/BarcodeParser/mammals/BarcodeQcCounter.py
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
def write(self,
          filename: str,
          suffix: str = "",
          raw: bool = False) -> None:
    """Write either a pickle or a tab-delimited file summarizing the
    barcode QC metrics.

    The output path is ``<filename>[_<suffix>]_barcode_qc.pkl`` when
    ``raw`` is True and ``<filename>[_<suffix>]_barcode_qc.tsv`` otherwise.

    Args:
        filename (str): The base filename for the output files.
        suffix (str, optional): A suffix to be appended to the base
            filename. Defaults to an empty string.
        raw (bool, optional): If True, pickles the object instead of
            writing the tsv. Defaults to False.
    """
    stem = f"{filename}_{suffix}" if suffix else filename

    # if raw is true, then pickle the object
    if raw:
        pickle_path = stem + '_barcode_qc.pkl'
        # pass the path as a lazy %-argument; previously the message was
        # logged with a literal, never-substituted placeholder
        logger.info("pickling barcode_qc object to %s", pickle_path)
        with open(pickle_path, "wb") as pickle_file:
            pickle.dump(self, pickle_file)

    else:
        # write the barcode qc metrics to a tab-delimited file
        tsv_path = stem + '_barcode_qc.tsv'
        logger.info("writing barcode qc metrics to %s", tsv_path)
        with open(tsv_path, "w", encoding='utf-8') as tsv_file:
            csv_writer = csv.writer(tsv_file, delimiter='\t')
            csv_writer.writerow([
                "pb_seq",
                "ltr1_seq",
                "ltr2_seq",
                "srt_seq",
                "count",
                "barcode_status"
            ])

            # one row per (pb, ltr1, ltr2, srt) combination
            for pb_seq, ltr1_dict in self._metrics.items():
                for ltr1_seq, ltr2_dict in ltr1_dict.items():
                    for ltr2_seq, srt_dict in ltr2_dict.items():
                        for srt_seq, count in srt_dict.items():
                            # NOTE(review): a falsy status is written as
                            # "false" (not "fail"); left unchanged in case
                            # consumers of the tsv rely on it -- confirm.
                            bc_status = ("pass" if
                                         (self._bc_status[pb_seq]
                                          [ltr1_seq]
                                          [ltr2_seq]
                                          [srt_seq])
                                         else "false")
                            csv_writer.writerow([
                                pb_seq,
                                ltr1_seq,
                                ltr2_seq,
                                srt_seq,
                                count,
                                bc_status
                            ])

InnerDefaultDict

Bases: defaultdict

A nested defaultdict class.

:param defaultdict: a nested defaultdict class :type defaultdict: defaultdict

Source code in callingcardstools/BarcodeParser/mammals/BarcodeQcCounter.py
20
21
22
23
24
25
26
27
28
class InnerDefaultDict(defaultdict):
    """Innermost level of the nested defaultdict structure.

    :param data_type: callable used as the default factory for missing
        keys; defaults to ``int``
    """

    def __init__(self, data_type=int):
        leaf_factory = data_type
        super(InnerDefaultDict, self).__init__(leaf_factory)

MiddleDefaultDict1

Bases: defaultdict

A nested defaultdict class.

:param defaultdict: a nested defaultdict class :type defaultdict: defaultdict

Source code in callingcardstools/BarcodeParser/mammals/BarcodeQcCounter.py
31
32
33
34
35
36
37
38
39
class MiddleDefaultDict1(defaultdict):
    """A nested defaultdict whose missing keys are filled with
    ``InnerDefaultDict`` instances.

    :param data_type: leaf factory handed on to every ``InnerDefaultDict``
        that is created; defaults to ``int``
    """

    def __init__(self, data_type=int):
        super().__init__(partial(InnerDefaultDict, data_type))
        # remembered so __reduce__ can rebuild with the original argument
        self._data_type = data_type

    def __reduce__(self):
        """Rebuild from ``data_type`` when pickling or copying.

        The inherited ``defaultdict.__reduce__`` re-creates the object by
        calling the class with ``default_factory``. Here that is a partial,
        which ``__init__`` would treat as ``data_type`` and wrap a second
        time, so keys added after a round-trip would get the wrong nesting.
        """
        return (type(self), (self._data_type,), None, None,
                iter(self.items()))

MiddleDefaultDict2

Bases: defaultdict

A nested defaultdict class.

:param defaultdict: a nested defaultdict class :type defaultdict: defaultdict

Source code in callingcardstools/BarcodeParser/mammals/BarcodeQcCounter.py
42
43
44
45
46
47
48
49
50
class MiddleDefaultDict2(defaultdict):
    """A nested defaultdict whose missing keys are filled with
    ``MiddleDefaultDict1`` instances.

    :param data_type: leaf factory handed on to every
        ``MiddleDefaultDict1`` that is created; defaults to ``int``
    """

    def __init__(self, data_type=int):
        super().__init__(partial(MiddleDefaultDict1, data_type))
        # remembered so __reduce__ can rebuild with the original argument
        self._data_type = data_type

    def __reduce__(self):
        """Rebuild from ``data_type`` when pickling or copying.

        The inherited ``defaultdict.__reduce__`` re-creates the object by
        calling the class with ``default_factory``. Here that is a partial,
        which ``__init__`` would treat as ``data_type`` and wrap a second
        time, so keys added after a round-trip would get the wrong nesting.
        """
        return (type(self), (self._data_type,), None, None,
                iter(self.items()))

OuterDefaultDict

Bases: defaultdict

A nested defaultdict class.

:param defaultdict: a nested defaultdict class :type defaultdict: defaultdict

Source code in callingcardstools/BarcodeParser/mammals/BarcodeQcCounter.py
53
54
55
56
57
58
59
60
61
class OuterDefaultDict(defaultdict):
    """Outermost level of the nested defaultdict structure; missing keys
    are filled with ``MiddleDefaultDict2`` instances.

    :param data_type: leaf factory handed on to every
        ``MiddleDefaultDict2`` that is created; defaults to ``int``
    """

    def __init__(self, data_type=int):
        super().__init__(partial(MiddleDefaultDict2, data_type))
        # remembered so __reduce__ can rebuild with the original argument
        self._data_type = data_type

    def __reduce__(self):
        """Rebuild from ``data_type`` when pickling or copying.

        The inherited ``defaultdict.__reduce__`` re-creates the object by
        calling the class with ``default_factory``. Here that is a partial,
        which ``__init__`` would treat as ``data_type`` and wrap a second
        time, so keys added after a round-trip would get the wrong nesting.
        """
        return (type(self), (self._data_type,), None, None,
                iter(self.items()))