Skip to content

BarcodeQcCounter

This module contains the BarcodeQcCounter class, which is used for analyzing barcode quality control data. The class can process and update barcode metrics, combine multiple objects, and write the results to output files.

Author: Chase Mateusiak Date: 2023-05-01

BarcodeQcCounter

A class for counting and processing barcode quality control data.

Attributes:

Name Type Description
metrics DefaultDict

A nested defaultdict containing the barcode metrics.

r1_transposon_seq_dict DefaultDict

A defaultdict storing the R1 transposon sequences.

Source code in callingcardstools/BarcodeParser/yeast/BarcodeQcCounter.py
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
class BarcodeQcCounter:
    """A class for counting and processing barcode quality control data.

    Counts are kept in a four-level nested mapping that is indexed, in
    order, by R1 transposon edit distance, R1 primer sequence, R2
    transposon sequence and R2 restriction enzyme name. A second mapping
    records, per R1 transposon edit distance, the set of R1 transposon
    sequences that were observed.

    Attributes:
        metrics (DefaultDict): A nested defaultdict containing the
            barcode metrics (read-only view of ``_metrics``).
        r1_transposon_dict (DefaultDict): A defaultdict storing the R1
            transposon sequences (read-only view of
            ``_r1_transposon_seq_dict``).

    """
    _metrics: DefaultDict
    _r1_transposon_seq_dict: DefaultDict

    def __init__(self, pickle_path: str = None) -> None:
        """Initializes a BarcodeQcCounter instance.

        Args:
            pickle_path (str, optional): Path to a pickled BarcodeQcCounter
                object. If provided, loads the object from the file.
                Defaults to None, in which case empty containers are
                created.

        Raises:
            FileNotFoundError: If the provided pickle path does not exist.
        """
        if pickle_path:
            if not os.path.exists(pickle_path):
                msg = f"Path to pickle file {pickle_path} does not exist"
                raise FileNotFoundError(msg)
            self.load(pickle_path)
        else:
            # NOTE(review): OuterDefaultDict is defined outside this class;
            # presumably a picklable 4-level defaultdict with int leaves.
            self._metrics = OuterDefaultDict(int)
            self._r1_transposon_seq_dict = defaultdict(set)

    @property
    def metrics(self) -> defaultdict:
        """Returns the _metrics attribute.

        Returns:
            defaultdict: The _metrics attribute.
        """
        return self._metrics

    @property
    def r1_transposon_dict(self) -> defaultdict:
        """Returns the _r1_transposon_seq_dict attribute.

        Returns:
            defaultdict: the _r1_transposon_seq_dict attribute.
        """
        return self._r1_transposon_seq_dict

    # private methods ---------------------------------------------------------

    def _combine(self, other: "BarcodeQcCounter") -> None:
        """Add the metrics of another BarcodeQcCounter into this one.

        This mutates ``self`` in place; ``other`` is only read.

        Args:
            other (BarcodeQcCounter): Another BarcodeQcCounter object
                whose metrics will be combined with this object.
        """
        # Combine _metrics dictionaries: walk every leaf count of `other`
        # and add it to the matching leaf of this object.
        for r1_transposon_edit_dist, r1_primer_dict in other.metrics.items():
            for r1_primer_seq, r2_transposon_dict in r1_primer_dict.items():
                for r2_transposon_seq, r2_restriction_enzyme_dict in \
                        r2_transposon_dict.items():
                    for r2_restriction_enzyme_name, count in \
                            r2_restriction_enzyme_dict.items():
                        (self._metrics
                         [r1_transposon_edit_dist]
                         [r1_primer_seq]
                         [r2_transposon_seq]
                         [r2_restriction_enzyme_name]) += count

        # Combine _r1_transposon_seq_dict dictionaries (set union per key)
        for r1_transposon_edit_dist, r1_transposon_seq_set in \
                other.r1_transposon_dict.items():
            self._r1_transposon_seq_dict[r1_transposon_edit_dist]\
                .update(r1_transposon_seq_set)

    # public methods ----------------------------------------------------------
    def load(self, file_path: str) -> None:
        """Load a BarcodeQcCounter object from a file using Pickle.

        The unpickled object's containers are adopted by this instance
        (they are shared, not copied).

        Warning:
            ``pickle.load`` can execute arbitrary code. Only load files
            that come from a trusted source.

        Args:
            file_path (str): The file path where the object is stored.

        Raises:
            TypeError: If the unpickled object is not a BarcodeQcCounter.
        """
        logger.info("loading BarcodeQcCounter object from %s", file_path)
        with open(file_path, "rb") as file:
            file_data = pickle.load(file)
            if not isinstance(file_data, BarcodeQcCounter):
                raise TypeError(
                    f"{file_path} is not a BarcodeQcCounter object")
            # copy the data from the loaded object to the current instance
            self._metrics = file_data._metrics
            self._r1_transposon_seq_dict = file_data._r1_transposon_seq_dict

    @classmethod
    def combine(
            cls, counters: Iterable["BarcodeQcCounter"]) -> "BarcodeQcCounter":
        """Combine multiple BarcodeQcCounter objects into a single object.

        Args:
            counters (Iterable[BarcodeQcCounter]): An iterable of
                BarcodeQcCounter objects.

        Returns:
            BarcodeQcCounter: A new BarcodeQcCounter object with the
                combined metrics. The inputs are not modified.
        """
        result = BarcodeQcCounter()

        for counter in counters:
            result._combine(counter)

        return result

    def __add__(self, other: "BarcodeQcCounter") -> "BarcodeQcCounter":
        """Add two BarcodeQcCounter objects together with the + operator.

        Args:
            other (BarcodeQcCounter): The right-hand operand.

        Returns:
            BarcodeQcCounter: A new object holding the summed metrics;
                neither operand is modified.

        Raises:
            TypeError: If ``other`` is not a BarcodeQcCounter.
        """
        if not isinstance(other, BarcodeQcCounter):
            raise TypeError("Both objects must be of type 'BarcodeQcCounter'")

        # `combine` is a classmethod that expects an *iterable* of counters
        # and returns a new object, so calling it with a single counter
        # raised and its result was discarded. Merge in place with the
        # private `_combine` instead.
        result = BarcodeQcCounter()
        result._combine(self)
        result._combine(other)
        return result

    def update(self,
               component_tuple: tuple,
               r1_transposon_edit_dist: int,
               r2_restriction_enzyme_name: str) -> None:
        """Record one observation in the metrics.

        Args:
            component_tuple (tuple): A 3-tuple containing, in order, the
                R1 primer, R1 transposon, and R2 transposon sequences.
            r1_transposon_edit_dist (int): The edit distance between the
                R1 transposon sequence and the expected R1 transposon
                sequence.
            r2_restriction_enzyme_name (str): The R2 restriction enzyme name.
        """
        (r1_primer_seq,
         r1_transposon_seq,
         r2_transposon_seq) = component_tuple

        # increment the leaf counter for this combination of keys
        (self._metrics
         [r1_transposon_edit_dist]
         [r1_primer_seq]
         [r2_transposon_seq]
         [r2_restriction_enzyme_name]) += 1

        # remember which R1 transposon sequences occurred at this distance
        self._r1_transposon_seq_dict[r1_transposon_edit_dist]\
            .add(r1_transposon_seq)

    def _summarize_by_tf(self, component_dict: dict) -> tuple:
        """Summarizes the metrics by transcription factor (TF).

        Two record lists are built:

        * the R1 primer summary holds, for every observed R1 primer that
          is one of the expected primers, one record per observed
          (R2 transposon, restriction enzyme) pair;
        * the R2 transposon summary holds, for every observed R2
          transposon that is one of the expected transposons, one record
          per observed (R1 primer, restriction enzyme) pair.

        Args:
            component_dict (dict): A dictionary containing the keys
                'tf', 'r1_primer', and 'r2_transposon', each mapping to
                a list. Entries at the same index describe the same TF.

        Returns:
            tuple: A tuple ``(r1_primer_summary, r2_transposon_summary)``
                of lists of record dicts.
        """
        expected_tfs = component_dict['tf']
        expected_r1_primers = component_dict['r1_primer']
        expected_r2_transposons = component_dict['r2_transposon']

        r1_primer_summary = []
        r2_transposon_summary = []

        # maps r1 transposon edit distance -> valid r2 transposon seq ->
        # set of r1 primer seqs observed with it.
        # NOTE(review): MiddleDefaultDict1 is defined elsewhere; presumably
        # equivalent to defaultdict(lambda: defaultdict(set)).
        r1_for_given_r2_dict = MiddleDefaultDict1(set)
        for r1_transposon_ed, r1_primer_dict in self._metrics.items():
            for r1_primer_seq, r2_transposon_dict in r1_primer_dict.items():
                # The position of this primer among the expected primers
                # does not depend on the R2 transposon, so resolve it once
                # per primer rather than once per inner iteration.
                if r1_primer_seq in expected_r1_primers:
                    r1_primer_index = \
                        expected_r1_primers.index(r1_primer_seq)
                else:
                    r1_primer_index = None

                for r2_transposon_seq, r2_transposon_seq_dict in \
                        r2_transposon_dict.items():
                    # if the r2_transposon_seq is recognized, then save the
                    # r1_primer_seq for the second pass below
                    if r2_transposon_seq in expected_r2_transposons:
                        (r1_for_given_r2_dict
                         [r1_transposon_ed]
                         [r2_transposon_seq]
                         .add(r1_primer_seq))

                    # only expected r1 primers contribute to the
                    # r1 primer summary
                    if r1_primer_index is None:
                        continue

                    # compare the observed r2 transposon against the one
                    # expected for this primer's TF.
                    # NOTE(review): `align` is defined elsewhere; only the
                    # "editDistance" entry of its result is read here.
                    edit_dist = align(
                        r2_transposon_seq,
                        expected_r2_transposons[r1_primer_index])
                    r1_primer_record = {
                        "tf": expected_tfs[r1_primer_index],
                        "r1_primer_seq":
                        expected_r1_primers[r1_primer_index],
                        "r1_transposon_edit_dist": r1_transposon_ed,
                        "r2_transposon_seq": r2_transposon_seq,
                        "r2_transposon_edit_dist":
                        edit_dist.get("editDistance")}
                    for restriction_enzyme, count in \
                            r2_transposon_seq_dict.items():
                        record_copy = r1_primer_record.copy()
                        record_copy.update({
                            'restriction_enzyme':
                            restriction_enzyme,
                            'count': count})
                        r1_primer_summary.append(record_copy)

        # in the second iteration, iterate over only those r1_primer_seqs with
        # a valid r2_transposon_seq
        for r1_transposon_ed, r1_transposon_ed_dict in \
                r1_for_given_r2_dict.items():
            for r2_transposon_seq, r1_primer_seq_set in \
                    r1_transposon_ed_dict.items():
                # extract the TF and expected r1_primer sequence for this
                # r2_transposon_seq
                index = expected_r2_transposons.index(r2_transposon_seq)
                tf = expected_tfs[index]
                r1_primer_expected = expected_r1_primers[index]
                # iterate over all of the `r1_primer_seq` for this
                # r2_transposon_seq
                for r1_primer_query in r1_primer_seq_set:
                    # align the r1_primer to the expected r1_primer for this
                    # r2_transposon_seq and TF
                    edit_dist = align(
                        r1_primer_query,
                        r1_primer_expected)
                    # create the base record
                    r2_transposon_record = {
                        "tf": tf,
                        "r2_transposon_seq": r2_transposon_seq,
                        "r1_transposon_edit_dist": r1_transposon_ed,
                        "r1_primer_seq": r1_primer_query,
                        "r1_primer_edit_dist":
                        edit_dist.get("editDistance")}
                    for restriction_enzyme, count in \
                        (self._metrics
                         [r1_transposon_ed]
                         [r1_primer_query]
                         [r2_transposon_seq]
                         .items()):
                        # make a copy of the record
                        record_copy = r2_transposon_record.copy()
                        # add additional restriction enzyme info
                        record_copy.update({
                            'restriction_enzyme':
                            restriction_enzyme,
                            'count': count})
                        r2_transposon_summary.append(record_copy)

        return r1_primer_summary, r2_transposon_summary

    def write(self,
              raw: bool = False,
              component_dict: dict = None,
              output_dirpath: str = ".",
              filename: str = "barcode_qc",
              suffix: str = "") -> None:
        """Write a pickle and/or a comma-delimited file summarizing the
        barcode QC metrics.

        Args:
            raw (bool, optional): If True, pickles the object.
                Defaults to False.
            component_dict (dict, optional): A dictionary containing the
                keys 'tf', 'r1_primer', and 'r2_transposon', and their
                respective lists of values. If provided (and non-empty),
                writes the R1 primer and R2 transposon summaries as CSV.
                Defaults to None.
            output_dirpath (str, optional): The output directory path where
                the files will be saved. Defaults to the current directory.
            filename (str, optional): The base filename for the output files.
                Defaults to "barcode_qc".
            suffix (str, optional): A suffix to be appended to the base
                filename. Defaults to an empty string.

        Raises:
            ValueError: If output_dirpath is not an existing directory, if
                component_dict does not have exactly the expected keys, or
                if its lists differ in length.
            TypeError: If component_dict is not a dict or any of its values
                is not a list.
        """
        # check that the output_dirpath is a valid directory. The previous
        # check used os.path.join(), which only builds a string and is
        # truthy for any non-empty path, so it never rejected anything.
        if not os.path.isdir(output_dirpath):
            raise ValueError("output_dirpath must be a valid directory")
        # if raw is true, then pickle the object
        if raw:
            # NOTE: unlike the CSV names below, the underscore is added even
            # when suffix is empty (e.g. "barcode_qc_.pickle"). Kept as-is
            # because existing callers may rely on that file name.
            pickle_path = os.path.join(
                output_dirpath, filename + '_' + suffix + ".pickle")
            logger.info("pickling barcode_qc object to %s", pickle_path)
            with open(pickle_path, "wb") as pickle_file:
                pickle.dump(self, pickle_file)

        # if component_dict is passed
        if component_dict:
            # input checks
            if not isinstance(component_dict, dict):
                raise TypeError("component_dict must be a dictionary")
            if set(component_dict) != {'tf', 'r1_primer', 'r2_transposon'}:
                raise ValueError("component_dict must be a dictionary "
                                 "where the keys are 'tf', 'r1_primer', "
                                 "'r2_transposon' and the values are "
                                 "lists of the same length. The index of "
                                 "each list corresponds to the same "
                                 "transcription factor.")
            if not all(isinstance(v, list)
                       for v in component_dict.values()):
                raise TypeError("component_dict values must be lists")
            if len({len(x) for x in component_dict.values()}) != 1:
                raise ValueError("component_dict values must be lists of "
                                 "the same length")
            # extract summaries from the metrics
            r1_primer_summary, r2_transposon_summary = \
                self._summarize_by_tf(component_dict)

            # write r1_primer_summary to file
            append_suffix = '_' + suffix if suffix else ''
            r1_primer_basename = \
                filename + "_r1_primer_summary" + append_suffix + ".csv"
            r1_primer_summary_path = os.path.join(
                output_dirpath, r1_primer_basename)
            r1_primer_summary_df = pd.DataFrame(r1_primer_summary)
            logger.info("writing r1_primer_summary to %s",
                        r1_primer_summary_path)
            r1_primer_summary_df.to_csv(r1_primer_summary_path, index=False)

            # write r2_transposon summary to file
            r2_transposon_summary_basename = \
                filename + "_r2_transposon_summary" + append_suffix + ".csv"
            r2_transposon_summary_path = os.path.join(
                output_dirpath, r2_transposon_summary_basename)
            r2_transposon_summary_df = pd.DataFrame(r2_transposon_summary)
            logger.info("writing r2_transposon_summary to %s",
                        r2_transposon_summary_path)
            r2_transposon_summary_df.to_csv(
                r2_transposon_summary_path, index=False)

metrics: defaultdict property

Returns the _metrics attribute.

Returns:

Name Type Description
defaultdict defaultdict

The _metrics attribute.

r1_transposon_dict: defaultdict property

Returns the _r1_transposon_seq_dict attribute.

Returns:

Name Type Description
defaultdict defaultdict

the _r1_transposon_seq_dict attribute.

__add__(other)

Add two BarcodeQcCounter objects together with the + operator.

Source code in callingcardstools/BarcodeParser/yeast/BarcodeQcCounter.py
163
164
165
166
167
168
169
170
171
def __add__(self, other: "BarcodeQcCounter") -> "BarcodeQcCounter":
    """Add two BarcodeQcCounter objects together with the + operator.

    Args:
        other (BarcodeQcCounter): The right-hand operand.

    Returns:
        BarcodeQcCounter: A new object holding the summed metrics;
            neither operand is modified.

    Raises:
        TypeError: If ``other`` is not a BarcodeQcCounter.
    """
    if not isinstance(other, BarcodeQcCounter):
        raise TypeError("Both objects must be of type 'BarcodeQcCounter'")

    # `combine` is a classmethod that expects an *iterable* of counters and
    # returns a new object, so calling it with a single counter raised and
    # its result was discarded. Merge in place with `_combine` instead.
    result = BarcodeQcCounter()
    result._combine(self)
    result._combine(other)
    return result

__init__(pickle_path=None)

Initializes a BarcodeQcCounter instance.

Parameters:

Name Type Description Default
pickle_path str

Path to a pickled BarcodeQcCounter object. If provided, loads the object from the file. Defaults to None.

None

Raises:

Type Description
FileNotFoundError

If the provided pickle path does not exist.

Source code in callingcardstools/BarcodeParser/yeast/BarcodeQcCounter.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def __init__(self, pickle_path: str = None) -> None:
    """Set up a counter, either empty or restored from a pickle.

    Args:
        pickle_path (str, optional): Location of a pickled
            BarcodeQcCounter. When given, the state is read from that
            file via ``load``; when omitted (or otherwise falsy) the
            counter starts with empty containers. Defaults to None.

    Raises:
        FileNotFoundError: If ``pickle_path`` is given but does not exist.
    """
    if not pickle_path:
        # Nothing to restore: begin with fresh, empty containers.
        # NOTE(review): OuterDefaultDict is defined elsewhere; presumably a
        # nested defaultdict whose leaves are int counters.
        self._metrics = OuterDefaultDict(int)
        self._r1_transposon_seq_dict = defaultdict(set)
        return

    if os.path.exists(pickle_path):
        self.load(pickle_path)
        return

    raise FileNotFoundError(
        f"Path to pickle file {pickle_path} does not exist")

combine(counters) classmethod

Combine multiple BarcodeQcCounter objects into a single object.

Parameters:

Name Type Description Default
counters Iterable[BarcodeQcCounter]

An iterable of BarcodeQcCounter objects.

required

Returns:

Name Type Description
BarcodeQcCounter BarcodeQcCounter

A new BarcodeQcCounter object with the combined metrics.

Source code in callingcardstools/BarcodeParser/yeast/BarcodeQcCounter.py
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
@classmethod
def combine(
        cls, counters: Iterable["BarcodeQcCounter"]) -> "BarcodeQcCounter":
    """Merge any number of BarcodeQcCounter objects into a fresh one.

    Args:
        counters (Iterable[BarcodeQcCounter]): The counters whose
            metrics should be summed.

    Returns:
        BarcodeQcCounter: A newly created counter containing the summed
            metrics of every element of ``counters``.
    """
    merged = BarcodeQcCounter()
    # fold each input into the accumulator; the inputs are only read
    for item in counters:
        merged._combine(item)
    return merged

load(file_path)

Load a BarcodeQcCounter object from a file using Pickle.

Parameters:

Name Type Description Default
file_path str

The file path where the object is stored.

required
Source code in callingcardstools/BarcodeParser/yeast/BarcodeQcCounter.py
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def load(self, file_path: str) -> None:
    """Replace this instance's state with that of a pickled counter.

    Warning:
        ``pickle.load`` can execute arbitrary code; only load files from
        a trusted source.

    Args:
        file_path (str): Path of the pickle file to read.

    Raises:
        TypeError: If the file does not contain a BarcodeQcCounter.
    """
    logger.info("loading BarcodeQcCounter object from %s", file_path)
    with open(file_path, "rb") as handle:
        loaded = pickle.load(handle)

    if not isinstance(loaded, BarcodeQcCounter):
        raise TypeError(
            f"{file_path} is not a BarcodeQcCounter object")

    # adopt the loaded object's containers (shared, not copied)
    self._metrics = loaded._metrics
    self._r1_transposon_seq_dict = loaded._r1_transposon_seq_dict

update(component_tuple, r1_transposon_edit_dist, r2_restriction_enzyme_name)

Updates the metrics with the given component tuple, R1 transposon edit distance, and R2 restriction enzyme name.

Parameters:

Name Type Description Default
component_tuple tuple

A tuple containing R1 primer, R1 transposon, and R2 transposon sequences.

required
r1_transposon_edit_dist int

The edit distance between the R1 transposon sequence and the expected R1 transposon sequence.

required
r2_restriction_enzyme_name str

The R2 restriction enzyme name.

required
Source code in callingcardstools/BarcodeParser/yeast/BarcodeQcCounter.py
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
def update(self,
           component_tuple: tuple,
           r1_transposon_edit_dist: int,
           r2_restriction_enzyme_name: str) -> None:
    """Record a single observation in the counter.

    Args:
        component_tuple (tuple): A 3-tuple holding, in order, the R1
            primer, R1 transposon, and R2 transposon sequences.
        r1_transposon_edit_dist (int): Edit distance between the observed
            and the expected R1 transposon sequence.
        r2_restriction_enzyme_name (str): Name of the R2 restriction
            enzyme.
    """
    primer, r1_transposon, r2_transposon = component_tuple

    # walk down to the per-enzyme counts for this key combination and
    # bump the one for the given enzyme
    enzyme_counts = \
        self._metrics[r1_transposon_edit_dist][primer][r2_transposon]
    enzyme_counts[r2_restriction_enzyme_name] += 1

    # remember which R1 transposon sequences occurred at this distance
    seen_at_distance = self._r1_transposon_seq_dict[r1_transposon_edit_dist]
    seen_at_distance.add(r1_transposon)

write(raw=False, component_dict=None, output_dirpath='.', filename='barcode_qc', suffix='')

Write a pickle and/or a comma-delimited file summarizing the barcode QC metrics.

Parameters:

Name Type Description Default
raw bool

If True, pickles the object. Defaults to False.

False
component_dict dict

A dictionary containing the keys ‘tf’, ‘r1_primer’, and ‘r2_transposon’, and their respective lists of values. If provided, writes summaries for each component. Defaults to None.

None
output_dirpath str

The output directory path where the files will be saved. Defaults to the current directory.

'.'
filename str

The base filename for the output files. Defaults to “barcode_qc”.

'barcode_qc'
suffix str

A suffix to be appended to the base filename. Defaults to an empty string.

''
Source code in callingcardstools/BarcodeParser/yeast/BarcodeQcCounter.py
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
def write(self,
          raw: bool = False,
          component_dict: dict = None,
          output_dirpath: str = ".",
          filename: str = "barcode_qc",
          suffix: str = "") -> None:
    """Write a pickle and/or a comma-delimited file summarizing the
    barcode QC metrics.

    Args:
        raw (bool, optional): If True, pickles the object.
            Defaults to False.
        component_dict (dict, optional): A dictionary containing the
            keys 'tf', 'r1_primer', and 'r2_transposon', and their
            respective lists of values. If provided (and non-empty),
            writes the R1 primer and R2 transposon summaries as CSV.
            Defaults to None.
        output_dirpath (str, optional): The output directory path where
            the files will be saved. Defaults to the current directory.
        filename (str, optional): The base filename for the output files.
            Defaults to "barcode_qc".
        suffix (str, optional): A suffix to be appended to the base
            filename. Defaults to an empty string.

    Raises:
        ValueError: If output_dirpath is not an existing directory, if
            component_dict does not have exactly the expected keys, or
            if its lists differ in length.
        TypeError: If component_dict is not a dict or any of its values
            is not a list.
    """
    # check that the output_dirpath is a valid directory. The previous
    # check used os.path.join(), which only builds a string and is truthy
    # for any non-empty path, so it never rejected anything.
    if not os.path.isdir(output_dirpath):
        raise ValueError("output_dirpath must be a valid directory")
    # if raw is true, then pickle the object
    if raw:
        # NOTE: unlike the CSV names below, the underscore is added even
        # when suffix is empty (e.g. "barcode_qc_.pickle"). Kept as-is
        # because existing callers may rely on that file name.
        pickle_path = os.path.join(
            output_dirpath, filename + '_' + suffix + ".pickle")
        logger.info("pickling barcode_qc object to %s", pickle_path)
        with open(pickle_path, "wb") as pickle_file:
            pickle.dump(self, pickle_file)

    # if component_dict is passed
    if component_dict:
        # input checks
        if not isinstance(component_dict, dict):
            raise TypeError("component_dict must be a dictionary")
        if set(component_dict) != {'tf', 'r1_primer', 'r2_transposon'}:
            raise ValueError("component_dict must be a dictionary "
                             "where the keys are 'tf', 'r1_primer', "
                             "'r2_transposon' and the values are "
                             "lists of the same length. The index of "
                             "each list corresponds to the same "
                             "transcription factor.")
        if not all(isinstance(v, list) for v in component_dict.values()):
            raise TypeError("component_dict values must be lists")
        if len({len(x) for x in component_dict.values()}) != 1:
            raise ValueError("component_dict values must be lists of "
                             "the same length")
        # extract summaries from the metrics
        r1_primer_summary, r2_transposon_summary = \
            self._summarize_by_tf(component_dict)

        # write r1_primer_summary to file
        append_suffix = '_' + suffix if suffix else ''
        r1_primer_basename = \
            filename + "_r1_primer_summary" + append_suffix + ".csv"
        r1_primer_summary_path = os.path.join(
            output_dirpath, r1_primer_basename)
        r1_primer_summary_df = pd.DataFrame(r1_primer_summary)
        logger.info("writing r1_primer_summary to %s",
                    r1_primer_summary_path)
        r1_primer_summary_df.to_csv(r1_primer_summary_path, index=False)

        # write r2_transposon summary to file
        r2_transposon_summary_basename = \
            filename + "_r2_transposon_summary" + append_suffix + ".csv"
        r2_transposon_summary_path = os.path.join(
            output_dirpath, r2_transposon_summary_basename)
        r2_transposon_summary_df = pd.DataFrame(r2_transposon_summary)
        logger.info("writing r2_transposon_summary to %s",
                    r2_transposon_summary_path)
        r2_transposon_summary_df.to_csv(
            r2_transposon_summary_path, index=False)