Skip to content

chipexo_promoter_sig

Find the promoter signature of the chipexo data. This is calculated as the most significant peak in each promoter region.

Parameters:

Name Type Description Default
chipexo_data_path str

path to the chipexo allevents file.

required
chipexo_orig_chr_convention str

chromosome convention of the chipexo allevents file.

required
promoter_data_path str

path to the promoter data file.

required
promoter_orig_chr_convention str

chromosome convention of the promoter data file.

required
chrmap_data_path str

path to the chromosome map file.

required
unified_chr_convention str

chromosome convention to convert to.

required

Returns:

Type Description
DataFrame

pandas.DataFrame: A pandas DataFrame containing the promoter signature of the chipexo data.

Example

import pandas as pd
import tempfile

Create temporary chipexo data file

with tempfile.NamedTemporaryFile(mode='w+', suffix='.tsv') as chipexo_file:
    _ = chipexo_file.write('chr\tcoord\tYPD_log2Fold\t'
                           ' YPD_log2P\nchr1\t150\t2.0\t0.05\n')

Create temporary promoter data file

with tempfile.NamedTemporaryFile(mode='w+', suffix='.tsv') as promoter_file:
    _ = promoter_file.write('chr\tstart\tend\t'
                            'associated_feature\nchr1\t100\t'
                            '200\tpromoter1\n')

Create temporary chromosome map file

with tempfile.NamedTemporaryFile(mode='w+', suffix='.tsv') as chrmap_file:
    _ = chrmap_file.write('chr\tucsc\nchr1\tchr1\n')

Call the function

result = chipexo_promoter_sig(chipexo_file.name, 'chr',
                              promoter_file.name, 'chr',
                              chrmap_file.name, 'ucsc')
isinstance(result, pd.DataFrame)  # True

Source code in callingcardstools/Analysis/yeast/chipexo_promoter_sig.py
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
def chipexo_promoter_sig(chipexo_data_path: str,
                         chipexo_orig_chr_convention: str,
                         promoter_data_path: str,
                         promoter_orig_chr_convention: str,
                         chrmap_data_path: str,
                         unified_chr_convention: str) -> pd.DataFrame:
    """
    Find the promoter signature of the chipexo data.

    The promoter and chipexo tables are joined on ``chr``, chipexo events
    whose ``chipexo_start`` lies outside the promoter's inclusive
    ``[start, end]`` interval are discarded, and the remaining events are
    summarized per promoter. The maximum fold change and the minimum
    p-value are computed independently of each other, so they are not
    guaranteed to come from the same event.

    Args:
        chipexo_data_path (str): path to the chipexo allevents file.
        chipexo_orig_chr_convention (str): chromosome convention of the
            chipexo allevents file.
        promoter_data_path (str): path to the promoter data file.
        promoter_orig_chr_convention (str): chromosome convention of the
            promoter data file.
        chrmap_data_path (str): path to the chromosome map file.
        unified_chr_convention (str): chromosome convention to convert to.

    Returns:
        pandas.DataFrame: One row per promoter that contains at least one
            chipexo event, with the columns ``chr``, ``start``, ``end``,
            ``name``, ``strand``, ``n_sig_peaks`` (number of events in the
            promoter), ``max_fc`` (largest ``YPD_log2Fold``) and
            ``min_pval`` (smallest ``YPD_log2P``). Promoters without any
            overlapping event are absent from the result.

    Example:
        >>> import os
        >>> import tempfile
        >>> import pandas as pd
        >>> # The input files must still exist when the function is
        >>> # called, so everything happens inside the directory context.
        >>> with tempfile.TemporaryDirectory() as tmpdir:
        ...     chipexo_path = os.path.join(tmpdir, 'chipexo.tsv')
        ...     promoter_path = os.path.join(tmpdir, 'promoter.tsv')
        ...     chrmap_path = os.path.join(tmpdir, 'chrmap.tsv')
        ...     with open(chipexo_path, 'w') as fh:
        ...         _ = fh.write('chr\\tcoord\\tYPD_log2Fold\\tYPD_log2P\\n'
        ...                      'chr1\\t150\\t2.0\\t0.05\\n')
        ...     with open(promoter_path, 'w') as fh:
        ...         _ = fh.write('chr\\tstart\\tend\\tassociated_feature\\n'
        ...                      'chr1\\t100\\t200\\tpromoter1\\n')
        ...     with open(chrmap_path, 'w') as fh:
        ...         _ = fh.write('chr\\tucsc\\nchr1\\tchr1\\n')
        ...     result = chipexo_promoter_sig(chipexo_path, 'chr',
        ...                                   promoter_path, 'chr',
        ...                                   chrmap_path, 'ucsc')
        >>> isinstance(result, pd.DataFrame)
        True
    """
    # NOTE(review): the example above assumes the read_in_* helpers accept
    # these column layouts and produce the `name`, `strand` and
    # `chipexo_start` columns used below -- confirm against the helpers.

    # The chromosome map only needs the conventions that are actually
    # involved in this call: both source conventions and the target one.
    chrmap_df = read_in_chrmap(chrmap_data_path,
                               {chipexo_orig_chr_convention,
                                promoter_orig_chr_convention,
                                unified_chr_convention})
    # read in promoter data
    promoter_df = read_in_promoter_data(promoter_data_path,
                                        promoter_orig_chr_convention,
                                        unified_chr_convention,
                                        chrmap_df)
    # read in chipexo data
    chipexo_df = read_in_chipexo_data(chipexo_data_path,
                                      chipexo_orig_chr_convention,
                                      unified_chr_convention,
                                      chrmap_df)

    return (
        # Merging on `chr` pairs every promoter with every chipexo event
        # on the same chromosome ...
        pd.merge(promoter_df, chipexo_df, on='chr', how='inner')
        # ... and the query keeps only the pairs where the event position
        # lies inside the promoter (both bounds inclusive).
        .query('start <= chipexo_start <= end')
        # One group per promoter region.
        .groupby(['chr', 'start', 'end', 'name', 'strand'])
        .agg(
            n_sig_peaks=pd.NamedAgg(column='chr', aggfunc='count'),
            max_fc=pd.NamedAgg(column='YPD_log2Fold', aggfunc='max'),
            min_pval=pd.NamedAgg(column='YPD_log2P', aggfunc='min'))
        # Turn the group keys back into ordinary columns.
        .reset_index()
    )