Tutorial 2: Annotating the in situ metabonomics data

import os
import numpy as np
from scipy.stats import gaussian_kde
from CONTINUED.annotation import *
import pandas as pd

# Switch the working directory to the result folder so that the relative file
# names used later in this tutorial ('mass_dis_in_samples.txt' and the table
# read back with pd.read_csv) resolve inside it.
os.chdir('/data/yuchen_data/desi_scripts/data/annotation_data/result/')

# Directory passed to Parsing_Mass_Table together with the sample list.
work_dir = '/data/yuchen_data/desi_scripts/data/annotation_data/combined'
# Prefix passed to Print_Clustered_Mass_By_Sample; the table read back later is
# named 'colon_cancer_desi_.clustered_mass.table.with.anno.txt', i.e. it starts
# with the last component of this prefix.
output_prefix = '/data/yuchen_data/desi_scripts/data/annotation_data/result/colon_cancer_desi_'

# Annotation tables passed to Parsing_Lipid and Parsing_Small_Molecule.
input_lipid = '/data/yuchen_data/desi_scripts/data/annotation_data/20210930.Lipid.8_samples.uniq.txt'
input_small_mol = '/data/yuchen_data/desi_scripts/data/annotation_data/20220107.combined.small_molecule.neg.uniq.txt'

# Sample list passed to Parsing_Mass_Table.
input_sample_list = '/data/yuchen_data/desi_scripts/data/annotation_data/sample.list.selected.txt'
# Cutoff passed to both Group_Mass and Clustering_Mass_by_KDE.
# NOTE(review): presumably an m/z tolerance; confirm the unit in CONTINUED.annotation.
mass_cutoff = 0.02

Step1: Parse DESI data and LC-MS data

# Parse the mass tables for the samples named in input_sample_list.
# Three results come back; they are consumed later as follows:
#   sample_mass -> Print_Mass_Diff_By_Samples
#   mass_sample -> Print_Clustered_Mass_By_Sample
#   mass        -> Group_Mass
# NOTE(review): presumably the per-sample tables are looked up under work_dir; confirm in Parsing_Mass_Table.
sample_mass, mass_sample, mass = Parsing_Mass_Table(input_sample_list, work_dir)
# Parse the two annotation tables (lipids and small molecules); both results
# are passed to the grouping, clustering and printing steps below.
lipid = Parsing_Lipid(input_lipid)
small_mol = Parsing_Small_Molecule(input_small_mol)

Step2: Generate a file named ‘mass_dis_in_samples.txt’

# Relative file name, so it resolves against the directory selected with
# os.chdir at the top of the tutorial.
output_sample_mass = 'mass_dis_in_samples.txt'
# NOTE(review): presumably writes its report to output_sample_mass; confirm in CONTINUED.annotation.
Print_Mass_Diff_By_Samples(sample_mass, output_sample_mass)

Step3: Use KDE (kernel density estimation) to cluster all m/z values

# Two chained calls: the result of Group_Mass is the first argument of
# Clustering_Mass_by_KDE. Both receive the same lipid / small-molecule tables
# and the same mass_cutoff.
mass_index_group = Group_Mass(mass, lipid, small_mol, mass_cutoff)
# mass_clustered is what Print_Clustered_Mass_By_Sample consumes in the next step.
mass_clustered = Clustering_Mass_by_KDE(mass_index_group, lipid, small_mol, mass_cutoff)

Step4: Generate the file ‘colon_cancer_desi_.clustered_mass.table.with.anno.txt’, which records the annotation information for all m/z values across all samples

# Emit the clustered, annotated mass table. output_prefix is passed as the
# last argument; the file read back below starts with that prefix's final
# path component.
Print_Clustered_Mass_By_Sample(mass_clustered, mass_sample, lipid, small_mol, output_prefix)
# Each row represents an LC-MS annotated metabolite, each column represents a
# sample, and each cell indicates whether an m/z value in that sample has been
# annotated as the corresponding metabolite. If it has, the cell value is the
# m/z for that sample; if not, the cell value is NaN.
# NOTE(review): the preview printed below also shows per-sample columns holding
# 0/1, `<sample>_mass` columns, and trailing `anno_lipid` / `anno_small_mol`
# columns; confirm which column group the description above refers to.
# The file is tab-separated and its first column is used as the row index.
df = pd.read_csv('colon_cancer_desi_.clustered_mass.table.with.anno.txt', index_col=0, sep='\t')
# Preview the first rows (rendered as the table that follows in the notebook).
df.head()
ST06_20210716 ST06_20211019 ST08_20211019 ST103_20210718 ST109_20210330 ST114_20210730 ST118_20211222 ST121_20210806 ST124_20211223 ST129_20201210 ... ST73_20210728_mass ST73_20210729_mass ST84_20211223_mass ST87_20210331_mass ST88_20210331_mass ST91_20210406_mass ST98_20210715_mass ST98_20210804_mass anno_lipid anno_small_mol
Index
1 0 0 0 0 0 0 0 0 0 0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 71.0133;C3 H4 O2;H;Acrylic acid
2 0 0 0 0 0 0 0 0 0 0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 74.02421;C2 H5 N O2;H;Glycine
3 0 0 0 0 0 0 0 0 0 1 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 78.91830999999999;H Br;H;Hydrogen bromide
4 0 0 0 0 0 0 0 0 0 0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 79.95662999999999;None;H;None
5 0 0 0 0 0 0 0 0 0 0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 66 columns