library(scPipe) library(Rsubread) # fq_R1 should contain the transcripts # fq_R2 should contain UMI and barcodes fq_R1 = "HCATisStab_R2.fastq.gz" # Note R2 is the transcript-containing read here fq_R2 = "HCATisStab_R1.fastq.gz" # output folder where files will be generated out_dir = "output" # output file names combined_fastq = file.path(out_dir, "trimmed.fastq.gz") aligned_bam = file.path(out_dir, "aligned.bam") mapped_bam = file.path(out_dir, "aligned.mapped.bam") # scPipe requires read structures to be defined in terms of position # and lengths of barcodes and UMI sequences. # positions are 0-indexed so the first base is considered base 0 read_structure = list( bs1 = -1, # barcode start position in fq_R1, -1 indicates no barcode bl1 = 0, # barcode length in fq_R1, 0 since no barcode present bs2 = 0, # barcode start position in fq_R2 bl2 = 16, # barcode length in fq_R2 us = 16, # UMI start position in fq_R2 ul = 10 # UMI length ) # trim out and store barcode and UMI sequences in fastq header sc_trim_barcode( outfq = combined_fastq, r1 = fq_R1, r2 = fq_R2, read_structure = read_structure, filter_settings = list(rmlow = FALSE, rmN = FALSE, minq = 20, numbq = 2) ) # align reads align( index = "Index-ensembl/GRCh_v91", readfile1 = combined_fastq, output_file = aligned_bam, nthreads = 4 ) # file names of barcode and gene annotations exon_anno = "Annotation/ensembl_anno.gff3" barcode_anno = "sample_index.csv" # detect 10X barcodes and generate sample_index.csv file sc_detect_bc( infq = combined_fastq, outcsv = barcode_anno, # bacode annotation output file name bc_len = read_structure$bl2, # 16bp barcode max_reads = 5000000, # only process first 5 million reads min_count = 100 # discard cell barcodes with few than 100 hits ) # produce count matrix output/gene_count.csv and annotation statistics # from aligned bam file # resulting folder can be read into SingleCellExperiment in R using create_sce_by_dir(output) sc_count_aligned_bam( inbam = aligned_bam, outbam = mapped_bam, annofn = exon_anno, bc_len = read_structure$bl2, UMI_len = read_structure$ul, outdir = out_dir, bc_anno = barcode_anno )