Wrapper function for ctk's tag2collapse
ctk_tag2collapse(
filesToRun,
outFile = file.path(dirname(filesToRun), paste0("TC_", basename(filesToRun))),
sb = "tag2collapse.pl",
perl = "perl",
PATHTOPERLLIB = NULL,
keepMaxScore = TRUE,
keepTagName = TRUE,
weight = TRUE,
bigFile = FALSE,
weightInName = TRUE,
randomBarcode = TRUE,
seqErrorModel = "em-local",
outputSeqError = NULL,
em = NULL,
stderr = file.path(dirname(fileToRun), paste0(basename(fileToRun),
"_ctk_tag2collapse_stderr.txt")),
stdout = file.path(dirname(fileToRun), paste0(basename(fileToRun),
"_ctk_tag2collapse_stdout.txt")),
useClipRConda = ifelse(is.null(getOption("CLIPflexR.condaEnv")), FALSE, TRUE),
additional_Args = NULL,
verbose = FALSE,
writelog = T
)
path to file to process (BED).
path to output file
path to tag2collapse.pl from CTK.
path to PERL.
path to PERL5LIB.
keep the tag with the most weight (instead of the longest one) as representative.
do not change tag name (no extra information).
consider the weight of each tag.
Set to TRUE when files are big.
find weight in name.
random barcode exists, no collapse for different barcodes.
sequencing error model to use, "alignment" or "em-local" (default) or "em-global" or "fix=0.01".
output sequencing errors estimated by the EM algorithm.
EM threshold to infer reliability of each collapsed read (used when a random linker is present; -1 = no EM).
path to stderr file.
path to stdout file.
use conda environment installed by Herper, TRUE or FALSE (defaults to TRUE when the "CLIPflexR.condaEnv" option is set, FALSE otherwise).
Additional arguments to be passed to system call.
print messages, TRUE or FALSE (default).
write stderr/stdout logs, TRUE (default) or FALSE.
path to collapsed BED file.
testFasta <- system.file("extdata/hg19Small.fa",package="CLIPflexR")
myIndex <-suppressWarnings(bowtie2_index(testFasta, overwrite = TRUE))
testFQ <- system.file("extdata/Fox3_Std_small.fq.gz",package="CLIPflexR")
FqFile_FF <- ctk_fastqFilter(testFQ,qsFilter="mean:0-29:20",verbose=TRUE)
#> fastq_filter.pl command is /Users/runner/Library/r-miniconda/envs/CLIPflexR_0.1.20/bin/ctk/fastq_filter.pl
#> fastq_filter.pl arguments are /Users/runner/Library/r-miniconda/envs/CLIPflexR_0.1.20/bin/ctk/fastq_filter.pl -if sanger -of fastq -f mean:0-29:20 /Users/runner/work/_temp/Library/CLIPflexR/extdata/Fox3_Std_small.fq.gz /Users/runner/work/_temp/Library/CLIPflexR/extdata/FF_Fox3_Std_small.fq.gz
FqFile <- decompress(FqFile_FF,overwrite=TRUE)
FqFile_clipped <- fastx_clipper(FqFile,length=20)
FqFile_QF <- fastq_quality_trimmer(FqFile_clipped)
FqFile_Col <- ctk_fastq2collapse(FqFile_QF,verbose=TRUE)
#> fastq_filter.pl command is /Users/runner/Library/r-miniconda/envs/CLIPflexR_0.1.20/bin/ctk/fastq2collapse.pl
#> fastq_filter.pl arguments are /Users/runner/Library/r-miniconda/envs/CLIPflexR_0.1.20/bin/ctk/fastq2collapse.pl /Users/runner/work/_temp/Library/CLIPflexR/extdata/QT_FF_Fox3_Std_small_clip.fq /Users/runner/work/_temp/Library/CLIPflexR/extdata/Collapsed_QT_FF_Fox3_Std_small_clip.fq
FqFile_QFColStripped <- ctk_stripBarcode(FqFile_Col,linkerlength=5,inputFormat="fastq")
bam <- suppressWarnings(bowtie_align(FqFile_QFColStripped,myIndex,
overwrite=TRUE, inputFormat="fastq"))
parsedAlignment <- ctk_parseAlignment(bam)
ctk_tag2collapse(parsedAlignment,weight=FALSE,randomBarcode=FALSE,
weightInName = FALSE,verbose = TRUE)
#> tag2collapse.pl command is /Users/runner/Library/r-miniconda/envs/CLIPflexR_0.1.20/bin/ctk/tag2collapse.pl
#> tag2collapse.pl arguments are /Users/runner/Library/r-miniconda/envs/CLIPflexR_0.1.20/bin/ctk/tag2collapse.pl --keep-tag-name --keep-max-score -EM -1 --seq-error-model em-local /Users/runner/work/_temp/Library/CLIPflexR/extdata/Collapsed_QT_FF_Fox3_Std_small_clip_rm5.bed /Users/runner/work/_temp/Library/CLIPflexR/extdata/TC_Collapsed_QT_FF_Fox3_Std_small_clip_rm5.bed
#> [1] "/Users/runner/work/_temp/Library/CLIPflexR/extdata/TC_Collapsed_QT_FF_Fox3_Std_small_clip_rm5.bed"