CFAV_short_read_size

Get size dist of CFAV mapped, short reads

The input CFAV-mapped reads are in the zipped file “CFAV_beds.zip” on my GitHub.

Processed reads were mapped to:
NC_001564.2 Cell fusing agent virus (CFAV) strain Galveston
See the Processing_to_matrix script for details on how these BED files were obtained.

# Build the size/strand distribution of short (< 25 nt) CFAV-mapped reads.
# Assumes rtracklayer (import), GenomicRanges (width, strand), data.table
# (rbindlist) and dplyr are attached earlier -- not visible in this chunk.
Dir <- "/Users/kathryn/Reprocess_all_paper_datasets/CFAV_mapped/CFAV_beds"

# `pattern` is a regular expression, not a glob: escape the dot, anchor the end.
beds <- dir(Dir, pattern = "\\.bed$", full.names = TRUE)

# Keep the concatenated ("cat") BED files. Match on the file name only so that
# directory components of the path can never satisfy the filter.
cat_beds <- beds[grepl("cat", basename(beds), fixed = TRUE)]
stopifnot(length(cat_beds) > 0)

cat <- lapply(cat_beds, import, format = "BED")
# Sample name = file name without directory and without the ".bed" extension.
names(cat) <- sub("\\.bed$", "", basename(cat_beds))

# One row per read: its length and strand (via the accessor, not the S4 slot).
CFAV_width <- lapply(
  cat,
  function(x) data.frame(width = width(x), strand = as.character(strand(x)))
)
CFAV_df <- rbindlist(CFAV_width, idcol = "sample")
CFAV_df$sample <- paste0(CFAV_df$sample, "_CFAV_width")

# Number of reads for every sample / width / strand combination.
CFAV_df <- CFAV_df %>%
  dplyr::count(sample, width, strand, name = "count")

# Restrict to short reads, then express each count as a fraction of the
# sample's short reads. Minus-strand fractions are negated so they plot below
# the axis.
CFAV_df_short <- CFAV_df %>%
  filter(width < 25) %>%
  group_by(sample) %>%
  mutate(
    nrow = sum(count),
    percent = count / nrow,
    percent = ifelse(strand == "-", -percent, percent)
  ) %>%
  ungroup()

##very low abundance of CFAV-mapped aegypti reads, doesn't make sense to include

Now for host mapped, short reads

These input BED files were too large for GitHub but are available upon request.

# Build the size/strand distribution of short (18-24 nt) host (AaegL5)-mapped
# reads for the Ago1/Ago2 samples, then merge with the CFAV table.
# Assumes rtracklayer (import), GenomicRanges (width, strand), data.table
# (rbindlist) and dplyr are attached earlier -- not visible in this chunk.
Dir <- "/Users/kathryn/Reprocess_all_paper_datasets/AaegL5_mapped/cat_beds"

# `pattern` is a regular expression, not a glob: escape the dot, anchor the end.
beds <- dir(Dir, pattern = "\\.bed$", full.names = TRUE)

# Match on the file name only: the directory itself is called "cat_beds", so
# testing the full path would accept every file and filter nothing.
cat_beds <- beds[grepl("cat", basename(beds), fixed = TRUE)]
cat_beds <- cat_beds[grepl("Ago1|Ago2", basename(cat_beds))]
stopifnot(length(cat_beds) > 0)

cat <- lapply(cat_beds, import, format = "BED")
# Sample name = file name without directory and without the ".bed" extension.
names(cat) <- sub("\\.bed$", "", basename(cat_beds))

# One row per read: its length and strand (via the accessor, not the S4 slot).
AaeL5_width <- lapply(
  cat,
  function(x) data.frame(width = width(x), strand = as.character(strand(x)))
)
AaegL5_df <- rbindlist(AaeL5_width, idcol = "sample")
AaegL5_df$sample <- paste0(AaegL5_df$sample, "_AaegL5_width")

# Number of reads for every sample / width / strand combination.
AaegL5_df <- AaegL5_df %>%
  dplyr::count(sample, width, strand, name = "count")

# Keep 18-24 nt reads (don't have any CFAV reads less than 18nt), then express
# each count as a fraction of the sample's retained reads. Minus-strand
# fractions are negated so they plot below the axis.
AaegL5_df_short <- AaegL5_df %>%
  filter(width > 17, width < 25) %>%
  group_by(sample) %>%
  mutate(
    nrow = sum(count),
    percent = count / nrow,
    percent = ifelse(strand == "-", -percent, percent)
  ) %>%
  ungroup()

# Stack host and CFAV tables and keep only the samples whose name contains
# "Aag2".
short_mapped <- dplyr::bind_rows(AaegL5_df_short, CFAV_df_short)
cells_short <- short_mapped[grepl("Aag2", short_mapped$sample), ]

#write.table(cells_short, "/Users/kathryn/Reprocess_all_paper_datasets/Supp_tables/CFAV_AaegL5_short_size_strand_distribution.txt", col.names = TRUE, row.names = TRUE, sep = "\t", quote = FALSE)

Output data table for graphing; “CFAV_AaegL5_short_size_strand_distribution.txt” is provided on my GitHub.

Figure 6D

# Figure 6D: dodged bar chart of read width vs. signed fraction per sample.
# Reload the exported table so the figure can be drawn without rerunning the
# BED processing above.
cells_short <- read.delim(
  "/Users/kathryn/Reprocess_all_paper_datasets/Supp_tables/CFAV_AaegL5_short_size_strand_distribution.txt",
  header = TRUE,
  sep = "\t"
)

# One bar per sample at each width; `percent` is plotted as-is, so negative
# values extend below zero. Fill and outline colours are assigned per sample
# in the order given.
ggplot(
  cells_short,
  aes(x = width, y = percent, fill = sample, color = sample)
) +
  geom_col(position = position_dodge()) +
  scale_fill_manual(
    values = c("white", "white", "#1B0B80", "#8A0F09")
  ) +
  scale_colour_manual(
    values = c("#1B0B80", "#8A0F09", "black", "black")
  ) +
  theme_bw()

CFAV_short_read_size_length

KRG

8/3/2020

Get size dist of CFAV mapped, short reads

Now for host mapped, short reads

Figure 6D