Arabidopsis_thaliana_TSSs.rds
, located in
the Datasets
folder, and assign it to a variable named
tss
.GenomicRanges
object representing the
transcription start site (TSS) coordinates of Arabidopsis
thaliana transcripts.tss = readRDS("Exercises/Datasets/Arabidopsis_thaliana_TSSs.rds")
library(GenomicRanges)
seqnames(tss)
## factor-Rle of length 41671 with 7 runs
## Lengths: 10646 6855 8219 6384 9288 146 133
## Values : 1 2 3 4 5 Mt Pt
## Levels(7): 1 2 3 4 5 Mt Pt
tss
, keeping only sequences from sequence 1, and
reassign the result to the variable tss
.tss = tss[seqnames(tss) == 1]
1050
bases
wide, but only by extending the start position.tss
.tss = resize(tss,width = 1050, fix = "end")
50
bases and reassign the result to
the variable tss
.tss = shift(tss, shift = 50)
tss
.tss = reduce(tss)
Arabidopsis_casual_regions.bed
,
located in the Datasets
folder.library(rtracklayer)
bed = import.bed("Exercises/Datasets/Arabidopsis_casual_regions.bed")
Arabidopsis_casual_regions.bed.
Then:OV = findOverlaps(tss, bed)
OV_TSS = tss[queryHits(OV)]
OV_BED = bed[subjectHits(OV)]
OV_MERGE = c(OV_TSS,OV_BED)
export.bed(OV_MERGE, "Bed_file_overlaps.bed")
BSgenome Object
for Arabidopsis
thaliana (use BSgenome.Athaliana.TAIR.TAIR9
).GenomicRanges Object
with the following
values:chr = c("Chr5", "Chr1", "Chr4")
start = c(129999, 340, 23456)
end = c(130000, 400, 23470)
Extract the base sequences of the genomic intervals contained in this
GenomicRanges object
and assign them to the variable
sequences
.What type of object do you obtain?
#BiocManager::install("BSgenome.Athaliana.TAIR.TAIR9")
library(BSgenome.Athaliana.TAIR.TAIR9)
intervals = IRanges(start = c(129999, 340, 23456)
, end = c(130000, 400, 23470))
GR = GRanges(seqnames = c("Chr5", "Chr1", "Chr4")
, ranges = intervals)
GR
## GRanges object with 3 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] Chr5 129999-130000 *
## [2] Chr1 340-400 *
## [3] Chr4 23456-23470 *
## -------
## seqinfo: 3 sequences from an unspecified genome; no seqlengths
sequences = getSeq(BSgenome.Athaliana.TAIR.TAIR9, GR)
sequences
## DNAStringSet object of length 3:
## width seq
## [1] 2 AT
## [2] 61 TTGTAACCTTAGGGTTGGTTTATCTCAAGAATCTTATTAATTGTTTGGACTGTTTATGTTT
## [3] 15 AGGAAGTTGTCCCAG
sequences
.reverseComplement(sequences)
## DNAStringSet object of length 3:
## width seq
## [1] 2 AT
## [2] 61 AAACATAAACAGTCCAAACAATTAATAAGATTCTTGAGATAAACCAACCCTAAGGTTACAA
## [3] 15 CTGGGACAACTTCCT
GenomicRanges Object
using the following
coordinates:chrom = c("chr1", "chr2", "chrM")
start = c(1000000; 120000; 340000)
end = c(10000000; 560000; 98000)
What do you obtain? Why?
library(IRanges)
intervals = IRanges(start = c(1000000, 120000, 340000)
, end = c(10000000, 560000, 98000)
)
library(GenomicRanges)
GR = GRanges(seqnames = c("chr1", "chr2", "chrM")
, ranges = intervals)
GR
chrom = c("chr1", "chr2", "chrM")
start = c(1000000, 120000, 340000)
end = c(10000000, 560000, 9,800000)
intervals = IRanges(start = c(1000000, 120000, 340000)
, end = c(10000000, 560000, 9800000))
GR = GRanges(seqnames = c("chr1", "chr2", "chrM")
, ranges = intervals)
GR
## GRanges object with 3 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 1000000-10000000 *
## [2] chr2 120000-560000 *
## [3] chrM 340000-9800000 *
## -------
## seqinfo: 3 sequences from an unspecified genome; no seqlengths
GenomicRanges Object
,
choosing among the allowed symbols.strand(GR) <- c("*", "+", "-")
Gene_1
,
Gene_2
, and Gene_3
.names(GR) <- c("Gene_1", "Gene_2" , "Gene_3")