Today we will focus on the exploration of hg38 assembly, HepG2 cell line (see https://depmap.org/portal/cell_line/HEPG2_LIVER?tab=mutation if you want to deepen) and MYC regulation.
TxDb.Hsapiens.UCSC.hg38.knownGene
. Plot the
density of transcripts. Use plot.type=1
.suppressWarnings(library("TxDb.Hsapiens.UCSC.hg38.knownGene"))
txdb=TxDb.Hsapiens.UCSC.hg38.knownGene
transcripts <- transcripts(txdb)
library(karyoploteR)
kp <- plotKaryotype(genome = "hg38",plot.type = 1 )
kpAddBaseNumbers(kp)
kpPlotDensity(kp, transcripts, col="plum2")
CCLE_RRBS_TSS1kb_20181022_HepG2.rds
in
Datasets/karyoploteR/
. This file contains DNA methylation
values in proximity of transcription start sites (TSS) for HepG2 cell
line. It has been retrieved from depmap @ https://depmap.org/portal/download/all/ and slighly
modified. Make a lollipop plot representing DNA
methylation values across chromosomes chr1, chr2 and chr3. Scale colors
according to values. Use plot.type=1
.met=readRDS("Datasets/karyoploteR/CCLE_RRBS_TSS1kb_20181022_HepG2.rds")
met_red=subset(met,chr%in%c("chr1", "chr2", "chr3") )
library(circlize)
color_scale=colorRamp2(quantile(met_red$HEPG2_LIVER)[c(1,3,5)], c("thistle1", "maroon1", "magenta4"))
colors=color_scale(met_red$HEPG2_LIVER)
kp <- plotKaryotype(genome = "hg38",plot.type = 1, chromosomes = c("chr1", "chr2", "chr3") )
kpAddBaseNumbers(kp)
kpPoints(kp,chr=met_red$chr, x=met_red$start, y=met_red$HEPG2_LIVER, col=colors)
kpSegments(kp,chr=met_red$chr, x0=met_red$start, x1=met_red$start, y0=0, y1=met_red$HEPG2_LIVER,col=colors)
kpAxis(kp, side=2, cex=0.7)
HEPG2_mutations.csv
in
Datasets/karyoploteR/
. This file contains mutations that
characterize HepG2 cell line. It has been retrieved from depmap @ https://depmap.org/portal/download/all/ and slightly
modified. Use plot.type=6
and plot points at mutation
location. Assign a different color to each Variant.Info
and
a shape to each Variant.Type
.mut=read.csv("Datasets/karyoploteR/HEPG2_mutations.csv")
unique(mut$Variant.Info)
## [1] "MISSENSE" "SILENT" "FRAME_SHIFT_DEL" "IN_FRAME_INS"
## [5] "SPLICE_SITE" "NONSENSE" "START_CODON_SNP" "FRAME_SHIFT_INS"
unique(mut$Variant.Type)
## [1] "SNP" "DEL" "DNP" "INS" "TNP"
map_color=c("MISSENSE"= "cyan",
"SILENT"= "white",
"FRAME_SHIFT_DEL"= "orange",
"IN_FRAME_INS" = "violet",
"SPLICE_SITE"= "pink",
"NONSENSE" = "green",
"START_CODON_SNP"= "peru",
"FRAME_SHIFT_INS"="skyblue3")
map_shape=c("SNP"=15,
"DEL"=16,
"DNP"=17,
"INS"=18,
"TNP"=19)
color_vector=map_color[mut$Variant.Info]
shape_vector=map_shape[mut$Variant.Type]
kp <- plotKaryotype(genome = "hg38",plot.type = 6)
kpAddBaseNumbers(kp)
kpPoints(kp,chr=mut$Chromosome, x=mut$Position, y=0.5, col=color_vector, pch =shape_vector, cex=1.5 )
Hepg2_copy_number_segments.csv
in
Datasets/karyoploteR/
. This file contains copy numbers of
HepG2 cell line and has been retrieved from depmap @ https://depmap.org/portal/download/all/ and slighly
modified. Plot segments. Use plot.type=4
and color segments
according to values (black if ==2, red if >2 and blue if <2).cnv=read.csv("Datasets/karyoploteR/Hepg2_copy_number_segments.csv")
cnv$color="black"
cnv$color[which(cnv$Total_CN>2)]="red"
cnv$color[which(cnv$Total_CN<2)]="blue"
min(cnv$Total_CN)
## [1] 0
max(cnv$Total_CN)
## [1] 7
kp <- plotKaryotype(genome = "hg38",plot.type = 4)
kpSegments(kp,chr = cnv$Chromosome, x0 =cnv$Start ,x1=cnv$End,y0=cnv$Total_CN, y1=cnv$Total_CN, col=cnv$color, ymin = min(cnv$Total_CN), ymax=max(cnv$Total_CN), lwd=6)
kpAxis(kp, data.panel=1, side=2, ymin=0, ymax=7, tick.pos = c(0,1,2,3,4,5,6,7))
ENCSR000DLR.MYC.Hep-G2.bed
in
Datasets/karyoploteR/
. This file contains ChIP-seq
significant peaks of the transcription factor MYC in HepG2 cell line. It
has been retrieved from Remap @ https://remap2022.univ-amu.fr/biotype_page/Hep-G2:9606
and slightly modified. Plot density of regions and insert a marker,
indicating were MYC gene is located.l=read.table("Datasets/karyoploteR/ENCSR000DLR.MYC.Hep-G2.bed", header=F)
regions=makeGRangesFromDataFrame(l,seqnames.field="V2",start.field="V3",end.field="V4")
kp <- plotKaryotype(genome = "hg38",plot.type = 1)
kpPlotDensity(kp, regions, col="skyblue1")
kpPlotMarkers(kp, chr="chr8", x=127736231, labels="MYC",y = 0.1, text.orientation = "horizontal" )
chr1_hg38_geneHancer.csv
in
Datasets/karyoploteR/
. This file contains Enhancer
positions together with putative regulated genes for chr1. It has been
retrieved from UCSC geneHancer @ https://genome.ucsc.edu/cgi-bin/hgTables?hgsid=1563652731_GeAnj3cf9S70pGQWKAUtajUAdUTn&clade=mammal&org=Human&db=hg38&hgta_group=regulation&hgta_track=geneHancer&hgta_table=geneHancerInteractionsDoubleElite&hgta_regionType=range&position=chr1%3A1-248%2C956%2C422&hgta_outputType=primaryTable&hgta_outFileName=
and slighly modified. Randomly choose 50 links and plot them.library(GenomicRanges)
ga=read.csv("Datasets/karyoploteR/chr1_hg38_geneHancer.csv")
l=sample(1:nrow(ga), 50)
reg1=makeGRangesFromDataFrame(ga[l, c("geneHancerChrom", "geneHancerStart", "geneHancerEnd", "geneHancerIdentifier")],keep.extra.columns=TRUE,seqnames.field="geneHancerChrom",start.field="geneHancerStart",end.field="geneHancerEnd")
reg2=makeGRangesFromDataFrame(ga[l, c("geneChrom", "geneStart", "geneEnd", "geneName")],keep.extra.columns=TRUE,seqnames.field="geneChrom",start.field="geneStart",end.field="geneEnd")
kp <- plotKaryotype(genome = "hg38",plot.type = 2, chromosomes = "chr1")
kpAddBaseNumbers(kp)
kpPlotLinks(kp,data=reg1, data2=reg2,col="turquoise2", border = "turquoise4")
TxDb.Hsapiens.UCSC.hg38.knownGene
by extending TSS of 500
bps upstream and 500 bps downstream.proms=unique(promoters(txdb, upstream = 500, downstream = 500))
all_enh=makeGRangesFromDataFrame(unique(ga[, c("geneHancerChrom", "geneHancerStart", "geneHancerEnd")]),keep.extra.columns=FALSE,seqnames.field="geneHancerChrom",start.field="geneHancerStart",end.field="geneHancerEnd")
ov=findOverlaps(regions,all_enh )
ov_reg=regions[unique(queryHits(ov))]
ov_enh=all_enh[unique(subjectHits(ov))]
ov2=findOverlaps(regions,proms )
ov2_reg=regions[unique(queryHits(ov2))]
ov2_prom=proms[unique(subjectHits(ov2))]
all=GenomicRanges::reduce(c(ov_reg, ov2_reg))
kp <- plotKaryotype(genome = "hg38",plot.type = 1)
kpPlotDensity(kp, all, col="yellow")