2025-03-19

Today we will focus on the exploration of hg38 assembly, HepG2 cell line (see https://depmap.org/portal/cell_line/HEPG2_LIVER?tab=mutation if you want to deepen) and MYC regulation.

Load TxDb.Hsapiens.UCSC.hg38.knownGene. Plot the density of transcripts. Use plot.type=1.

suppressWarnings(library("TxDb.Hsapiens.UCSC.hg38.knownGene"))
txdb=TxDb.Hsapiens.UCSC.hg38.knownGene
transcripts <- transcripts(txdb)

library(karyoploteR)
kp <- plotKaryotype(genome = "hg38",plot.type = 1 )
kpAddBaseNumbers(kp)
kpPlotDensity(kp, transcripts, col="plum2")

Read file CCLE_RRBS_TSS1kb_20181022_HepG2.rds in Datasets/karyoploteR/. This file contains DNA methylation values in proximity of transcription start sites (TSS) for HepG2 cell line. It has been retrieved from depmap @ https://depmap.org/portal/download/all/ and slighly modified. Make a lollipop plot representing DNA methylation values across chromosomes chr1, chr2 and chr3. Scale colors according to values. Use plot.type=1.

met=readRDS("Datasets/karyoploteR/CCLE_RRBS_TSS1kb_20181022_HepG2.rds")
met_red=subset(met,chr%in%c("chr1", "chr2", "chr3") )

library(circlize)
color_scale=colorRamp2(quantile(met_red$HEPG2_LIVER)[c(1,3,5)], c("thistle1", "maroon1", "magenta4"))
colors=color_scale(met_red$HEPG2_LIVER)


kp <- plotKaryotype(genome = "hg38",plot.type = 1, chromosomes = c("chr1", "chr2", "chr3") )
kpAddBaseNumbers(kp)
kpPoints(kp,chr=met_red$chr, x=met_red$start, y=met_red$HEPG2_LIVER, col=colors)
kpSegments(kp,chr=met_red$chr, x0=met_red$start, x1=met_red$start, y0=0, y1=met_red$HEPG2_LIVER,col=colors)
kpAxis(kp, side=2, cex=0.7)

Read file HEPG2_mutations.csv in Datasets/karyoploteR/. This file contains mutations that characterize HepG2 cell line. It has been retrieved from depmap @ https://depmap.org/portal/download/all/ and slightly modified. Use plot.type=6 and plot points at mutation location. Assign a different color to each Variant.Info and a shape to each Variant.Type.

mut=read.csv("Datasets/karyoploteR/HEPG2_mutations.csv")
unique(mut$Variant.Info)

## [1] "MISSENSE"        "SILENT"          "FRAME_SHIFT_DEL" "IN_FRAME_INS"   
## [5] "SPLICE_SITE"     "NONSENSE"        "START_CODON_SNP" "FRAME_SHIFT_INS"

unique(mut$Variant.Type)

## [1] "SNP" "DEL" "DNP" "INS" "TNP"

map_color=c("MISSENSE"= "cyan",
            "SILENT"= "white",
            "FRAME_SHIFT_DEL"= "orange",
            "IN_FRAME_INS" = "violet",
            "SPLICE_SITE"= "pink",
            "NONSENSE" = "green",
            "START_CODON_SNP"= "peru",
            "FRAME_SHIFT_INS"="skyblue3")

map_shape=c("SNP"=15,
            "DEL"=16,
            "DNP"=17,
            "INS"=18,
            "TNP"=19)

color_vector=map_color[mut$Variant.Info]
shape_vector=map_shape[mut$Variant.Type]


kp <- plotKaryotype(genome = "hg38",plot.type = 6)
kpAddBaseNumbers(kp)
kpPoints(kp,chr=mut$Chromosome, x=mut$Position, y=0.5, col=color_vector, pch =shape_vector, cex=1.5 )

Read file Hepg2_copy_number_segments.csv in Datasets/karyoploteR/. This file contains copy numbers of HepG2 cell line and has been retrieved from depmap @ https://depmap.org/portal/download/all/ and slighly modified. Plot segments. Use plot.type=4 and color segments according to values (black if ==2, red if >2 and blue if <2).

cnv=read.csv("Datasets/karyoploteR/Hepg2_copy_number_segments.csv")
cnv$color="black"
cnv$color[which(cnv$Total_CN>2)]="red"
cnv$color[which(cnv$Total_CN<2)]="blue"

min(cnv$Total_CN)

## [1] 0

max(cnv$Total_CN)

## [1] 7

kp <- plotKaryotype(genome = "hg38",plot.type = 4)
kpSegments(kp,chr = cnv$Chromosome,  x0 =cnv$Start ,x1=cnv$End,y0=cnv$Total_CN, y1=cnv$Total_CN, col=cnv$color, ymin = min(cnv$Total_CN), ymax=max(cnv$Total_CN), lwd=6)
kpAxis(kp,  data.panel=1, side=2, ymin=0, ymax=7, tick.pos = c(0,1,2,3,4,5,6,7))

Read file ENCSR000DLR.MYC.Hep-G2.bed in Datasets/karyoploteR/. This file contains ChIP-seq significant peaks of the transcription factor MYC in HepG2 cell line. It has been retrieved from Remap @ https://remap2022.univ-amu.fr/biotype_page/Hep-G2:9606 and slightly modified. Plot density of regions and insert a marker, indicating were MYC gene is located.

l=read.table("Datasets/karyoploteR/ENCSR000DLR.MYC.Hep-G2.bed", header=F)
regions=makeGRangesFromDataFrame(l,seqnames.field="V2",start.field="V3",end.field="V4")
  
  
kp <- plotKaryotype(genome = "hg38",plot.type = 1)
kpPlotDensity(kp, regions, col="skyblue1")
kpPlotMarkers(kp, chr="chr8", x=127736231, labels="MYC",y = 0.1, text.orientation = "horizontal" )

Read file chr1_hg38_geneHancer.csv in Datasets/karyoploteR/. This file contains Enhancer positions together with putative regulated genes for chr1. It has been retrieved from UCSC geneHancer @ https://genome.ucsc.edu/cgi-bin/hgTables?hgsid=1563652731_GeAnj3cf9S70pGQWKAUtajUAdUTn&clade=mammal&org=Human&db=hg38&hgta_group=regulation&hgta_track=geneHancer&hgta_table=geneHancerInteractionsDoubleElite&hgta_regionType=range&position=chr1%3A1-248%2C956%2C422&hgta_outputType=primaryTable&hgta_outFileName= and slighly modified. Randomly choose 50 links and plot them.

library(GenomicRanges)
ga=read.csv("Datasets/karyoploteR/chr1_hg38_geneHancer.csv")
l=sample(1:nrow(ga), 50)

reg1=makeGRangesFromDataFrame(ga[l, c("geneHancerChrom", "geneHancerStart", "geneHancerEnd", "geneHancerIdentifier")],keep.extra.columns=TRUE,seqnames.field="geneHancerChrom",start.field="geneHancerStart",end.field="geneHancerEnd")
  
reg2=makeGRangesFromDataFrame(ga[l, c("geneChrom", "geneStart", "geneEnd", "geneName")],keep.extra.columns=TRUE,seqnames.field="geneChrom",start.field="geneStart",end.field="geneEnd")

kp <- plotKaryotype(genome = "hg38",plot.type = 2, chromosomes = "chr1")
kpAddBaseNumbers(kp)  
kpPlotLinks(kp,data=reg1, data2=reg2,col="turquoise2", border = "turquoise4")

Extract promoters from TxDb.Hsapiens.UCSC.hg38.knownGene by extending TSS of 500 bps upstream and 500 bps downstream.

proms=unique(promoters(txdb, upstream = 500, downstream = 500))

Find overlaps between MYC ChIP-seq peaks and enhancers. Extract unique peaks that overlap and unique enhancers that overlap.

all_enh=makeGRangesFromDataFrame(unique(ga[, c("geneHancerChrom", "geneHancerStart", "geneHancerEnd")]),keep.extra.columns=FALSE,seqnames.field="geneHancerChrom",start.field="geneHancerStart",end.field="geneHancerEnd")


ov=findOverlaps(regions,all_enh )

ov_reg=regions[unique(queryHits(ov))]
ov_enh=all_enh[unique(subjectHits(ov))]

Find overlaps between MYC ChIP-seq peaks and promoters. Extract unique peaks that overlap and unique promoters that overlap.

ov2=findOverlaps(regions,proms )

ov2_reg=regions[unique(queryHits(ov2))]
ov2_prom=proms[unique(subjectHits(ov2))]

Concatenate peaks that overlapped with promoters and enhancer and merge regions that overlap.

all=GenomicRanges::reduce(c(ov_reg, ov2_reg))

Plot density of chip regions that overlap with promoters and enhancers.

kp <- plotKaryotype(genome = "hg38",plot.type = 1)
kpPlotDensity(kp, all, col="yellow")