stringsAsFactors=FALSE) ann1 ann1 <- with(ann1, GRanges(as.character(Chr), IRanges stringsAsFactors=FALSE) ann2 ann2 <- with(ann2, GRanges(as.character(Chr), IRanges S4Vectors"] with 4 slots,有93个染色体信息,以及每条染色体上面有多少个外显子信息 ranges(exon_txdb)返回外显子的起始终止位点,长度,以及其它信息,也是一个对象class 'IRanges ' [package "IRanges"] with 6 slots strand(exon_txdb)返回外显子的正负链信息,要么在正链要么在负链 mcols(exon_txdb)返回exon的id编号
使用R语言,输入为基因的GTF文件 包的安装 依赖data.table, IRanges,rtracklayer install.packages("data.table") if (! TRUE)) install.packages("BiocManager") BiocManager::install("rtracklayer") BiocManager::install("IRanges ") 代码 library(data.table) library("IRanges") require("rtracklayer") hg19 <- readGFF("hg19.gencodev27 (Chr,ExonStart,ExonEnd,Exon_number,Gene)]) Exon_region <- Exon_region[,{x <- IRanges(ExonStart,ExonEnd
先来一个简单的棒棒图: rm(list=ls()) library(trackViewer) SNP <- c(10, 12, 1400, 1402) sample.gr <- GRanges("chr1", IRanges (SNP, width=1, names=paste0("snp", SNP))) features <- GRanges("chr1", IRanges(c(1, 501, 1001), <- c(10, 100, 105, 108, 400, 410, 420, 600, 700, 805, 840, 1400, 1402) sample.gr <- GRanges("chr1", IRanges ranges and 3 metadata columns: # seqnames ranges strand | color border alpha # <Rle> <IRanges 3 metadata columns: # seqnames ranges strand | fill height featureLayerID # <Rle> <IRanges
IRanges 就像Views能够被用来查看子序列。一个通常的任务是描述染色体一系列的起始位点,并接着查看每个起始位点给定长度后的子序列。 ir1 = IRanges(start=1:10, width=10:1) # 只要定义好start,width,end其中两个,序列就被唯一确定 ir2 = IRanges(start=1:10, end =11) ir3 = IRanges(end=11, width=10:1) 让我们看看IRanges对象的结构: str(ir1) ## Formal class 'IRanges' [package 对象取子集会得到新的IRanges对象。 一个正常的(normal)IRanges对象需满足:1. 每个范围非空 (i.e. 宽度不为0); 2. 范围之间没有重叠; 3. 按从左到右的顺序排列; 4. 范围之间不相邻 (i.e. 两个连续的范围之间必须存在非空的间隔).
用到的 ggbio 这个包中的 **autoplot()**这个函数 library(ggbio) autoplot(txdb, which=GRanges("CP002684.1", IRanges 可以通过fill参数设置不同的颜色 autoplot(txdb, which=GRanges("CP002684.1", IRanges(100, 9000)), names.expr image.png 不同的基因填充不同的颜色 autoplot(txdb, which=GRanges("CP002684.1", IRanges(100, 9000)),
GRanges builds on IRanges, “integer ranges”. An IRanges object has start and end positions. # forward seq range1 <- GRanges("chrI", IRanges(start=3,end=5), strand="+") range1 getSeq(myset, range1 ) # reverse seq range2 <- GRanges("chrI", IRanges(start=3,end=5), strand="-") getSeq(myset, range2) stringsAsFactors = F) # transfer gene and location info to GRange format ranges<- GRanges(sites$Chromosome, IRanges
使用IRanges包 library(IRanges) start <- proc.time() final_result <- IRanges::reverse(dna) final_result end
library(GenomicRanges) my_seq <- with(tmp, GRanges(as.character(rname), IRanges S4Vectors"] with 4 slots,有93个染色体信息,以及每条染色体上面有多少个外显子信息 ranges(exon_txdb)返回外显子的起始终止位点,长度,以及其它信息,也是一个对象class 'IRanges ' [package "IRanges"] with 6 slots strand(exon_txdb)返回外显子的正负链信息,要么在正链要么在负链 mcols(exon_txdb)返回exon的id编号
Eg: Chr1<- GRanges("chr1",IRanges(SNP, width=1, names=paste0("snp", SNP))) features <- GRanges("chr1" ,IRanges(c(1, 501, 1001), width=c(120,400, 405), 当然我们也可以通过IRanges进行横坐标的范围缩放。 Eg: gr <- GRanges("chr1",IRanges(1000,2000, names="TP53")) lolliplot(Chr1, features,ranges=gr) ?
需要事先安装并加载一些相关的R包: library(TxDb.Hsapiens.UCSC.hg19.knownGene)library(org.Hs.eg.db)gr <- GRanges("chr11", IRanges 结构信息: chrname <- "chr21"filedir <- "D://trackViewer"#################设置需要显示的基因组范围gr <- GRanges(chrname,IRanges rspossnpname = snpinfo$rsnamesnpscore = snpinfo$rsscore#SNP得分设置为0~10,此时显示为1分为一个圆圈snpwidth <- c(1)snprange <- IRanges sample(c("#999999"), length(snpname), replace=TRUE)sample.gr$score <- snpscoregr = GRanges(chrname, IRanges
GRanges 对象由存储为 IRanges 的染色体名称和间隔组成。 library(GenomicRanges) macsPeaks_GR <- GRanges(seqnames = macsPeaks_DF[, "chr"], IRanges(macsPeaks_DF and 0 metadata columns: ## seqnames ranges strand ## <Rle> <IRanges seqnames ranges strand | name score ## <Rle> <IRanges
GRanges 对象由存储为 IRanges 的染色体名称和间隔组成。 library(GenomicRanges)macsPeaks_GR <- GRanges(seqnames = macsPeaks_DF[, "chr"], IRanges(macsPeaks_DF[ and 0 metadata columns:## seqnames ranges strand## <Rle> <IRanges seqnames ranges strand | name score## <Rle> <IRanges
真实情况下其实是读取你的突变坐标文件: # pos=read.table('pos.txt') # head(pos) # 突变位点前后400bp供引物设计 pos1=GRanges(seqnames=pos[,1], ranges=IRanges (start=pos[,2]-400,end=pos[,2])) pos2=GRanges(seqnames=pos[,1], ranges=IRanges(start=pos[,2],end=pos[ ,2])) pos3=GRanges(seqnames=pos[,1], ranges=IRanges(start=pos[,2]+1,end=pos[,2]+401)) seq1 = BSgenome
需要事先安装并加载一些相关的R包: library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(org.Hs.eg.db) gr <- GRanges("chr11", IRanges chrname <- "chr21" filedir <- "D://trackViewer" #################设置需要显示的基因组范围 gr <- GRanges(chrname,IRanges = snpinfo$rsname snpscore = snpinfo$rsscore #SNP得分设置为0~10,此时显示为1分为一个圆圈 snpwidth <- c(1) snprange <- IRanges sample(c("#999999"), length(snpname), replace=TRUE) sample.gr$score <- snpscore gr = GRanges(chrname, IRanges
library(GenomicRanges) gr_probes= GRanges( seqnames = paste0('chr',probe2pos$Chromosome), ranges = IRanges ), probe=probe2gene$probe_id ) gr_probes gr_cytobands= GRanges( seqnames = hc$chr, ranges = IRanges
AnnotationDbi "R (>= 2.7.0), methods, utils, stats4, BiocGenerics (>=\n0.29.2), Biobase (>= 1.17.0), IRanges annotate "hgu95av2.db, genefilter, Biostrings (>= 2.25.10), IRanges
接下来就是如何绘制我们想要的可视化图像: 首先是基础的获取track信息,所用的函数是AnnotationTrack,它可以灵活地进行任意定位,类似UCSC的定位方式;输入可以是data.frame,IRanges introns <- GRanges("chr12",IRanges(start = c(2973662,2973919), end = c(2973848, 2974520))) plotTracks
(org.Mm.eg.db) library(GenomicRanges) peak <- GRanges(seqnames=Rle(pos[,1]), ranges=IRanges
type, ".peaks.stringent.bed"), header = FALSE, fill = TRUE) peakInfo.gr = GRanges(peakInfo$V1, IRanges seacr_control.peaks.stringent.bed"), header = FALSE, fill = TRUE) peak.gr = GRanges(seqnames = peakRes$V1, IRanges
Session=browserSession("UCSC")#链接UCSC数据库 browserView(session,GRangesForUCSCGenome("hg19","chr2", IRanges