New Functions
Details of functions that should soon be added to the bio3d package.
## UNREFINED functions for Bio3D incoperation useful for modeling
## These include:
## motif.find - Return indices of a motif within a sequence
## getArgs - Parse command line options when using Rscript
## txt2num - Convert a character string to numeric
## read.apbs - Read elec binding energy from APBS log files
## chain.pdb - Find possible chian breaks
## pdbaln - Quick and dirty alignment of PDB sequences
## alitrim - Trim cols from alignment data structure
## ncmap - *see bio3d 'cmap'
## ndm - *see bio3d 'dm.xyz'
## seq2aln - Add a sequence to an existing alignment
## write.pir - write alignment for modeler
## write.sge - write a series of Sun Grid Engine scripts
## for running AMBER on chemcca2, 31 or oolite
## interp - interpolate between two vectors, useful for
## PCA z-score interpolation in combination
## with "pca.z2xyz"
## fit - A simple wrapper for fit.xyz when using
## pdbs style objects
## ide.group - Return the indices of the largest group of
## sequences that have identity values above
## a particular 'cutoff'
## rama.inds - Return xyz indices for PHI-PSI Ramachendran atoms
## plot.rama - Ramachendron plot (basic)
## aln2aln - Add one alignment to another that contains
## at least one similar entry
## get.pdb - download PDB files from a list of ids
## get.uniprot- download FASTA sequence files from a list of
## swissprot or uniprot ids
## seq.pdb - Return basic 1-letter calpha ATOM sequence from a
## pdb object
## read.propka - Read the output of PropKa produced by pdb2pqr
## write.crdbox- Write AMBER CRD format trajectory files
## blast.n - Blast with nucleotide sequences
## gb2fasta - Convert GENEBANK to FASTA format
## free2fasta - Convert FREE (SCA) to FASTA format
## fasta2free - Convert FASTA to FREE (SCA) format.
## tlsq & ulsq - Fitting rotation and translation matrices
## See also:
## ~/work/scop/scop.sf
## ~/tmpwork/Rpackage/new_funs/HB.back.R
## ~/tmpwork/Rpackage/new_funs/Hbond.R
## ~/tmpwork/Rpackage/new_funs/io.R
## ~/tmpwork/Rpackage/new_funs/plot.cij.R
## ~/tmpwork/Rpackage/new_funs/read.prmtop.r
## ~/tmpwork/Rpackage/new_funs/read.inpcrd.r
## ~/tmpwork/Rpackage/new_funs/read.psf.R
## ~/tmpwork/Rpackage/new_funs/read.pqr.R
## ~/tmpwork/Rpackage/new_funs/read.pdb.R (with resolution etc)
## ~/tmpwork/Rpackage/new_funs/rot_trans/rot.trans.R
motif.find <- function(motif, sequence) {
## return indices of motif within sequence
position <- regexpr( paste(motif, collapse=""), paste(sequence,collapse=""))
inds <- c(position):c(position+attr(position, "match.length")-1)
return(inds)
}
getArgs <- function(set) {
## Parse command line options (for Rscript IO handeling)
## If there is an equals sign (=) in the input args then it
## is parsed and the first part (before the = sign) treated
## as the variable name, with the second (after the = sign )
## taken as the value of the argument.
## If there is no = sign then the arg value is assigned
## logical TRUE
##
## Note that all values will be characters (i.e. strings).
## so YOU must sort them out afterwords
##
## E.g.
##
## #!/usr/bin/env Rscript
## got <- getArgs( c("dir","id","nfile") )
##
## if(!got["dir"])
## stop("Command line argument 'dir=?' is required")
## if(!got["id"]) {
## warning("Assuming 'id' is the same as basename(dir)")
## id <- basename(dir) }
## if(!got["nfile"])
## nfile <- NULL
## ...
passed.args <- commandArgs(TRUE)
cat( paste(" Called with", length(passed.args),"arguments:\n\t- ",
paste(passed.args, collapse=",\n\t- ")),"\n\n")
for (e in passed.args) {
ta = strsplit(e,"=",fixed=TRUE)
if(ta[[1]][1] %in% set) {
if(! is.na(ta[[1]][2])) {
assign(ta[[1]][1], ta[[1]][2], env = .GlobalEnv)
cat(" Assigned ",ta[[1]][1]," the value of:",ta[[1]][2],"\n")
} else {
assign(ta[[1]][1],TRUE, env = .GlobalEnv)
cat(" Assigned ",ta[[1]][1]," the value of: TRUE\n")
}
} else {
warning(paste(" Command line argument:", ta[[1]][1],
"is undefined and will be ignored"))
}
}
sapply(set, exists)
}
txt2num <- function(x) {
## Convert a character string to numeric
## txt2num( "1:4,6,lkk,1:10,fdsfsd,99a" )
if(length(x)==0)
return(x)
num1 <- unlist(strsplit(x, split = ","))
num2 <- suppressWarnings(as.numeric(num1))
nsplit <- grep(":",num1)
num3 <- unlist(strsplit( num1[ grep(":",num1) ], split = ":"))
num4 <- apply( matrix(as.numeric(num3),nrow=2),
2, function(x){ x[1]:x[2] })
num <- NULL; j <- 1
for(i in 1:length(num1)) {
if(!is.na(num2[i])) {
num <- c(num, num2[i])
} else {
if( i %in% nsplit) {
if( ncol(num4)==1 ) {
num <- c(num, num4[,j])
} else {
num <- c(num, num4[[j]])
}
j <- j+1
} else {
cat(paste(" Ignoring character component: ",num1[i],"\n"))
}
}
}
return(num)
}
chain.pdb <- function(pdb, ca.dist=4, blank="X", rtn.vec=TRUE) {
##- Find possible chian breaks
## i.e. Concetive Caplpa's that are further than 'ca.dist' apart,
## print basic chain info and rtn a vector of chain ids
## consisting of the 26 upper-case letters of the Roman
## alphabet
##
## chn <- chain.pdb(pdb)
## pdb$atom[,"chain"] <- chain.pdb(pdb)
##
## Distance between concetive C-alphas
ca <- atom.select(pdb, "calpha", verbose=FALSE)
xyz <- matrix(pdb$xyz[ca$xyz], nrow=3)
d <- sqrt( rowSums( apply(xyz , 1, diff)^2 )/3 )
## Chain break distance check
ind <- which(d > ca.dist)
len <- diff( c(1,ind,length(d)) )
cat(paste("Found",length(ind), " possible chain breaks\n"))
cat(paste("After resno(s)",
paste( pdb$atom[ca$atom,"resno"][(ind)], collapse=", " ),"\n" ))
cat(paste("chain length(s)",
paste(len+1, collapse=", " ),"\n" ))
## Make a chain id vector
if(rtn.vec) {
resno.ind <- as.numeric(c(1, sort(as.numeric(c(ind,(ind+1)))), (length(d)+1)))
resno.val <- pdb$atom[ca$atom,"resno"][resno.ind]
resno.val <- matrix(as.numeric(resno.val),nrow=2)
vec <- rep(blank, nrow(pdb$atom))
for(i in 1:(length(resno.val)/2)) {
sel.ind <- atom.select(pdb,
resno=c(resno.val[1,i]:resno.val[2,i]),
verbose=FALSE)
vec[sel.ind$atom]=LETTERS[i]
}
return(vec)
}
}
pdbaln <- function(files, ...) {
##- Quick and dirty alignment of pdb sequences
## pdbs <- pdbaln(files)
##
##
## 'files' should be a character vector of input PDB file names
## '...' extra arguments for seqaln
##
## Improvements to include 'atom.select' arguments (chain
## spliting etc), formalisation of 'pdb.list' into a specific
## bio3d object of multiple structures like '3dalign'.
##
pdb.list <- NULL
for(i in 1:length(files))
pdb.list[[ i ]] <- read.pdb(files[i])
s <- lapply(pdb.list, seq.pdb)
s <- t(sapply(s, `[`, 1:max(sapply(s, length))))
s[is.na(s)] <- "-"
s <- seqaln(s, id=files, ...)
s <- read.fasta.pdb(s, pdb.path = "", pdbext = "")
return(s)
}
alitrim <- function(aln, cols=NULL, rows=NULL, atom=TRUE) {
##- Trim cols from alignment data structure
## l <- alitrim(pdbs, cols=c(1:351))
if(!atom) {
stop("Only atom col inds supported")
}
naln <- NULL
if(!is.null(cols)) {
naln$id <- aln$id
naln$b <- aln$b[,-cols]
naln$ali <- aln$ali[,-cols]
naln$resno <- aln$resno[,-cols]
naln$chain <- aln$chain[,-cols]
naln$xyz <- aln$xyz[,-atom2xyz(cols)]
} else { naln <- aln }
if(!is.null(rows)) {
naln$id <- naln$id[-rows]
naln$b <- naln$b[-rows,]
naln$ali <- naln$ali[-rows,]
naln$resno <- naln$resno[-rows,]
naln$chain <- naln$chain[-rows,]
naln$xyz <- naln$xyz[-rows,]
}
return(naln)
}
seq2aln <- function(seq, aln, aln.ind=NULL, id="seq") {
##- Add a sequence 'seq' to an existing alignment 'aln'
## aln.ind is the row indice of the closest guy in aln to seq
## l <- seq2aln(c(seq$ali), aln, which(aln$id %in% "d1i6ia_") )
if(is.null(aln.ind)) {
## no aln.ind provided so lets try one from each aln group
stop("Need to provide an 'aln.ind' as I havent botherd doing this yet")
}
##- Align seq to masked template from alignment
tmp.msk <- aln$ali[aln.ind,]
tmp.msk[is.gap(tmp.msk)] <- "X"
seq2tmp <- seqaln.pair(seqbind(seq, tmp.msk), id=c("seq","tmp"))
##- Insert gaps to adjust alignment
ins <- which(is.gap( seq2tmp$ali[2,] ))
if( length(ins)==0 ) {
ntmp <- aln$ali
} else {
ins <- which(is.gap( seq2tmp$ali[2,] ))
ntmp <- matrix(".", nrow=nrow(aln$ali), ncol=(ncol(aln$ali)+length(ins)))
ntmp[,-ins] <- aln$ali
}
## Add seq to top of adjusted alignment
naln <- seqbind( seq2tmp$ali[1,], ntmp)
rownames(naln) <- c(id, aln$id)
return( list(id=c(id, aln$id), ali=naln) )
}
"write.pir" <- function (pdbs=NULL, ##ids=NULL, seqs=alignment$ali,
file, append = FALSE) {
# if (is.null(seqs))
# stop("write.fasta: please provide a 'seqs' or 'alignment' input object")
# if (!is.null(alignment)) {
# if (is.null(alignment$id) | is.null(alignment$ali)) {
# stop("write.fasta: 'alignment' should be a list with '$id' and '$ali'components")
# }
# if (is.null(ids)) {
# ids=alignment$id
# }
# } else {
# if (is.null(ids)) {
# n.ids <- nrow(seqs)
# if(is.null(n.ids)) { n.ids=1 }
# ids=seq( 1, length=n.ids )
# }
# }
## RESIDUE NUMBERING MUST BE CONSECITIVE! (bounds will have multiple rows)
line2 <- rep("sequence::::::::0.00: 0.00", nrow(pdbs$resno))
stru <- which(rowSums(is.na(pdbs$resno)) != ncol(pdbs$resno))
## structure:1ii6:1:A:347:A:bio3d:::
for(i in 1:length(stru)) {
rnum <- bounds( as.numeric(pdbs$resno[stru[i],]) )
line2[stru[i]] <- paste("structure:", pdbs$id[ stru[i] ],
":",rnum[,"start"],
":",unique(na.omit(pdbs$chain[ stru[i] ,])),
":", rnum[,"end"],":",
unique(na.omit(pdbs$chain[ stru[i] ,])),
":bio3d:::",sep="")
}
if (!append)
file.remove(file, showWarnings = FALSE)
nseqs <- length(pdbs$id)
if (nseqs == 1) {
cat(">P1;", pdbs$id, "\n", line2, "\n", pdbs$ali, "*\n", file = file,
append = TRUE, sep = "")
}
else {
for (i in 1:nseqs) {
cat(">P1;", pdbs$id[i], "\n", line2[i], "\n", pdbs$ali[i,],
"*\n", file = file, append = TRUE, sep = "")
}
}
}
##======================================##
## All atom contact Functions
##======================================##
ndm <- function(xyz, grpby=NULL, scut=NULL) {
##-- New distance matrix function with 'grpby' option
## ndm(pdb$xyz, grpby=pdb$atom[,"resno"], scut=3)
##- Full Distance matrix (could use 'dm' or 'dist.xyz')
dmat <- as.matrix(dist(matrix(xyz, ncol = 3, byrow = TRUE)))
if(is.null(grpby)) {
##- Mask lower.tri
dmat[lower.tri(dmat)] = NA
##- Mask concetive residues
if (!is.null(scut))
dmat[diag.ind(dmat, n = scut)] = NA
return(dmat)
} else {
##- Group by concetive numbers in 'grpby'
if( length(xyz) != (length(grpby)*3) )
stop("dimension miss-match in 'xyz' and 'grpby', check lengths")
##- Bounds of 'grpby' numbers
inds <- bounds(grpby, dup=TRUE)
nres <- nrow(inds)
##- Per-residue matrix
m <- matrix(, ncol=nres, nrow=nres)
ij <- pairwise(nres)
## Ignore concetive residues
if (!is.null(scut))
ij <- ij[ij[,2]-ij[,1] > (scut-1),]
##- Min per residue
for(k in 1 : nrow(ij) ) {
m[ij[k,1],ij[k,2]] <- min( dmat[ (inds[ij[k,1],"start"]:inds[ij[k,1],"end"]),
(inds[ij[k,2],"start"]:inds[ij[k,2],"end"])],
na.rm=TRUE )
}
return(m)
}
}
ncmap <- function(xyz, grpby, dcut=4, scut=3) {
## Distance matrix (all-atom)
dmat <- ndm( xyz, grpby, scut)
## Contact map
return(matrix(as.numeric(dmat < dcut),
ncol = ncol(dmat),
nrow = nrow(dmat)))
}
write.sge <- function(prefix = "r",
runtime = "72:00:00",
ncpu = 16,
iter = 1:10,
cluster="c31") {
## Write out a series of Sun Grid Engine
## shell scripts for running AMBER on chemcca31
## > write.sge(prefix="4q21")
if (nchar(prefix) > 8) {
warning(paste("Your filename 'prefix' is over 8 characters in length,\n\t",
"consider shortning so iteration digits are visible with qstat"))
}
if(cluster=="c2") {
## Run pmemd on new cluster2
for(i in 1:length(iter)) {
cat(paste("#!/bin/bash
#$ -cwd
#$ -l h_rt=",runtime,"
#$ -V
#$ -S /bin/bash
#$ -pe orte ",ncpu,"
echo Running on host `hostname`
echo Time is `date`
echo Directory is `pwd`
prv=",sprintf("%02.0f", (iter[i]-1)),"
cur=",sprintf("%02.0f", iter[i]),"
MPIRUN=/soft/linux/pkg/openmpi/bin/mpirun
AMBER=/soft/linux/pkg/amber10/exe/pmemd
AMBER_ARGS=\"-O -i dyna_prod.sander -p sys_box.prmtop -c dyna.$prv.rst -o dyna.$cur.out -r dyna.$cur.rst -x dyna.$cur.traj.crd -inf dyna.$cur.inf\"
$MPIRUN -np $NSLOTS $AMBER $AMBER_ARGS
echo Dinished at time: `date`\n", sep=""),
file=paste(prefix,".",sprintf("%02.0f", iter[i]),".sge",sep=""))
}
}
if(cluster=="c31") {
## Run on pmemd chemcca31
for(i in 1:length(iter)) {
cat(paste("#!/bin/bash
#$ -cwd
#$ -l h_rt=",runtime,"
#$ -V
#$ -S /bin/bash
#$ -pe mpich ",ncpu,"
echo Running on host `hostname`
echo Time is `date`
echo Directory is `pwd`
cat $TMPDIR/machines
prv=",sprintf("%02.0f", (iter[i]-1)),"
cur=",sprintf("%02.0f", iter[i]),"
/soft/linux/pkg/openmpi-1.2.7-intel-mx2g/bin/mpirun -v -np $NSLOTS -machinefile $TMPDIR/machines /soft/linux/pkg/amber10/exe/pmemd -O -i dyna_prod.sander -p sys_box.prmtop -c dyna.$prv.rst -o dyna.$cur.out -r dyna.$cur.rst -x dyna.$cur.traj.crd -inf dyna.$cur.inf\n", sep=""),
file=paste(prefix,".",sprintf("%02.0f", iter[i]),".sge",sep=""))
}
} else {
if(cluster=="oolite") {
## Run on pmemd oolite
for(i in 1:length(iter)) {
cat(paste("#!/bin/tcsh
#$ -cwd
#$ -l h_rt=",runtime,"
#$ -e sge.err
#$ -o sge.out
#$ -pe mpi ",ncpu,"
#$ -S /bin/tcsh
setenv MPI_MAX_CLUSTER_SIZE 4
setenv P4_GLOBMEMSIZE 32000000
#$ -v P4_GLOBMEMSIZE
#$ -V
echo Running on host `hostname`
echo Time is `date`
echo Directory is `pwd`
cat $TMPDIR/machines
echo This job has allocated $NSLOTS processors
set mpirun=/usr/local/topspin/mpi/mpich/bin/mpirun
set pmemd=/share/apps/amber9/exe.pmemd.ts/pmemd
set prv=",sprintf("%02.0f", (iter[i]-1)),"
set cur=",sprintf("%02.0f", iter[i]),"
$mpirun -machinefile $TMPDIR/machines -np $NSLOTS $pmemd -O -i dyna_prod.sander -p sys_box.prmtop -c dyna.$prv.rst -o dyna.$cur.out -r dyna.$cur.rst -x dyna.$cur.traj.crd -inf dyna.$cur.inf\n", sep=""),
file=paste(prefix,".",sprintf("%02.0f", iter[i]),".sge",sep=""))
}
}
############
## source("~/tmpwork/Rpackage/new_funs/model.R")
## write.sge("a3_apo", ncpu=32, iter=1:20, cluster="ctbp1m")
if(cluster=="ctbp1m") {
## Run on pmemd ctbp1m
for(i in 1:length(iter)) {
cat(paste("#!/bin/bash
#$ -cwd
#$ -l h_rt=",runtime,"
#$ -V
#$ -S /bin/bash
#$ -pe mpich ",ncpu,"
echo Running on host `hostname`
echo Time is `date`
echo Directory is `pwd`
cat $TMPDIR/machines
prv=",sprintf("%02.0f", (iter[i]-1)),"
cur=",sprintf("%02.0f", iter[i]),"
/soft/linux/openmpi-1.2.7-intel-mx2g/bin/mpirun -v -np $NSLOTS -machinefile $TMPDIR/machines /soft/linux/amber10/exe/pmemd -O -i dyna_prod.sander -p sys_box.prmtop -c dyna.$prv.rst -o dyna.$cur.out -r dyna.$cur.rst -x dyna.$cur.traj.crd -inf dyna.$cur.inf\n", sep=""),
file=paste(prefix,".",sprintf("%02.0f", iter[i]),".sge",sep=""))
}
}
###########
if (cluster=="amd") {
## Run aMD on chemcca31
for(i in 1:length(iter)) {
cat(paste("#!/bin/bash
#$ -cwd
#$ -l h_rt=",runtime,"
#$ -V
#$ -S /bin/bash
#$ -pe mpich ",ncpu,"
unsetenv LD_LIBRARY_PATH
echo Running on host `hostname`
echo Time is `date`
echo Directory is `pwd`
cat $TMPDIR/machines
prv=",sprintf("%02.0f", (iter[i]-1)),"
cur=",sprintf("%02.0f", iter[i]),"
/opt/mpich/myrinet/intel/bin/mpirun -v -np $NSLOTS -machinefile $TMPDIR/machines /home/dhamelbe/amber8/exe.torsion.whole.fix/sander -O -i production.in -p sys_box.prmtop -c production.$prv.restrt -o production.$cur.out -r production.$cur.restrt -x production.$cur.mdcrd -inf mdinfo.$cur.inf\n", sep=""),
file=paste(prefix,".",sprintf("%02.0f", iter[i]),".sge",sep=""))
}
} else {
cat("Have not added the code for other clusters yet\n")
}
}
## Running instructions
cat(paste(" * SCP files to ",
cluster, "\n\t(including dyna.",
sprintf("%02.0f", (iter[1]-1)),
".rst sys_box.prmtop and dyna_prod.sander)\n\t",
"(possibly cp dyna_equil.rst to dyna.00.rst)\n\t",
"> cp dyna_equil.rst dyna.00.rst\n\t",
"> scp dyna.00.rst sys_box.prmtop dyna_prod.sander ",
prefix,".*.sge bgrant@",cluster,":somepath/.\n\n",
" * Submit jobs with:\n\t> qsub ",
paste(prefix,".",sprintf("%02.0f", iter[1]),
".sge",sep=""), "\n\t> qsub -hold_jid <JOBID> ",
paste(prefix,".",sprintf("%02.0f", iter[2]),
".sge",sep=""),"\n\t ...etc...\n",sep=""))
}
##---- some PCA utility functions ----##
## z.coord: a numeric vector or row-wise matrix of z scores (i.e. centered and rotated data projected onto PCs) for conversion to xyz coordinates
##
## xyz.coord: a numeric vector or row-wise matrix of xyz coordinates for projection onto PCs
## see pca.z2xyz and pca.xyz2z
interp <- function(start, end, nbeads=25) {
## interpolate between two vectors
incby <- (end-start)/nbeads
curr <- start; store <- curr
for (i in 1:nbeads) {
curr <- curr+incby
store <- rbind(store, curr)
}
return(store)
}
##-- Define a new functions (an easy fit.xyz wrapper)
fit <- function(pdbs, inds, outpath=NULL, pdb.path = "", pdbext = "") {
full <- ifelse(is.null(outpath), FALSE, TRUE)
return( fit.xyz( fixed=pdbs$xyz[1,], mobile=pdbs,
fixed.inds = inds, mobile.inds = inds,
pdb.path = pdb.path, pdbext = pdbext,
outpath = outpath, full.pdbs = full,het=TRUE ))
}
fit2 <- function(pdbs, inds, outpath=NULL) {
full <- ifelse(is.null(outpath), FALSE, TRUE)
return( fit.xyz( fixed=pdbs$xyz[1,], mobile=pdbs,
fixed.inds = inds, mobile.inds = inds,
pdb.path = "./", pdbext = "",
outpath = outpath, full.pdbs = full ))
}
ide.group <- function(aln=NULL, ide=NULL, cutoff=0.6) {
##
## Return the indices of the largest group of sequences
## that have identity values above a particular 'cutoff'
## (see ide.filter for different functionality)
if(is.null(ide)) {
if(is.null(aln))
stop("Must provide either an alignment 'aln' or identity matrix 'ide'")
ide <- identity(aln)
}
tree <- hclust( as.dist(1-ide) )
grps <- cutree(tree, k = NULL, h = (1-cutoff))
freq <- table(grps)
if( max(freq)==1 ) {
warning("No pair of sequences are above cutoff\n... returing a single index of the most representative sequence")
return( which.max(colSums(ide)) )
} else {
ord <- rev( order(freq) )
return( which(names(freq[ord])[1]==grps) )
}
}
##plot.blast <- function(b, cutoff=110) {
## ##- Plot alignment stats
## par(mfcol=c(4,1), mar=c(4, 4, 1, 2), cex.lab=1.5)
## plot(b$mlog.evalue, xlab="Hit No", ylab="-log(Evalue)")
## if(!is.null(cutoff))
## abline(h=cutoff, col="red", lty=3)
## plot(b$bitscore, xlab="Hit No", ylab="Bitscore")
## plot(b$hit.tbl[,"identity"], xlab="Hit No", ylab="Identity")
## plot(b$hit.tbl[,"alignmentlength"], xlab="Hit No", ylab="Length")
##
## i <- b$mlog.evalue >= cutoff
##
## out <- cbind(b$pdb.id[i], b$gi.id[i])
## rownames(out) <- which(i)
## colnames(out) <- c("pdb.id", "gi.id")
## return(out)
##}
##get.pdb <- function(ids, path="./") {
##
## if(any(nchar(ids) != 4)) {
## warning("ids should be standard 4 character PDB formart: trying first 4 char...")
## ids <- substr(basename(ids),1,4)
## }
## ids <- unique(ids)
##
## pdb.files <- paste(ids, ".pdb", sep="")
## get.files <- file.path("http://www.rcsb.org/pdb/files", pdb.files)
## put.files <- file.path( path, pdb.files)
##
## dir.create(path)
## rtn <- rep(NA, length(pdb.files))
## for(k in 1:length(pdb.files)) {
## rtn[k] <- download.file( get.files[k], put.files[k] )
## }
##
## names(rtn) <- paste( path, "/", ids, ".pdb", sep="")
## if(any(rtn==1)) {
## warning("Some files could not be downloaded, check returned value")
## return(rtn)
## } else {
## return(names(rtn))
## }
##}
##get.uniprot <- function(ids, path="./", onefile=FALSE) {
##
## if(any(nchar(ids) != 6)) {
## warning("ids should be standard 6 character SWISSPROT/UNIPROT formart: trying first 6 char...")
## ids <- substr(basename(ids),1,6)
## }
## ids <- unique(ids)
##
## pdb.files <- paste(ids, ".fasta", sep="")
## get.files <- file.path("http://www.uniprot.org/uniprot", pdb.files)
## put.files <- file.path( path, pdb.files)
## mode="w"
##
## if(onefile) {
## mode="a"
## put.files <- rep(file.path( path, "combined_seqs.fasta"), length(pdb.files))
## }
## dir.create(path)
## rtn <- rep(NA, length(pdb.files))
## for(k in 1:length(pdb.files)) {
## rtn[k] <- download.file( get.files[k], put.files[k], mode=mode )
## }
##
## names(rtn) <- ids
## rtn
##}
##get.nr <- function(gi, path="./", onefile=FALSE) {
##
## ids <- unique(gi)
## pdb.files <- paste(ids, ".fasta", sep="")
## get.files <- paste("http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=protein&val=",
## ids, "&dopt=fasta&sendto=t", sep="")
## put.files <- file.path( path, pdb.files)
##
## mode="w"
## if(onefile) {
## mode="a"
## put.files <- rep(file.path( path, "combined_nr_seqs.fasta"), length(pdb.files))
## }
## dir.create(path)
## rtn <- rep(NA, length(pdb.files))
## for(k in 1:length(pdb.files)) {
## rtn[k] <- download.file( get.files[k], put.files[k], mode=mode )
## }
##
## names(rtn) <- ids
## rtn
##}
##fetch.seqs <- function(gi, path="./", onefile=TRUE) {
## ## download FASTA format sequences from the NR database via
## ## there gi number (see also get.pdb, get.uniprot and get.nr)
##
## ids <- unique(gi)
## pdb.files <- paste(ids, ".fasta", sep="")
## get.files <- paste("http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=protein&val=",
## ids, "&dopt=fasta&sendto=t", sep="")
## put.files <- file.path( path, pdb.files)
##
## mode="w"
## if(onefile) {
## mode="a"
## put.files <- rep(file.path( path, "nr_seqs.fasta"), length(pdb.files))
## }
## dir.create(path)
## rtn <- rep(NA, length(pdb.files))
## for(k in 1:length(pdb.files)) {
## rtn[k] <- download.file( get.files[k], put.files[k], mode=mode )
## }
##
## names(rtn) <- ids
## if(all(!rtn)) {
## if(onefile) {
## return(read.fasta( file.path( path, "nr_seqs.fasta") ))
## } else {
## return(rtn)
## }
## } else {
## warning("Not all downloads were sucesfull, see returned values")
## return(rtn)
## }
##}
##fetch.pdbs <- function(ids, path="./") {
##
## if(any(nchar(ids) != 4)) {
## warning("ids should be standard 4 character PDB formart: trying first 4 char...")
## ids <- substr(basename(ids),1,4)
## }
## ids <- unique(ids)
##
## pdb.files <- paste(ids, ".pdb", sep="")
## get.files <- file.path("http://www.rcsb.org/pdb/files", pdb.files)
## put.files <- file.path( path, pdb.files)
##
## dir.create(path)
## rtn <- rep(NA, length(pdb.files))
## for(k in 1:length(pdb.files)) {
## rtn[k] <- download.file( get.files[k], put.files[k] )
## }
##
## names(rtn) <- ids
## rtn
##}
##split.pdb <- function(raw.pdbs, path="split_chain/") {
##
## dir.create(path)
## for(i in 1:length(raw.pdbs)) {
## pdb <- read.pdb(raw.pdbs[i], het2atom=TRUE)
## chains <- unique(pdb$atom[,"chain"])
## if(length(chains) > 0) {
## for(j in 1:length(chains)) {
## if( !is.na(chains[j]) ) {
## sel <- paste("//",chains[j],"/////")
## new <- atom.select(pdb,sel)
## new.pdb <- NULL
## new.pdb$atom <- pdb$atom[new$atom,]
## new.pdb$xyz <- pdb$xyz[new$xyz]
##
## new.name <- paste(path,
## substr(basename(raw.pdbs[i]),1,4),
## "_", chains[j], ".pdb", sep="")
##
## write.pdb(new.pdb, file=new.name)
## }
## }
## }
## }
##}
pdb.seq.aln <- function(pdb.files, alnfile="pdb_aln.fa", ...) {
raw <- NULL
for(i in 1:length(pdb.files)) {
pdb <- read.pdb(pdb.files[i], het2atom=TRUE)
seq <- aa321(pdb$atom[pdb$calpha,"resid"])
if (is.null(seq)) {
raw <- seqbind(raw, c("-","-"))
} else {
raw <- seqbind(raw, seq)
}
}
return(seqaln(raw, id=pdb.files, file=alnfile, ...))
}
rama.inds <- function( pdb, zone=c(33:40) ) {
##- Return xyz indices for PHI-PSI Ramachendran atoms
phi <- matrix(NA,ncol=4*3, nrow=length(zone))
psi <- phi
for(i in 1:length(zone)) {
b <- zone[i]; a <- b-1; c <- b+1
phi[i,1:3] <- atom.select(pdb, paste("///", a ,"///C/"),verbose=FALSE)$xyz
phi[i,4:6] <- atom.select(pdb, paste("///", b ,"///N/"),verbose=FALSE)$xyz
phi[i,7:9] <- atom.select(pdb, paste("///", b ,"///CA/"),verbose=FALSE)$xyz
phi[i,10:12] <- atom.select(pdb, paste("///", b ,"///C/"),verbose=FALSE)$xyz
psi[i,1:3] <- atom.select(pdb, paste("///", b ,"///N/"),verbose=FALSE)$xyz
psi[i,4:6] <- atom.select(pdb, paste("///", b ,"///CA/"),verbose=FALSE)$xyz
psi[i,7:9] <- atom.select(pdb, paste("///", b ,"///C/"),verbose=FALSE)$xyz
psi[i,10:12] <- atom.select(pdb, paste("///", c ,"///N/"),verbose=FALSE)$xyz
}
return( list(psi = psi, phi=phi) )
}
plot.rama <- function(phi, psi, col=densCols( cbind(phi, psi)), ...) {
library("geneplotter")
require("RColorBrewer")
plot(phi, psi, col=col,
xlim=c(-180,180), ylim=c(-180,180),pch=20,
xlab=expression(phi),
ylab=expression(psi), ...)
}
aln2aln <- function(aln.mv, aln.mv.ind=1, aln.rf, aln.rf.ind=1) {
## saln <- read.fasta("coords/saln.15.afasta")
## moved <- saln
## for (i in 1:length(rep.str)) {
## galn <- read.fasta(paste("coords/grp_",i,".fa",sep=""))
## ## mask X to Z
## galn$ali[galn$ali=="X"] <- "Z"
##
## moved <- aln2aln(aln.mv = galn,
## aln.mv.ind = which(basename(galn$id) %in% moved$id),
## aln.rf = moved,
## aln.rf.ind = which(moved$id %in% basename(galn$id)) )
## }
## write.fasta(moved, file="tmp.fa")
## moved$ali[which(moved$ali == "X",arr.ind=T)]="-"
## write.fasta(moved, file="moved.fa")
mask <- function(x, char="X") {
if(is.list(x))
x <- x$ali
x[which(x == "-",arr.ind=T)]=char
x[which(x == ".",arr.ind=T)]=char
return(x)
}
rf <- mask(aln.rf)
mv <- mask(aln.mv)
rf.seq <- rf[aln.rf.ind,]
mv.seq <- mv[aln.mv.ind,]
rm.aln <- seqaln.pair(seqbind(mv.seq, rf.seq), id=c("mv","rf") )$ali
## Insure ncol consistancy
n.col <- max( ncol(rm.aln), length(mv.seq), length(rf.seq) )
rm.aln <- seqbind(rm.aln, rep("-",n.col))
rf.ind <- which(!is.gap(rm.aln["rf",]))
mv.ind <- which(!is.gap(rm.aln["mv",]))
blank <- matrix("-", ncol=n.col, nrow=sum(nrow(rf),nrow(mv)))
blank[1:nrow(rf), rf.ind] <- rf
blank[(nrow(rf)+1):(nrow(rf)+nrow(mv)), mv.ind] <- mv
naln <- NULL
naln$ali <- blank
naln$id <- c(aln.rf$id, aln.mv$id)
return(naln)
}
##seq.pdb <- function(pdb, inds=NULL) {
## ## b.inds <- atom.select(pdb, "//B////CA/")
## ## seq.pdb(pdb, b.inds)
##
## if(is.null(inds))
## inds <- atom.select(pdb, "//////CA/")$atom
##
## if(is.list(inds))
## inds <- inds$atom
##
## return(aa321(pdb$atom[inds,"resid"]))
##}
read.propka <- function(file) {
##-- examine out.propka
## ~/software/pdb2pqr/pdb2pqr.py --with-ph=7 --chain --salt --ffout=amber --ff=amber 1BE9.pdb out
## pka <- read.propka("out.propka")
## wiki.tbl(pka$his)
## lineformat
first <- c(1, 4, 8, 16, 25, 48, 64, 80)
last <- c(3, 7, 15, 24, 47, 63, 79, 95)
split.string <- function(x) {
x <- substring(x, first, last)
x[nchar(x) == 0] <- as.character(NA)
x
}
trim <- function (s) {
s <- sub("^ +", "", s)
s <- sub(" +$", "", s)
s[(s=="")]<-NA
s
}
raw.lines <- readLines(file)
type <- substring(raw.lines,1,6)
blank <- type==" "
type <- type[!blank]; raw.lines <- raw.lines[!blank]
col <- which(type=="RESIDU")
start <- col+2
end <- which(type=="SUMMAR")-2
##trim(split.string(raw.lines[start]))
mat <- matrix(trim(sapply(raw.lines[start:end], split.string)),byrow=TRUE, ncol=length(first))
colnames(mat) <- c("resid","resno","pka","location","desolv","hside","hback","coulombic")
return(list(pka=mat, his=mat[mat[,"resid"]=="HIS",]))
}
write.crdbox <- function(x, trjfile = "R.mdcrd") {
##- Output an AMBER crdbox format FORMAT(10F8.3) trajectory file
cat(paste("TITLE : Created by Bio3D with",ncol(x)/3,"atoms"),
fill=80, file=trjfile)
for(j in 1:nrow(traj)) {
cat( sprintf("%8.3f",x[j,]), sep="", fill=80,
file=trjfile, append=TRUE )
cat(" 0.000 0.000 0.000", sep="",
fill=80, file=trjfile, append=TRUE )
}
}
## Basic utility functions 'cat0' and 'paste0', which are
## like 'cat' and 'paste' except that their default code{sep}
## value is '""' (i.e. no space)
paste0 <- function(..., sep = "") {
paste(..., sep = sep)
}
cat0 <- function (..., sep = "") {
cat(..., sep = sep)
}
odd <- function(x) {
x != as.integer(x/2) * 2
}
even <- function(x) {
x == as.integer(x/2) * 2
}
less <- function(x) {
txt <- tempfile()
sink(txt)
print(x)
sink()
system(paste("less", txt))
unlink(txt)
}
## makes all letters except first in word lower case
##gsubfn("\\B.", tolower, "I WOULD LIKE A BANANA SPLIT", perl = TRUE)
g03.min <- function(pdbfile, charge, multiplicity=1, comfile="out.com") {
## Run Geometry optimization with Gaussian (generate input for g03)
##
## N.B. the pdbfile and total charge values NEED to be set below
##
## You can get charge and multiplicity values (and your input
## pdbfile with hydrogens) from mastro (or some other such source).
## See the and wizard setup option and Jaguar molecule tab in Mastro.
## Could also check http://ligand-expo.rcsb.org
if(is.null(charge))
stop("Need know the net charge, charge=")
if(is.null(pdbfile))
stop("Need an input pdbfile, pdbfile=")
pdb <- read.pdb(pdbfile, het2atom=TRUE)
atoms <- substr(pdb$atom[,"elety"],1,1)
if(sum(atoms=="H")==0)
stop("Looks like your molecule has _NO_HYDROGENS_???")
x <- as.numeric(pdb$atom[,"x"])
y <- as.numeric(pdb$atom[,"y"])
z <- as.numeric(pdb$atom[,"z"])
format <- "%1s %10.6f %10.6f %10.6f"
lines <- NULL
for (i in 1:length(atoms)) {
lines <- rbind(lines, sprintf(format, atoms[i],
x[i], y[i], z[i]) )
}
##- TO Do. add charge determination section!!
header <- paste("%Chk=Job1-gaussian.chk
%Mem=256MB
%NProc=1
#P hf/6-31G* Opt=(Tight,CalcFC) Freq SCF(Conver=8) Test
Gaussian optimization input, to be used by R.E.D.
",charge, multiplicity,"\n")
## Could use some other options, e.g.
##P AM1 Opt=Tight GFInput GFPrint SCF(Conver=8) Test\n
cat(header, file = comfile)
cat(lines, file = comfile, sep = "\n", append = TRUE)
cat("\n", file = comfile, append = TRUE)
cat(paste(" To run Gaussian optimization use the output script (",
comfile,") with the cmd:",
"\n\t g03 < ",comfile," > min.log\n", sep=""))
}
read.xyz <- function(xyzfile) {
raw <- as.matrix(read.fwf(xyzfile,
widths=c(2,15,14,15),
skip = 2, strip.white=TRUE,
col.names=c("elety","x","y","z")) )
return(raw)
}
g03.epot <- function(xyzfile=NULL, pdbfile=NULL, charge, multiplicity=1,
comfile="epot.com") {
if(is.null(charge))
stop("Need know the net charge, charge=")
if(is.null(xyzfile)) {
if(is.null(pdbfile)) {
stop("Need an input coordfile, either xyzfile= or pdbfile=")
} else {
xyz <- read.pdb(pdbfile, het2atom=TRUE)$atom
}
} else {
xyz <- read.xyz(xyzfile)
}
elety <- xyz[,"elety"]
x <- as.numeric(xyz[,"x"])
y <- as.numeric(xyz[,"y"])
z <- as.numeric(xyz[,"z"])
format <- "%1s %10.6f %10.6f %10.6f"
lines <- NULL
for (i in 1:length(elety)) {
lines <- rbind(lines, sprintf(format, elety[i],
x[i], y[i], z[i]) )
}
header <- paste("%Mem=64MB
%NProc=1\n
#P HF/6-31G* SCF(Conver=6) NoSymm Test
Pop=mk IOp(6/33=2) GFInput GFPrint\n
Molecular Electrostatic Potential min.xyz2epot.com\n
",charge, multiplicity,"\n")
## Could use some other options, e.g.
##P AM1 Opt=Tight GFInput GFPrint SCF(Conver=8) Test\n
cat(header, file = comfile)
cat(lines, file = comfile, sep = "\n", append = TRUE)
cat("\n", file = comfile, append = TRUE)
cat(paste(" To run Gaussian potential calculation use the output script (",
comfile,") with the cmd:",
"\n\t g03 < ",comfile," > epot.log\n", sep=""))
}
elety2num <- function(atoms) {
ele <- c("H", "He", "Li", "Be", "B", "C", "N", "O", "F",
"Ne", "Na", "Mg", "Al", "Si", "P", "S", "Cl",
"Ar", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn",
"Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge",
"As", "Se", "Br")
convert <- function(x) {
if (all(x != ele)) {
return("X")
} else {
return(which(x == ele))
}
}
return(as.vector(unlist(sapply(atoms, convert))))
}
xyz2resp <- function(xyzfile, charge, rest=NULL,
respfile="resp_input.out",
ioutopt=0, iqopt=1, nmol=1,
ihfree=1, irstrnt=1, qwt=0.0005) {
if(is.null(charge))
stop("Need know the net charge, charge=")
if(is.null(xyzfile))
stop("Need an input coordfile, xyzfile=")
raw <- read.xyz(xyzfile)
charge <- as.character(charge) ## <--- !****! --->
natom <- as.character(nrow(raw))
nums <- as.character(elety2num(raw[,"elety"]))
if(is.null(rest)) ## <--- !****! --->
rest <- as.character(rep(0,nrow(raw)))
format <- "%5s%5s"
lines <- sprintf(format,charge,natom)
for (i in 1:nrow(raw)) {
lines <- rbind(lines, sprintf(format, nums[i], rest[i]) )
}
header <- paste(" title\n",
" &cntrl\n",
" ioutopt=", ioutopt,
", iqopt=", iqopt,
", nmol=", nmol,
", ihfree=", ihfree,
", irstrnt=",irstrnt,
", qwt=", qwt,
"\n &end\n 1.0\n title\n", sep="")
cat(header, file = respfile)
cat(lines, file = respfile, sep = "\n", append = TRUE)
cat("\n", file = respfile, append = TRUE)
}
`blast.n` <-
function(seq, database="nr") {
## Run NCBI blastn on a given 'seq' sequence against a given 'database'
if(!is.vector(seq)) {
stop("Input 'seq' should be a single sequence as a single or multi element character vector")
}
seq <- paste(seq, collapse="")
if( !(database %in% c("pdb", "nr", "swissprot")) )
stop("Option database should be one of pdb, nr or swissprot")
##- Submit
urlput <- paste("http://www.ncbi.nlm.nih.gov/BLAST/Blast.cgi?CMD=Put&DATABASE=",
database,"&HITLIST_SIZE=20000&PROGRAM=blastn&CLIENT=web&QUERY=",
paste(seq,collapse=""),
sep="")
txt <- scan(urlput, what="raw", sep="\n", quiet=TRUE)
rid <- sub("^.*RID = " ,"",txt[ grep("RID =",txt) ])
cat(paste(" Searching ... please wait (updates every 5 seconds) RID =",rid,"\n "))
##- Retrive results via RID code (note 'Sys.sleep()')
urlget <- paste("http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Get",
"&FORMAT_OBJECT=Alignment",
"&ALIGNMENT_VIEW=Tabular",
"&RESULTS_FILE=on",
"&FORMAT_TYPE=CSV",
"&RID=",rid, sep="")
raw <- read.csv(urlget,
header = FALSE, sep = ",", quote="\"", dec=".",
fill = TRUE, comment.char="")
## Check for job completion (retrive html or cvs?)
html <- 1
while(length(html) == 1) {
cat("."); Sys.sleep(5)
raw <- read.csv(urlget,
header = FALSE, sep = ",", quote="\"", dec=".",
fill = TRUE, comment.char="")
html <- grep("DOCTYPE", raw[1,])
}
colnames(raw) <- c("queryid", "subjectids", "identity", ##"positives",
"alignmentlength", "mismatches", "gapopens",
"q.start", "q.end", "s.start", "s.end",
"evalue", "bitscore")
## expand 'raw' for each hit in 'subjectids' (i.e. split on ";")
rawm <- as.matrix(raw)
eachsubject <- strsplit(rawm[,"subjectids"],";")
subjectids <- unlist(eachsubject)
n.subjects <- sapply(eachsubject, length)
rawm <- apply(rawm, 2, rep, times=n.subjects)
rawm[,"subjectids"] <- subjectids
## parse ids
all.ids <- strsplit(subjectids, "\\|")
gi.id <- sapply(all.ids, '[', 2)
pdb.id <- paste(sapply(all.ids, '[', 4),"_",sapply(all.ids, '[', 5),sep="")
## N.B. hack: zero evalues to arbirtrly high value!!
mlog.evalue <- -log(as.numeric(rawm[,"evalue"]))
mlog.evalue[is.infinite(mlog.evalue)] <- -log(1e-308)
cat(paste("\n Reporting",length(pdb.id),"hits\n"))
return( list(bitscore= as.numeric(rawm[,"bitscore"]),
evalue = as.numeric(rawm[,"evalue"]),
mlog.evalue = mlog.evalue,
gi.id = gi.id,
pdb.id = pdb.id,
hit.tbl = rawm,
raw = raw) )
}
gb2fasta <- function(source.file,
destination.file = paste(basename(source.file),".fasta",sep="")) {
input <- readLines(source.file)
head <- input[1]
head <- unlist(strsplit(head, split = " "))
head <- head[nchar(head) > 0]
seqname <- head[2]
seqsize <- as.integer(head[3])
outheader <- sprintf(">%s %d bp", seqname, seqsize)
confile <- file(destination.file, open = "w")
writeLines(outheader, confile)
debut <- which(substring(input, 1, 6) == "ORIGIN") + 1
if (length(debut) > 1)
stop("Multiple entries not yet implemented !")
fin <- which(substring(input, 1, 2) == "//") - 1
if (length(fin) > 1)
stop("Multiple entries not yet implemented !")
input <- input[debut:fin]
input <- sapply(input, function(x) {
return(paste(substr(x, 11, 20), substr(x, 22, 31), substr(x,
33, 42), substr(x, 44, 53), substr(x, 55, 64), substr(x,
66, 75), sep = "", collapse = ""))
})
names(input) <- NULL
writeLines(input, confile)
close(confile)
return(read.fasta(destination.file))
}
free2fasta <- function(infile, outfile) {
## Convert FREE (SCA) format to FASTA format
## fasta2free("PDZ_final.fasta", "poo")
## free2fasta("PDZ_final.free", "PDZ_final.fasta")
raw <- read.table(infile, stringsAsFactors=FALSE)
suppressWarnings( file.remove(outfile) )
for(i in 1:nrow(raw)) {
cat(paste(">",raw[i,1],sep=""), append=TRUE, sep ="\n", file=outfile)
cat(raw[i,2], append=TRUE, sep ="\n",file=outfile)
}
}
fasta2free <- function(infile, outfile) {
## Convert FATA format to FREE (SCA) format
## fasta2free("PDZ_final.fasta", "poo")
## free2fasta("PDZ_final.free", "PDZ_final.fasta")
raw <- read.fasta(infile)
suppressWarnings( file.remove(outfile) )
for(i in 1:length(raw$id)) {
space <- paste( rep(" ", (10 - nchar(raw$id[i]))), collapse="")
cat(paste( raw$id[i], space,
paste(raw$ali[i,], collapse=""), sep=""),
append=TRUE, sep ="\n",file=outfile)
}
}
read.apbs <- function(f) {
## Read one or more APBS log files
## use regular UNIX * eild card for multi file reads e.g.
## read.apbs("tub_mutant_pqrs/apbs_C.*.log")
cmd <- paste("grep 'Global net ELEC energy ='",f)
raw <- read.table( pipe(cmd) )
ind <- which(raw[1,]=="energy")+2
return( as.numeric(raw[1,ind]) )
## if(length(grep("\\*",f))>0 ) {
## raw <- c(as.matrix(read.table( pipe(cmd),
## colClasses=c(rep('NULL',6), "numeric", "NULL")) ))
## } else {
### raw <- c(as.matrix( read.table( pipe(cmd),
### colClasses=c(rep('NULL',5),"numeric", "NULL")) ))
## raw <- c(as.matrix( read.table( pipe(cmd),
## colClasses=c(rep('NULL',6),"numeric", "NULL")) ))
## }
## return(unique(raw))
}
"ulsq" <-function(xx,
yy,
xfit=rep(TRUE,length(xx)),
yfit=xfit,
verbose=FALSE) {
## Return the rotation and translation matrix ... (see tlsq)
##
## Coordinate superposition with the Kabsch algorithm
## from Acta Cryst (1978) A34 pp827-828 (to which equation no. refer)
## yy is the target (i.e. fixed)
xx <- matrix(xx,nrow=3, )
x <- matrix(xx[xfit],nrow=3, )
y <- matrix(yy[yfit],nrow=3, )
if(length(x) != length(y)) stop("dimension mismatch in x and y")
## mean positions
xbar <- apply(x,1,mean) ; ybar <- apply(y,1,mean)
## center both sets
xx <- sweep(xx,1,xbar) # NB xx centred on xbar
x <- sweep(x,1,xbar) ; y <- sweep(y,1,ybar)
##irmsd <- sqrt(sum((x-y)^2)/dim(y)[2])
##cat("#irmsd= ",round(irmsd,6),"\n")
# generate the 3x3 moment matrix: R (Equation 3)
R <- y %*% t(x)
# form R'R
RR <- t(R) %*% R
# diagonalize R'R
prj <- eigen(RR)
prj$values[prj$values < 0 & prj$values >= -1.0E-12]<-1.0E-12
# form A
A <- prj$vectors
# make explicitly rh system
# A[,3] <- v3cross(A[,1],A[,2])
# inline the cross-product function call.
b<-A[,1]; c <- A[,2]
A[1,3] <- (b[2] * c[3]) - (b[3] * c[2])
A[2,3] <- (b[3] * c[1]) - (b[1] * c[3])
A[3,3] <- (b[1] * c[2]) - (b[2] * c[1])
# form B (==RA) (Equation 8)
B <- R %*% A
# normalize B
# B <- sweep(B,2,sqrt(apply(B^2,2,sum)),"/")
B <- sweep(B,2,sqrt(prj$values),"/")
# make explicitly rh system
# B[,3] <- v3cross(B[,1],B[,2])
# inline the cross-product function call.
b<-B[,1]; c <- B[,2]
B[1,3] <- (b[2] * c[3]) - (b[3] * c[2])
B[2,3] <- (b[3] * c[1]) - (b[1] * c[3])
B[3,3] <- (b[1] * c[2]) - (b[2] * c[1])
# form U (==Ba) (Equation 7)
# U is the rotation matrix
U <- B %*% t(A)
####====###
return(list(U=U, ybar=ybar, xbar=xbar))
####====###
## here we apply transformation matrix to *all* elements of xx
## rotate xx (Uxx)
### xx <- U %*% xx
## return xx centred on y
### xx <- sweep(xx,1,ybar,"+")
### as.vector(xx)
}
tlsq <- function(xyz, u) {
## transform coords bu rotation and translation from ulsq
return( as.vector(sweep( (u$U %*%
sweep( matrix(xyz, nrow=3),1,u$xbar) ),
1, u$ybar,"+" )) )
}
read AMBER log/mden files
DONE read.mden()
read AMBER netCDF binary trajectorys
DONE see read.ncdf()
read CHARMM log files
REMOVED
read NAMD log files
not started
read CHARMM crd files
DONE read.crd()
read.crd <- function(file, verbose = TRUE) {
split.string <- function(x) {
x <- substring(x, first, last)
xnchar(x) == 0 <- as.character(NA)
x
}
trim <- function(s) {
s <- sub("^ +", "", s)
s <- sub(" +$", "", s)
s[(s == "")] <- NA
s
}
atom.format <- c(5, 5, 4, 5, -1, 10,10,10, -1, 4, -1, 4, 10)
atom.names <- c("eleno", "resno", "resid", "elety",
"blank", "x", "y", "z", "blank",
"segid", "blank", "resno2", "b")
widths <- abs(atom.format)
drop.ind <- (atom.format < 0)
st <- c(1, 1 + cumsum(widths))
first <- st-length(st)[!drop.ind]
last <- cumsum(widths)[!drop.ind]
raw.lines <- readLines(file)
head.ind <- which(substr(raw.lines,1,1)=="*")
head.ind <- c(head.ind, (head.ind[length(head.ind)]+1) )
if(verbose)
cat(raw.lines[head.ind],sep="\n")
atom <- matrix(trim(sapply(raw.lines[-head.ind], split.string)),
byrow = TRUE,
ncol = length(atom.format[!drop.ind]),
dimnames = list(NULL,
atom.names[!drop.ind]) )
output <- list(atom = atom,
xyz = as.numeric(t(atom[, c("x", "y", "z")])),
calpha = as.logical(atom[, "elety"] == "CA"))
return(output)
}
Output
write.dcd
The ability to output a binary dcd file from any coordinate matrix.
started but incomplete use write.ncdf()
write netcdf
DONE write.ncdf()
write.crd
REMOVED, Output CHARMM crd coordinate file
Manipulation
convert.pdb
Rewrite convert.pdb() for conversion between CHARMM, AMBER, GROMACS and PDB format
Analysis
torsion analysis
DONE, torsion.xyz() and torsion.pdb()
A function for basic phi, psi, omega and chi dihedral anlysis. Should probably call a more general angle function which can also be called for sse angle analysis, hbond analysis, Ramachandran plots etc.
Already have SOLW phi psi analysis with the secondary structure functions stride and dssp (see below). These are a little too slow for trajectory analysis.
hbond analysis
DONE, hbond() note not tested extensively.
Build on angle and distance functions employing a default distance cutoff of 2.5 angstroms (0.25 nm) and a default angle cutoff of 60 degrees (i.e. bond angle must be 60 degrees or greater). The angle cutoff is rather liberal. Some researchers prefer a more stringent angle cutoff of 120 degrees.
As an example doc Rd file look at possible salt bridges i.e. distances and angles between charged plus/minus residues
rmsf
DONE, rmsf()
radius of gyration
compute the radius of gyration for selected atoms and the radii of gyration about the x, y and z axes. Should we have an option to mass weight atoms or just take Calphas?. For the moment take the mass from a PQR input file (B col).
pdb <- read.pqr("1L2y.pqr")
xyz <- as.vec3d(pdb$xyz)
mass <- as.numeric(pdb$atom[,"b"])
## distance from centroid
centered <- sweep(xyz, 2, apply(xyz,2,mean))
rg <- sqrt( sum( (apply(centered, 1, vec.norm)^2)*mass)/sum(mass) )
cross-correlation
DONE, see dccm(). Also have a plot function that should be pushed to bio3d()
Time series analysis
For the moment just use acf()
secondary structure analysis
DONE, see dssp() and stride()
seqaln
DONE, seqaln() etc...
Comments (0)
You don't have permission to comment on this page.